1
0
mirror of https://github.com/containers/bootc.git synced 2026-02-05 15:45:53 +01:00

chunking: Support exclusive chunks defined via xattrs

Signed-off-by: ckyrouac <ckyrouac@redhat.com>
This commit is contained in:
ckyrouac
2025-07-07 11:58:52 -04:00
parent e7d15d4283
commit 81b3c270b0
6 changed files with 269 additions and 48 deletions

View File

@@ -49,7 +49,7 @@ pub(crate) struct Chunk {
pub(crate) packages: Vec<String>,
}
#[derive(Debug, Deserialize, Serialize)]
#[derive(Debug, Clone, Deserialize, Serialize)]
/// Object metadata, but with additional size data
pub struct ObjectSourceMetaSized {
/// The original metadata
@@ -276,9 +276,10 @@ impl Chunking {
meta: &ObjectMetaSized,
max_layers: &Option<NonZeroU32>,
prior_build_metadata: Option<&oci_spec::image::ImageManifest>,
specific_contentmeta: Option<&ObjectMetaSized>,
) -> Result<Self> {
let mut r = Self::new(repo, rev)?;
r.process_mapping(meta, max_layers, prior_build_metadata)?;
r.process_mapping(meta, max_layers, prior_build_metadata, specific_contentmeta)?;
Ok(r)
}
@@ -294,6 +295,7 @@ impl Chunking {
meta: &ObjectMetaSized,
max_layers: &Option<NonZeroU32>,
prior_build_metadata: Option<&oci_spec::image::ImageManifest>,
specific_contentmeta: Option<&ObjectMetaSized>,
) -> Result<()> {
self.max = max_layers
.unwrap_or(NonZeroU32::new(MAX_CHUNKS).unwrap())
@@ -314,6 +316,25 @@ impl Chunking {
rmap.entry(Rc::clone(contentid)).or_default().push(checksum);
}
// Create exclusive chunks first if specified
let mut processed_specific_components = BTreeSet::new();
if let Some(specific_meta) = specific_contentmeta {
for component in &specific_meta.sizes {
let mut chunk = Chunk::new(&component.meta.name);
chunk.packages = vec![component.meta.name.to_string()];
// Move all objects belonging to this exclusive component
if let Some(objects) = rmap.get(&component.meta.identifier) {
for &obj in objects {
self.remainder.move_obj(&mut chunk, obj);
}
}
self.chunks.push(chunk);
processed_specific_components.insert(&*component.meta.identifier);
}
}
// Safety: Let's assume no one has over 4 billion components.
self.n_provided_components = meta.sizes.len().try_into().unwrap();
self.n_sized_components = sizes
@@ -323,49 +344,59 @@ impl Chunking {
.try_into()
.unwrap();
// TODO: Compute bin packing in a better way
let start = Instant::now();
let packing = basic_packing(
sizes,
NonZeroU32::new(self.max).unwrap(),
prior_build_metadata,
)?;
let duration = start.elapsed();
tracing::debug!("Time elapsed in packing: {:#?}", duration);
// Filter out exclusive components for regular packing
let regular_sizes: Vec<ObjectSourceMetaSized> = sizes
.iter()
.filter(|component| {
!processed_specific_components.contains(&*component.meta.identifier)
})
.cloned()
.collect();
for bin in packing.into_iter() {
let name = match bin.len() {
0 => Cow::Borrowed("Reserved for new packages"),
1 => {
let first = bin[0];
let first_name = &*first.meta.identifier;
Cow::Borrowed(first_name)
}
2..=5 => {
let first = bin[0];
let first_name = &*first.meta.identifier;
let r = bin.iter().map(|v| &*v.meta.identifier).skip(1).fold(
String::from(first_name),
|mut acc, v| {
write!(acc, " and {}", v).unwrap();
acc
},
);
Cow::Owned(r)
}
n => Cow::Owned(format!("{n} components")),
};
let mut chunk = Chunk::new(&name);
chunk.packages = bin.iter().map(|v| String::from(&*v.meta.name)).collect();
for szmeta in bin {
for &obj in rmap.get(&szmeta.meta.identifier).unwrap() {
self.remainder.move_obj(&mut chunk, obj.as_str());
// Process regular components with bin packing if we have remaining layers
if let Some(remaining) = NonZeroU32::new(self.remaining()) {
let start = Instant::now();
let packing = basic_packing(&regular_sizes, remaining, prior_build_metadata)?;
let duration = start.elapsed();
tracing::debug!("Time elapsed in packing: {:#?}", duration);
for bin in packing.into_iter() {
let name = match bin.len() {
0 => Cow::Borrowed("Reserved for new packages"),
1 => {
let first = bin[0];
let first_name = &*first.meta.identifier;
Cow::Borrowed(first_name)
}
2..=5 => {
let first = bin[0];
let first_name = &*first.meta.identifier;
let r = bin.iter().map(|v| &*v.meta.identifier).skip(1).fold(
String::from(first_name),
|mut acc, v| {
write!(acc, " and {}", v).unwrap();
acc
},
);
Cow::Owned(r)
}
n => Cow::Owned(format!("{n} components")),
};
let mut chunk = Chunk::new(&name);
chunk.packages = bin.iter().map(|v| String::from(&*v.meta.name)).collect();
for szmeta in bin {
for &obj in rmap.get(&szmeta.meta.identifier).unwrap() {
self.remainder.move_obj(&mut chunk, obj.as_str());
}
}
self.chunks.push(chunk);
}
self.chunks.push(chunk);
}
assert_eq!(self.remainder.content.len(), 0);
// Check that all objects have been processed
if !processed_specific_components.is_empty() || !regular_sizes.is_empty() {
assert_eq!(self.remainder.content.len(), 0);
}
Ok(())
}
@@ -1003,4 +1034,191 @@ mod test {
assert_eq!(structure_derived, v2_expected_structure);
Ok(())
}
fn setup_exclusive_test(
component_data: &[(u32, u32, u64)],
max_layers: u32,
num_fake_objects: Option<usize>,
) -> Result<(
Vec<ObjectSourceMetaSized>,
ObjectMetaSized,
ObjectMetaSized,
Chunking,
)> {
// Create content metadata from provided data
let contentmeta: Vec<ObjectSourceMetaSized> = component_data
.iter()
.map(|&(id, freq, size)| ObjectSourceMetaSized {
meta: ObjectSourceMeta {
identifier: RcStr::from(format!("pkg{}.0", id)),
name: RcStr::from(format!("pkg{}", id)),
srcid: RcStr::from(format!("srcpkg{}", id)),
change_time_offset: 0,
change_frequency: freq,
},
size,
})
.collect();
// Create object maps with fake checksums
let mut object_map = IndexMap::new();
let mut regular_map = IndexMap::new();
for (i, component) in contentmeta.iter().enumerate() {
let checksum = format!("checksum_{}", i);
regular_map.insert(checksum.clone(), component.meta.identifier.clone());
object_map.insert(checksum, component.meta.identifier.clone());
}
let regular_meta = ObjectMetaSized {
map: regular_map,
sizes: contentmeta.clone(),
};
// Create exclusive metadata (initially empty, to be populated by individual tests)
let exclusive_meta = ObjectMetaSized {
map: object_map,
sizes: Vec::new(),
};
// Set up chunking with remainder chunk
let mut chunking = Chunking::default();
chunking.max = max_layers;
chunking.remainder = Chunk::new("remainder");
// Add fake objects to the remainder chunk if specified
if let Some(num_objects) = num_fake_objects {
for i in 0..num_objects {
let checksum = format!("checksum_{}", i);
chunking
.remainder
.content
.insert(RcStr::from(checksum), (1000, vec![]));
chunking.remainder.size += 1000;
}
}
Ok((contentmeta, regular_meta, exclusive_meta, chunking))
}
#[test]
fn test_exclusive_chunks() -> Result<()> {
    // Exclusive components must each get their own chunk, emitted before
    // any regular bin-packed chunks.
    let components = [
        (1, 100, 50000),
        (2, 200, 40000),
        (3, 300, 30000),
        (4, 400, 20000),
        (5, 500, 10000),
    ];
    let (contentmeta, regular_meta, mut exclusive_meta, mut chunking) =
        setup_exclusive_test(&components, 8, Some(5))?;
    // Mark pkg1 and pkg2 as exclusive.
    exclusive_meta.sizes = contentmeta[..2].to_vec();
    chunking.process_mapping(
        &regular_meta,
        &Some(NonZeroU32::new(8).unwrap()),
        None,
        Some(&exclusive_meta),
    )?;
    // The first two chunks are the exclusive ones, in input order, each
    // holding exactly its own package.
    assert!(chunking.chunks.len() >= 2);
    for (idx, pkg) in ["pkg1", "pkg2"].into_iter().enumerate() {
        assert_eq!(chunking.chunks[idx].name, pkg);
        assert_eq!(chunking.chunks[idx].packages, vec![pkg.to_string()]);
    }
    Ok(())
}
#[test]
fn test_exclusive_chunks_with_regular_packing() -> Result<()> {
    // After the exclusive chunks are emitted, remaining components are
    // bin-packed into regular chunks that never include exclusive packages.
    let components = [
        (1, 100, 50000), // exclusive
        (2, 200, 40000), // exclusive
        (3, 300, 30000), // regular
        (4, 400, 20000), // regular
        (5, 500, 10000), // regular
        (6, 600, 5000),  // regular
    ];
    let (contentmeta, regular_meta, mut exclusive_meta, mut chunking) =
        setup_exclusive_test(&components, 8, Some(6))?;
    // Mark pkg1 and pkg2 as exclusive.
    exclusive_meta.sizes = contentmeta[..2].to_vec();
    chunking.process_mapping(
        &regular_meta,
        &Some(NonZeroU32::new(8).unwrap()),
        None,
        Some(&exclusive_meta),
    )?;
    // The exclusive chunks come first, each with exactly its own package.
    assert!(chunking.chunks.len() >= 2);
    for (idx, pkg) in ["pkg1", "pkg2"].into_iter().enumerate() {
        assert_eq!(chunking.chunks[idx].name, pkg);
        assert_eq!(chunking.chunks[idx].packages, vec![pkg.to_string()]);
    }
    // No regular chunk may carry an exclusive package.
    for chunk in chunking.chunks.iter().skip(2) {
        assert!(!chunk.packages.contains(&"pkg1".to_string()));
        assert!(!chunk.packages.contains(&"pkg2".to_string()));
    }
    Ok(())
}
#[test]
fn test_exclusive_chunks_isolation() -> Result<()> {
    // A single exclusive component must stay isolated: pkg1 gets its own
    // chunk and never shares a chunk with the regular packages.
    let components = [(1, 100, 50000), (2, 200, 40000), (3, 300, 30000)];
    let (contentmeta, regular_meta, mut exclusive_meta, mut chunking) =
        setup_exclusive_test(&components, 8, Some(3))?;
    // Only pkg1 is exclusive.
    exclusive_meta.sizes = vec![contentmeta[0].clone()];
    chunking.process_mapping(
        &regular_meta,
        &Some(NonZeroU32::new(8).unwrap()),
        None,
        Some(&exclusive_meta),
    )?;
    // pkg1 occupies the first chunk by itself.
    assert!(!chunking.chunks.is_empty());
    assert_eq!(chunking.chunks[0].name, "pkg1");
    assert_eq!(chunking.chunks[0].packages, vec!["pkg1".to_string()]);
    // pkg2 and pkg3 must both appear in later chunks, and any chunk holding
    // them must not also hold pkg1.
    let mut found_pkg2 = false;
    let mut found_pkg3 = false;
    for chunk in chunking.chunks.iter().skip(1) {
        let has = |name: &str| chunk.packages.iter().any(|p| p == name);
        if has("pkg2") {
            found_pkg2 = true;
            assert!(!has("pkg1"));
        }
        if has("pkg3") {
            found_pkg3 = true;
            assert!(!has("pkg1"));
        }
    }
    assert!(found_pkg2 && found_pkg3);
    Ok(())
}
}

View File

@@ -766,7 +766,7 @@ async fn container_export(
container_config: Option<Utf8PathBuf>,
cmd: Option<Vec<String>>,
compression_fast: bool,
contentmeta: Option<Utf8PathBuf>,
package_contentmeta: Option<Utf8PathBuf>,
) -> Result<()> {
let container_config = if let Some(container_config) = container_config {
serde_json::from_reader(File::open(container_config).map(BufReader::new)?)?
@@ -777,7 +777,7 @@ async fn container_export(
let mut contentmeta_data = None;
let mut created = None;
let mut labels = labels.clone();
if let Some(contentmeta) = contentmeta {
if let Some(contentmeta) = package_contentmeta {
let buf = File::open(contentmeta).map(BufReader::new);
let raw: RawMeta = serde_json::from_reader(buf?)?;
@@ -842,7 +842,7 @@ async fn container_export(
container_config,
authfile,
skip_compression: compression_fast, // TODO rename this in the struct at the next semver break
contentmeta: contentmeta_data.as_ref(),
package_contentmeta: contentmeta_data.as_ref(),
max_layers,
created,
..Default::default()

View File

@@ -239,7 +239,7 @@ fn build_oci(
let mut manifest = writer.new_empty_manifest()?.build().unwrap();
let chunking = opts
.contentmeta
.package_contentmeta
.as_ref()
.map(|meta| {
crate::chunking::Chunking::from_mapping(
@@ -248,6 +248,7 @@ fn build_oci(
meta,
&opts.max_layers,
opts.prior_build,
opts.specific_contentmeta,
)
})
.transpose()?;
@@ -427,7 +428,9 @@ pub struct ExportOpts<'m, 'o> {
pub prior_build: Option<&'m oci_image::ImageManifest>,
/// Metadata mapping between objects and their owning component/package;
/// used to optimize packing.
pub contentmeta: Option<&'o ObjectMetaSized>,
pub package_contentmeta: Option<&'o ObjectMetaSized>,
/// Metadata for exclusive components that should have their own layers.
pub specific_contentmeta: Option<&'o ObjectMetaSized>,
/// Sets the created tag in the image manifest.
pub created: Option<String>,
/// Whether to explicitly create all parent directories in the tar layers.

View File

@@ -909,7 +909,7 @@ impl Fixture {
.context("Computing sizes")?;
let opts = ExportOpts {
max_layers: std::num::NonZeroU32::new(PKGS_V0_LEN as u32),
contentmeta: Some(&contentmeta),
package_contentmeta: Some(&contentmeta),
..Default::default()
};
let digest = crate::container::encapsulate(

View File

@@ -37,7 +37,7 @@ mod rcstr_serialize {
pub type ContentID = Rc<str>;
/// Metadata about a component/package.
#[derive(Debug, Eq, Deserialize, Serialize)]
#[derive(Debug, Clone, Eq, Deserialize, Serialize)]
pub struct ObjectSourceMeta {
/// Unique identifier, does not need to be human readable, but can be.
#[serde(with = "rcstr_serialize")]

View File

@@ -534,7 +534,7 @@ async fn impl_test_container_import_export(chunked: bool) -> Result<()> {
opts.copy_meta_keys = vec!["buildsys.checksum".to_string()];
opts.copy_meta_opt_keys = vec!["nosuchvalue".to_string()];
opts.max_layers = std::num::NonZeroU32::new(PKGS_V0_LEN as u32);
opts.contentmeta = contentmeta.as_ref();
opts.package_contentmeta = contentmeta.as_ref();
opts.container_config = Some(container_config);
let digest = ostree_ext::container::encapsulate(
fixture.srcrepo(),