1use std::borrow::{Borrow, Cow};
6use std::collections::{BTreeMap, BTreeSet};
7use std::fmt::Write;
8use std::hash::{Hash, Hasher};
9use std::num::NonZeroU32;
10use std::rc::Rc;
11use std::time::Instant;
12
13use crate::container::{COMPONENT_SEPARATOR, CONTENT_ANNOTATION};
14use crate::objectsource::{ContentID, ObjectMeta, ObjectMetaMap, ObjectSourceMeta};
15use crate::objgv::*;
16use crate::statistics;
17use anyhow::{Result, anyhow};
18use camino::{Utf8Path, Utf8PathBuf};
19use containers_image_proxy::oci_spec;
20use gvariant::aligned_bytes::TryAsAligned;
21use gvariant::{Marker, Structure};
22use indexmap::IndexMap;
23use ostree::{gio, glib};
24use serde::{Deserialize, Serialize};
25
/// Maximum number of chunks (container image layers) used when the caller
/// does not supply an explicit `max_layers`.
pub(crate) const MAX_CHUNKS: u32 = 64;
/// Minimum number of layers below which chunked packing is refused
/// (enforced via `anyhow::ensure!` in `basic_packing`).
const MIN_CHUNKED_LAYERS: u32 = 4;

/// Reference-counted string; used for object checksums and content ids.
pub(crate) type RcStr = Rc<str>;
/// Maps an object checksum to its size and all file paths referencing it.
pub(crate) type ChunkMapping = BTreeMap<RcStr, (u64, Vec<Utf8PathBuf>)>;
/// Partition key for low-size components; the numeric prefix fixes the
/// iteration order of the partitions `BTreeMap`.
const LOW_PARTITION: &str = "2ls";
/// Partition key for high-size components (sorts before all other keys).
const HIGH_PARTITION: &str = "1hs";
43
/// A single chunk: a named grouping of objects destined for one layer.
#[derive(Debug, Default)]
pub(crate) struct Chunk {
    /// Human-readable name (e.g. derived from the component identifiers).
    pub(crate) name: String,
    /// Object checksum -> (object size, paths referencing the object).
    pub(crate) content: ChunkMapping,
    /// Total size in bytes of the unique objects in this chunk.
    pub(crate) size: u64,
    /// Names of the packages whose content makes up this chunk.
    pub(crate) packages: Vec<String>,
}
51
/// Object source metadata paired with the total size of its objects.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ObjectSourceMetaSized {
    /// The underlying source metadata (identifier, name, change frequency).
    #[serde(flatten)]
    pub meta: ObjectSourceMeta,
    /// Total size in bytes of all objects attributed to this source.
    pub size: u64,
}
61
impl Hash for ObjectSourceMetaSized {
    /// Hash solely by the source identifier, mirroring the `PartialEq` impl
    /// so the `Hash`/`Eq` contract holds.
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.meta.identifier.hash(state);
    }
}
67
// Equality is total here because it is defined purely by the identifier.
impl Eq for ObjectSourceMetaSized {}
69
impl PartialEq for ObjectSourceMetaSized {
    /// Two entries are equal iff their source identifiers match.
    fn eq(&self, other: &Self) -> bool {
        self.meta.identifier == other.meta.identifier
    }
}
75
/// Object metadata extended with per-source size information.
#[derive(Debug)]
pub struct ObjectMetaSized {
    /// Mapping from object checksum to content id.
    pub map: ObjectMetaMap,
    /// Source metadata with sizes, sorted by size descending
    /// (see `compute_sizes`).
    pub sizes: Vec<ObjectSourceMetaSized>,
}
84
85impl ObjectMetaSized {
86 pub fn compute_sizes(repo: &ostree::Repo, meta: ObjectMeta) -> Result<ObjectMetaSized> {
88 let cancellable = gio::Cancellable::NONE;
89 let map = meta.map;
91 let mut set = meta.set;
92 let mut sizes = BTreeMap::<&str, u64>::new();
94 for (checksum, contentid) in map.iter() {
96 let finfo = repo.query_file(checksum, cancellable)?.0;
97 let sz = sizes.entry(contentid).or_default();
98 *sz += finfo.size() as u64;
99 }
100 let sized: Result<Vec<_>> = sizes
102 .into_iter()
103 .map(|(id, size)| -> Result<ObjectSourceMetaSized> {
104 set.take(id)
105 .ok_or_else(|| anyhow!("Failed to find {} in content set", id))
106 .map(|meta| ObjectSourceMetaSized { meta, size })
107 })
108 .collect();
109 let mut sizes = sized?;
110 sizes.sort_by(|a, b| b.size.cmp(&a.size));
111 Ok(ObjectMetaSized { map, sizes })
112 }
113}
114
/// The result of a chunking operation over an ostree commit.
#[derive(Debug, Default)]
pub struct Chunking {
    /// Total size in bytes of commit metadata (dirtree/dirmeta objects).
    pub(crate) metadata_size: u64,
    /// Objects not (yet) assigned to any named chunk.
    pub(crate) remainder: Chunk,
    /// Named chunks produced so far.
    pub(crate) chunks: Vec<Chunk>,

    /// Maximum number of chunks we may produce.
    pub(crate) max: u32,

    // Guards against process_mapping() being invoked more than once.
    processed_mapping: bool,
    /// Number of components provided in the caller's metadata.
    pub(crate) n_provided_components: u32,
    /// Number of those components that have a nonzero size.
    pub(crate) n_sized_components: u32,
}
130
/// Mutable state threaded through the recursive commit-tree walk.
#[derive(Default)]
struct Generation {
    /// Current directory path during traversal.
    path: Utf8PathBuf,
    /// Accumulated size of metadata (dirtree/dirmeta) objects.
    metadata_size: u64,
    /// Dirtree checksums already accounted for.
    dirtree_found: BTreeSet<RcStr>,
    /// Dirmeta checksums already accounted for.
    dirmeta_found: BTreeSet<RcStr>,
}
138
139fn push_dirmeta(repo: &ostree::Repo, generation: &mut Generation, checksum: &str) -> Result<()> {
140 if generation.dirtree_found.contains(checksum) {
141 return Ok(());
142 }
143 let checksum = RcStr::from(checksum);
144 generation.dirmeta_found.insert(RcStr::clone(&checksum));
145 let child_v = repo.load_variant(ostree::ObjectType::DirMeta, checksum.borrow())?;
146 generation.metadata_size += child_v.data_as_bytes().as_ref().len() as u64;
147 Ok(())
148}
149
150fn push_dirtree(
151 repo: &ostree::Repo,
152 generation: &mut Generation,
153 checksum: &str,
154) -> Result<glib::Variant> {
155 let child_v = repo.load_variant(ostree::ObjectType::DirTree, checksum)?;
156 if !generation.dirtree_found.contains(checksum) {
157 generation.metadata_size += child_v.data_as_bytes().as_ref().len() as u64;
158 } else {
159 let checksum = RcStr::from(checksum);
160 generation.dirtree_found.insert(checksum);
161 }
162 Ok(child_v)
163}
164
/// Recursively walk a dirtree variant, adding every regular-file object to
/// `chunk` and accounting all referenced dirtree/dirmeta metadata sizes in
/// `generation`.
fn generate_chunking_recurse(
    repo: &ostree::Repo,
    generation: &mut Generation,
    chunk: &mut Chunk,
    dt: &glib::Variant,
) -> Result<()> {
    let dt = dt.data_as_bytes();
    let dt = dt.try_as_aligned()?;
    let dt = gv_dirtree!().cast(dt);
    let (files, dirs) = dt.to_tuple();
    // Reusable buffer for hex-encoding 32-byte checksums (64 hex chars).
    let mut hexbuf = [0u8; 64];
    for file in files {
        let (name, csum) = file.to_tuple();
        let fpath = generation.path.join(name.to_str());
        hex::encode_to_slice(csum, &mut hexbuf)?;
        let checksum = std::str::from_utf8(&hexbuf)?;
        let meta = repo.query_file(checksum, gio::Cancellable::NONE)?.0;
        let size = meta.size() as u64;
        // The same object may appear at multiple paths; count its size only
        // the first time it is seen in this chunk.
        let entry = chunk.content.entry(RcStr::from(checksum)).or_default();
        entry.0 = size;
        let first = entry.1.is_empty();
        if first {
            chunk.size += size;
        }
        entry.1.push(fpath);
    }
    for item in dirs {
        let (name, contents_csum, meta_csum) = item.to_tuple();
        let name = name.to_str();
        // Descend into the subdirectory; the path is popped again below.
        generation.path.push(name);
        hex::encode_to_slice(contents_csum, &mut hexbuf)?;
        let checksum_s = std::str::from_utf8(&hexbuf)?;
        let dirtree_v = push_dirtree(repo, generation, checksum_s)?;
        generate_chunking_recurse(repo, generation, chunk, &dirtree_v)?;
        drop(dirtree_v);
        hex::encode_to_slice(meta_csum, &mut hexbuf)?;
        let checksum_s = std::str::from_utf8(&hexbuf)?;
        push_dirmeta(repo, generation, checksum_s)?;
        assert!(generation.path.pop());
    }
    Ok(())
}
210
211impl Chunk {
212 fn new(name: &str) -> Self {
213 Chunk {
214 name: name.to_string(),
215 ..Default::default()
216 }
217 }
218
219 pub(crate) fn move_obj(&mut self, dest: &mut Self, checksum: &str) -> bool {
220 if let Some((name, (size, paths))) = self.content.remove_entry(checksum) {
224 let v = dest.content.insert(name, (size, paths));
225 debug_assert!(v.is_none());
226 self.size -= size;
227 dest.size += size;
228 true
229 } else {
230 false
231 }
232 }
233
234 pub(crate) fn move_path(&mut self, dest: &mut Self, checksum: &str, path: &Utf8Path) {
235 if let Some((_size, paths)) = self.content.get_mut(checksum) {
236 let path_index = paths.iter().position(|p| *p == path);
237 if let Some(index) = path_index {
238 let removed_path = paths.remove(index);
239
240 let dest_entry = dest
241 .content
242 .entry(RcStr::from(checksum))
243 .or_insert((0, Vec::new()));
244 dest_entry.1.push(removed_path);
245
246 if paths.is_empty() {
247 self.content.remove(checksum);
248 }
249 }
250 }
251 }
252}
253
impl Chunking {
    /// Invert the checksum -> content id mapping into
    /// content id -> list of checksums.
    fn create_content_id_map(
        map: &IndexMap<String, ContentID>,
    ) -> IndexMap<ContentID, Vec<&String>> {
        let mut rmap = IndexMap::<ContentID, Vec<&String>>::new();
        for (checksum, contentid) in map.iter() {
            rmap.entry(Rc::clone(contentid)).or_default().push(checksum);
        }
        rmap
    }

    /// Generate an initial chunking from the commit at `rev`: walk the whole
    /// tree, placing every file object into the remainder chunk and summing
    /// the metadata (dirtree/dirmeta) sizes.
    pub fn new(repo: &ostree::Repo, rev: &str) -> Result<Self> {
        let rev = repo.require_rev(rev)?;

        let (commit_v, _) = repo.load_commit(&rev)?;
        let commit_v = commit_v.data_as_bytes();
        let commit_v = commit_v.try_as_aligned()?;
        let commit = gv_commit!().cast(commit_v);
        let commit = commit.to_tuple();

        let mut generation = Generation {
            path: Utf8PathBuf::from("/"),
            ..Default::default()
        };
        let mut chunk: Chunk = Default::default();

        // commit.6 / commit.7 hold the root dirtree and dirmeta checksums.
        let contents_checksum = &hex::encode(commit.6);
        let contents_v = repo.load_variant(ostree::ObjectType::DirTree, contents_checksum)?;
        push_dirtree(repo, &mut generation, contents_checksum)?;
        let meta_checksum = &hex::encode(commit.7);
        push_dirmeta(repo, &mut generation, meta_checksum.as_str())?;

        generate_chunking_recurse(repo, &mut generation, &mut chunk, &contents_v)?;

        let chunking = Chunking {
            metadata_size: generation.metadata_size,
            remainder: chunk,
            ..Default::default()
        };
        Ok(chunking)
    }

    /// Convenience constructor: generate a chunking from `rev` and
    /// immediately apply the provided content mapping.
    pub fn from_mapping(
        repo: &ostree::Repo,
        rev: &str,
        meta: &ObjectMetaSized,
        max_layers: &Option<NonZeroU32>,
        prior_build_metadata: Option<&oci_spec::image::ImageManifest>,
        specific_contentmeta: Option<&BTreeMap<ContentID, Vec<(Utf8PathBuf, String)>>>,
    ) -> Result<Self> {
        let mut r = Self::new(repo, rev)?;
        r.process_mapping(meta, max_layers, prior_build_metadata, specific_contentmeta)?;
        Ok(r)
    }

    /// Number of chunks still available before hitting `self.max`.
    fn remaining(&self) -> u32 {
        self.max.saturating_sub(self.chunks.len() as u32)
    }

    /// Distribute objects into chunks based on the provided content
    /// metadata. Must only be called once per `Chunking`.
    #[allow(clippy::or_fun_call)]
    pub fn process_mapping(
        &mut self,
        meta: &ObjectMetaSized,
        max_layers: &Option<NonZeroU32>,
        prior_build_metadata: Option<&oci_spec::image::ImageManifest>,
        specific_contentmeta: Option<&BTreeMap<ContentID, Vec<(Utf8PathBuf, String)>>>,
    ) -> Result<()> {
        self.max = max_layers
            .unwrap_or(NonZeroU32::new(MAX_CHUNKS).unwrap())
            .get();

        let sizes = &meta.sizes;
        assert!(!self.processed_mapping);
        self.processed_mapping = true;
        let remaining = self.remaining();
        if remaining == 0 {
            return Ok(());
        }

        // First, carve out a dedicated chunk per component that was
        // explicitly requested to live in its own layer.
        let mut processed_specific_components = BTreeSet::new();
        if let Some(specific_meta) = specific_contentmeta {
            for (component, files) in specific_meta {
                let mut chunk = Chunk::new(&component);
                chunk.packages = vec![component.to_string()];

                for (path, checksum) in files {
                    self.remainder
                        .move_path(&mut chunk, checksum.as_str(), path);
                }

                self.chunks.push(chunk);
                processed_specific_components.insert(component.clone());
            }
        }

        self.n_provided_components = meta.sizes.len().try_into().unwrap();
        self.n_sized_components = sizes
            .iter()
            .filter(|v| v.size > 0)
            .count()
            .try_into()
            .unwrap();

        // Everything not already handled above goes through regular packing.
        let regular_sizes: Vec<ObjectSourceMetaSized> = sizes
            .iter()
            .filter(|component| {
                !processed_specific_components.contains(&*component.meta.identifier)
            })
            .cloned()
            .collect();

        let rmap = Self::create_content_id_map(&meta.map);

        if let Some(remaining) = NonZeroU32::new(self.remaining()) {
            let start = Instant::now();
            let packing = basic_packing(&regular_sizes, remaining, prior_build_metadata)?;
            let duration = start.elapsed();
            tracing::debug!("Time elapsed in packing: {:#?}", duration);

            for bin in packing.into_iter() {
                // Derive a human-readable chunk name from the bin contents.
                let name = match bin.len() {
                    0 => Cow::Borrowed("Reserved for new packages"),
                    1 => {
                        let first = bin[0];
                        let first_name = &*first.meta.identifier;
                        Cow::Borrowed(first_name)
                    }
                    2..=5 => {
                        // Join a handful of identifiers with " and ".
                        let first = bin[0];
                        let first_name = &*first.meta.identifier;
                        let r = bin.iter().map(|v| &*v.meta.identifier).skip(1).fold(
                            String::from(first_name),
                            |mut acc, v| {
                                write!(acc, " and {v}").unwrap();
                                acc
                            },
                        );
                        Cow::Owned(r)
                    }
                    n => Cow::Owned(format!("{n} components")),
                };
                let mut chunk = Chunk::new(&name);
                chunk.packages = bin.iter().map(|v| String::from(&*v.meta.name)).collect();
                // Move every object belonging to the bin's components out of
                // the remainder and into this chunk.
                for szmeta in bin {
                    for &obj in rmap.get(&szmeta.meta.identifier).unwrap() {
                        self.remainder.move_obj(&mut chunk, obj.as_str());
                    }
                }
                self.chunks.push(chunk);
            }
        }

        // If anything was packed, every object must have been moved out of
        // the remainder by now.
        if !processed_specific_components.is_empty() || !regular_sizes.is_empty() {
            assert_eq!(self.remainder.content.len(), 0);
        }

        Ok(())
    }

    /// Take ownership of the generated chunks, leaving an empty list behind.
    pub(crate) fn take_chunks(&mut self) -> Vec<Chunk> {
        let mut r = Vec::new();
        std::mem::swap(&mut self.chunks, &mut r);
        r
    }

    /// Print a human-readable summary of the chunking to stdout.
    pub fn print(&self) {
        println!("Metadata: {}", glib::format_size(self.metadata_size));
        if self.n_provided_components > 0 {
            println!(
                "Components: provided={} sized={}",
                self.n_provided_components, self.n_sized_components
            );
        }
        for (n, chunk) in self.chunks.iter().enumerate() {
            let sz = glib::format_size(chunk.size);
            println!(
                "Chunk {}: \"{}\": objects:{} size:{}",
                n,
                chunk.name,
                chunk.content.len(),
                sz
            );
        }
        if !self.remainder.content.is_empty() {
            let sz = glib::format_size(self.remainder.size);
            println!(
                "Remainder: \"{}\": objects:{} size:{}",
                self.remainder.name,
                self.remainder.content.len(),
                sz
            );
        }
    }
}
465
/// Sum of the sizes of one bin's components (test helper).
#[cfg(test)]
fn components_size(components: &[&ObjectSourceMetaSized]) -> u64 {
    components.iter().fold(0, |total, c| total + c.size)
}
470
/// Total size across all bins of a packing (test helper).
#[cfg(test)]
fn packing_size(packing: &[Vec<&ObjectSourceMetaSized>]) -> u64 {
    packing.iter().flat_map(|bin| bin.iter()).map(|c| c.size).sum()
}
476
477fn get_partitions_with_threshold<'a>(
490 components: &[&'a ObjectSourceMetaSized],
491 limit_hs_bins: usize,
492 threshold: f64,
493) -> Option<BTreeMap<String, Vec<&'a ObjectSourceMetaSized>>> {
494 let mut partitions: BTreeMap<String, Vec<&ObjectSourceMetaSized>> = BTreeMap::new();
495 let mut med_size: Vec<&ObjectSourceMetaSized> = Vec::new();
496 let mut high_size: Vec<&ObjectSourceMetaSized> = Vec::new();
497
498 let mut sizes: Vec<u64> = components.iter().map(|a| a.size).collect();
499 let (median_size, mad_size) = statistics::median_absolute_deviation(&mut sizes)?;
500
501 let size_low_limit = 0.5 * f64::abs(median_size - threshold * mad_size);
503 let size_high_limit = median_size + threshold * mad_size;
504
505 for pkg in components {
506 let size = pkg.size as f64;
507
508 if size >= size_high_limit {
510 high_size.push(pkg);
511 }
512 else if size <= size_low_limit {
514 partitions
515 .entry(LOW_PARTITION.to_string())
516 .and_modify(|bin| bin.push(pkg))
517 .or_insert_with(|| vec![pkg]);
518 }
519 else {
521 med_size.push(pkg);
522 }
523 }
524
525 let mut remaining_pkgs: Vec<_> = if high_size.len() <= limit_hs_bins {
527 Vec::new()
528 } else {
529 high_size.drain(limit_hs_bins..).collect()
530 };
531 assert!(high_size.len() <= limit_hs_bins);
532
533 remaining_pkgs.append(&mut med_size);
535 partitions.insert(HIGH_PARTITION.to_string(), high_size);
536
537 remaining_pkgs.sort_by(|a, b| {
539 a.meta
540 .change_frequency
541 .partial_cmp(&b.meta.change_frequency)
542 .unwrap()
543 });
544 let med_sizes: Vec<u64> = remaining_pkgs.iter().map(|a| a.size).collect();
545 let med_frequencies: Vec<u64> = remaining_pkgs
546 .iter()
547 .map(|a| a.meta.change_frequency.into())
548 .collect();
549
550 let med_mean_freq = statistics::mean(&med_frequencies)?;
551 let med_stddev_freq = statistics::std_deviation(&med_frequencies)?;
552 let med_mean_size = statistics::mean(&med_sizes)?;
553 let med_stddev_size = statistics::std_deviation(&med_sizes)?;
554
555 let med_freq_low_limit = 0.5f64 * f64::abs(med_mean_freq - threshold * med_stddev_freq);
557 let med_freq_high_limit = med_mean_freq + threshold * med_stddev_freq;
558 let med_size_low_limit = 0.5f64 * f64::abs(med_mean_size - threshold * med_stddev_size);
559 let med_size_high_limit = med_mean_size + threshold * med_stddev_size;
560
561 for pkg in remaining_pkgs {
562 let size = pkg.size as f64;
563 let freq = pkg.meta.change_frequency as f64;
564
565 let size_name;
566 if size >= med_size_high_limit {
567 size_name = "hs";
568 } else if size <= med_size_low_limit {
569 size_name = "ls";
570 } else {
571 size_name = "ms";
572 }
573
574 let freq_name;
576 if freq >= med_freq_high_limit {
577 freq_name = "3hf";
578 } else if freq <= med_freq_low_limit {
579 freq_name = "5lf";
580 } else {
581 freq_name = "4mf";
582 }
583
584 let bucket = format!("{freq_name}_{size_name}");
585 partitions
586 .entry(bucket.to_string())
587 .and_modify(|bin| bin.push(pkg))
588 .or_insert_with(|| vec![pkg]);
589 }
590
591 for (name, pkgs) in &partitions {
592 tracing::debug!("{:#?}: {:#?}", name, pkgs.len());
593 }
594
595 Some(partitions)
596}
597
/// Attempt to pack `components` following the layer structure of a prior
/// build, so unchanged bins keep their contents stable across rebuilds.
///
/// The first layer (skipped) holds the ostree commit; each remaining
/// layer's `CONTENT_ANNOTATION` lists the package names it held. Newly
/// appearing packages are appended to the last bin, and packages that no
/// longer exist are removed from their bins.
///
/// Returns `Ok(None)` when the prior structure cannot be reused (more prior
/// bins than `bin_size`, or no last bin to put new packages into).
fn basic_packing_with_prior_build<'a>(
    components: &'a [ObjectSourceMetaSized],
    bin_size: NonZeroU32,
    prior_build: &oci_spec::image::ImageManifest,
) -> Result<Option<Vec<Vec<&'a ObjectSourceMetaSized>>>> {
    let before_processing_pkgs_len = components.len();

    tracing::debug!("Attempting to use old package structure");

    // Reconstruct the prior bins from each layer's content annotation.
    let curr_build: Result<Vec<Vec<String>>> = prior_build
        .layers()
        .iter()
        .skip(1)
        .map(|layer| -> Result<_> {
            let annotation_layer = layer
                .annotations()
                .as_ref()
                .and_then(|annos| annos.get(CONTENT_ANNOTATION))
                .ok_or_else(|| anyhow!("Missing {CONTENT_ANNOTATION} on prior build"))?;
            Ok(annotation_layer
                .split(COMPONENT_SEPARATOR)
                .map(ToOwned::to_owned)
                .collect())
        })
        .collect();
    let mut curr_build = curr_build?;

    if (bin_size.get() as usize) < curr_build.len() {
        tracing::debug!("bin_size = {bin_size} is too small to be compatible with the prior build");
        return Ok(None);
    }

    // Package-name sets for the previous and current builds.
    let prev_pkgs_set: BTreeSet<String> = curr_build
        .iter()
        .flat_map(|v| v.iter().cloned())
        .filter(|name| !name.is_empty())
        .collect();
    let curr_pkgs_set: BTreeSet<String> = components
        .iter()
        .map(|pkg| pkg.meta.name.to_string())
        .collect();

    // Newly added packages all go into the last (reserved) bin.
    if let Some(last_bin) = curr_build.last_mut() {
        let added = curr_pkgs_set.difference(&prev_pkgs_set);
        last_bin.retain(|name| !name.is_empty());
        last_bin.extend(added.into_iter().cloned());
    } else {
        tracing::debug!("No empty last bin for added packages.");
        return Ok(None);
    }

    // Drop packages that disappeared since the prior build.
    let removed: BTreeSet<&String> = prev_pkgs_set.difference(&curr_pkgs_set).collect();
    for bin in curr_build.iter_mut() {
        bin.retain(|pkg| !removed.contains(pkg));
    }

    // Resolve package names back to their component metadata.
    let mut name_to_component: BTreeMap<String, &ObjectSourceMetaSized> = BTreeMap::new();
    for component in components.iter() {
        name_to_component
            .entry(component.meta.name.to_string())
            .or_insert(component);
    }
    let mut modified_build: Vec<Vec<&ObjectSourceMetaSized>> = Vec::new();
    for bin in curr_build {
        let mut mod_bin = Vec::new();
        for pkg in bin {
            // Splitting an empty annotation yields one empty entry; skip it.
            if pkg.is_empty() {
                continue;
            }
            mod_bin.push(name_to_component[&pkg]);
        }
        modified_build.push(mod_bin);
    }

    // Every current component must be placed exactly once.
    let after_processing_pkgs_len: usize = modified_build.iter().map(|b| b.len()).sum();
    assert_eq!(after_processing_pkgs_len, before_processing_pkgs_len);
    assert!(modified_build.len() <= bin_size.get() as usize);
    Ok(Some(modified_build))
}
698
/// Pack components into at most `bin_size` bins (container layers).
///
/// If prior build metadata is given and still compatible, the old layer
/// structure is reused (see `basic_packing_with_prior_build`). Otherwise a
/// fresh layout is computed: the first high-size components get dedicated
/// bins, all low-size components share one bin, components with
/// `change_frequency == u32::MAX` share one bin, the remaining (medium)
/// components are spread evenly, and one trailing empty bin is reserved for
/// packages added in future builds.
fn basic_packing<'a>(
    components: &'a [ObjectSourceMetaSized],
    bin_size: NonZeroU32,
    prior_build_metadata: Option<&oci_spec::image::ImageManifest>,
) -> Result<Vec<Vec<&'a ObjectSourceMetaSized>>> {
    // Fraction of the remaining bin budget dedicated to high-size bins.
    const HIGH_SIZE_CUTOFF: f32 = 0.6;
    let before_processing_pkgs_len = components.len();

    anyhow::ensure!(bin_size.get() >= MIN_CHUNKED_LAYERS);

    // Prefer keeping the prior build's structure when possible.
    if let Some(prior_build) = prior_build_metadata {
        if let Some(packing) = basic_packing_with_prior_build(components, bin_size, prior_build)? {
            tracing::debug!("Keeping old package structure");
            return Ok(packing);
        } else {
            tracing::debug!(
                "Failed to use old package structure; discarding info from prior build."
            );
        }
    }

    tracing::debug!("Creating new packing structure");

    // Fewer components than bins: one bin per component, plus a reserved
    // empty bin (unless there were no components at all).
    if before_processing_pkgs_len < bin_size.get() as usize {
        let mut r = components.iter().map(|pkg| vec![pkg]).collect::<Vec<_>>();
        if before_processing_pkgs_len > 0 {
            let new_pkgs_bin: Vec<&ObjectSourceMetaSized> = Vec::new();
            r.push(new_pkgs_bin);
        }
        return Ok(r);
    }

    let mut r = Vec::new();
    // Components with the maximum change frequency are set aside into their
    // own dedicated bin at the end.
    let (components, max_freq_components) = components
        .iter()
        .partition::<Vec<_>, _>(|pkg| pkg.meta.change_frequency != u32::MAX);
    if !components.is_empty() {
        // Bin budget: one low-size bin, one reserved (new-packages) bin, at
        // most one max-frequency bin; of the rest, HIGH_SIZE_CUTOFF goes to
        // high-size bins and the remainder to medium-size bins.
        let limit_ls_bins = 1usize;
        let limit_new_bins = 1usize;
        let _limit_new_pkgs = 0usize;
        let limit_max_frequency_pkgs = max_freq_components.len();
        let limit_max_frequency_bins = limit_max_frequency_pkgs.min(1);
        let low_and_other_bin_limit = limit_ls_bins + limit_new_bins + limit_max_frequency_bins;
        let limit_hs_bins = (HIGH_SIZE_CUTOFF
            * (bin_size.get() - low_and_other_bin_limit as u32) as f32)
            .floor() as usize;
        let limit_ms_bins =
            (bin_size.get() - (limit_hs_bins + low_and_other_bin_limit) as u32) as usize;
        let partitions = get_partitions_with_threshold(&components, limit_hs_bins, 2f64)
            .expect("Partitioning components into sets");

        let low_sized_component_count = partitions
            .get(LOW_PARTITION)
            .map(|p| p.len())
            .unwrap_or_default();

        // Medium components are spread evenly over the medium bins.
        let pkg_per_bin_ms: usize = (components.len() - limit_hs_bins - low_sized_component_count)
            .checked_div(limit_ms_bins)
            .ok_or_else(|| anyhow::anyhow!("number of bins should be >= {}", MIN_CHUNKED_LAYERS))?;

        // Partitions iterate in key order: "1hs" first, then "2ls", then the
        // frequency/size buckets.
        for (partition, pkgs) in partitions.iter() {
            if partition == HIGH_PARTITION {
                // One bin per high-size component.
                for pkg in pkgs {
                    r.push(vec![*pkg]);
                }
            } else if partition == LOW_PARTITION {
                // All low-size components share a single bin.
                let mut bin: Vec<&ObjectSourceMetaSized> = Vec::new();
                for pkg in pkgs {
                    bin.push(*pkg);
                }
                r.push(bin);
            } else {
                // Medium buckets: fill bins up to pkg_per_bin_ms each.
                let mut bin: Vec<&ObjectSourceMetaSized> = Vec::new();
                for (i, pkg) in pkgs.iter().enumerate() {
                    if bin.len() < pkg_per_bin_ms {
                        bin.push(*pkg);
                    } else {
                        r.push(bin.clone());
                        bin.clear();
                        bin.push(*pkg);
                    }
                    if i == pkgs.len() - 1 && !bin.is_empty() {
                        r.push(bin.clone());
                        bin.clear();
                    }
                }
            }
        }
        tracing::debug!("Bins before unoptimized build: {}", r.len());

        // Too many bins: repeatedly merge adjacent medium bins (from the
        // back, skipping the dedicated high/low bins) until we fit within
        // the budget minus the reserved and max-frequency bins.
        while r.len() > (bin_size.get() as usize - limit_new_bins - limit_max_frequency_bins) {
            for i in (limit_ls_bins + limit_hs_bins..r.len() - 1)
                .step_by(2)
                .rev()
            {
                if r.len() <= (bin_size.get() as usize - limit_new_bins - limit_max_frequency_bins)
                {
                    break;
                }
                let prev = &r[i - 1];
                let curr = &r[i];
                let mut merge: Vec<&ObjectSourceMetaSized> = Vec::new();
                merge.extend(prev.iter());
                merge.extend(curr.iter());
                r.remove(i);
                r.remove(i - 1);
                r.insert(i, merge);
            }
        }
        tracing::debug!("Bins after optimization: {}", r.len());
    }

    // Dedicated bin for the maximum-change-frequency components.
    if !max_freq_components.is_empty() {
        r.push(max_freq_components);
    }

    // Reserved empty bin for packages added in future builds.
    r.push(Vec::new());
    let after_processing_pkgs_len = r.iter().map(|b| b.len()).sum::<usize>();
    assert_eq!(after_processing_pkgs_len, before_processing_pkgs_len);
    assert!(r.len() <= bin_size.get() as usize);
    Ok(r)
}
855
856#[cfg(test)]
857mod test {
858 use super::*;
859
860 use oci_spec::image as oci_image;
861 use std::str::FromStr;
862
863 const FCOS_CONTENTMETA: &[u8] = include_bytes!("fixtures/fedora-coreos-contentmeta.json.gz");
864 const SHA256_EXAMPLE: &str =
865 "sha256:0000111122223333444455556666777788889999aaaabbbbccccddddeeeeffff";
866
867 #[test]
868 fn test_packing_basics() -> Result<()> {
869 for v in [4, 7].map(|v| NonZeroU32::new(v).unwrap()) {
871 assert_eq!(basic_packing(&[], v, None).unwrap().len(), 0);
872 }
873 Ok(())
874 }
875
876 #[test]
877 fn test_packing_fcos() -> Result<()> {
878 let contentmeta: Vec<ObjectSourceMetaSized> =
879 serde_json::from_reader(flate2::read::GzDecoder::new(FCOS_CONTENTMETA))?;
880 let total_size = contentmeta.iter().map(|v| v.size).sum::<u64>();
881
882 let packing =
883 basic_packing(&contentmeta, NonZeroU32::new(MAX_CHUNKS).unwrap(), None).unwrap();
884 assert!(!contentmeta.is_empty());
885 assert_eq!(packing.len() as u32, MAX_CHUNKS);
887 let packed_total_size = packing_size(&packing);
889 assert_eq!(total_size, packed_total_size);
890 Ok(())
891 }
892
893 #[test]
894 fn test_packing_one_layer() -> Result<()> {
895 let contentmeta: Vec<ObjectSourceMetaSized> =
896 serde_json::from_reader(flate2::read::GzDecoder::new(FCOS_CONTENTMETA))?;
897 let r = basic_packing(&contentmeta, NonZeroU32::new(1).unwrap(), None);
898 assert!(r.is_err());
899 Ok(())
900 }
901
    /// Build a fake OCI image manifest whose layers carry the given package
    /// structure in their content annotations, mimicking a prior build.
    ///
    /// Identifiers are reduced to package names (text before the first '.'),
    /// and a leading "ostree_commit" layer is prepended.
    fn create_manifest(prev_expected_structure: Vec<Vec<&str>>) -> oci_spec::image::ImageManifest {
        use std::collections::HashMap;

        // Keep only the package name: drop the ".version" suffix.
        let mut p = prev_expected_structure
            .iter()
            .map(|b| {
                b.iter()
                    .map(|p| p.split('.').collect::<Vec<&str>>()[0].to_string())
                    .collect()
            })
            .collect();
        let mut metadata_with_ostree_commit = vec![vec![String::from("ostree_commit")]];
        metadata_with_ostree_commit.append(&mut p);

        let config = oci_spec::image::DescriptorBuilder::default()
            .media_type(oci_spec::image::MediaType::ImageConfig)
            .size(7023_u64)
            .digest(oci_image::Digest::from_str(SHA256_EXAMPLE).unwrap())
            .build()
            .expect("build config descriptor");

        // One layer per bin; CONTENT_ANNOTATION lists the bin's packages
        // joined by COMPONENT_SEPARATOR.
        let layers: Vec<oci_spec::image::Descriptor> = metadata_with_ostree_commit
            .iter()
            .map(|l| {
                let mut buf = [0; 8];
                let sep = COMPONENT_SEPARATOR.encode_utf8(&mut buf);
                oci_spec::image::DescriptorBuilder::default()
                    .media_type(oci_spec::image::MediaType::ImageLayerGzip)
                    .size(100_u64)
                    .digest(oci_image::Digest::from_str(SHA256_EXAMPLE).unwrap())
                    .annotations(HashMap::from([(
                        CONTENT_ANNOTATION.to_string(),
                        l.join(sep),
                    )]))
                    .build()
                    .expect("build layer")
            })
            .collect();

        oci_spec::image::ImageManifestBuilder::default()
            .schema_version(oci_spec::image::SCHEMA_VERSION)
            .config(config)
            .layers(layers)
            .build()
            .expect("build image manifest")
    }
948
    /// End-to-end test of `basic_packing` across three successive "builds":
    /// a fresh layout, then two rebuilds that must preserve the prior layer
    /// structure while absorbing added/updated/removed packages.
    #[test]
    fn test_advanced_packing() -> Result<()> {
        // v0 components as [id, change_frequency, size] triples. pkg1/pkg2
        // use change_frequency == u32::MAX and must share a dedicated bin.
        let contentmeta_v0: Vec<ObjectSourceMetaSized> = vec![
            vec![1, u32::MAX, 100000],
            vec![2, u32::MAX, 99999],
            vec![3, 30, 99998],
            vec![4, 100, 99997],
            vec![10, 51, 1000],
            vec![8, 50, 500],
            vec![9, 1, 200],
            vec![11, 100000, 199],
            vec![6, 30, 2],
            vec![7, 30, 1],
        ]
        .iter()
        .map(|data| ObjectSourceMetaSized {
            meta: ObjectSourceMeta {
                identifier: RcStr::from(format!("pkg{}.0", data[0])),
                name: RcStr::from(format!("pkg{}", data[0])),
                srcid: RcStr::from(format!("srcpkg{}", data[0])),
                change_time_offset: 0,
                change_frequency: data[1],
            },
            size: data[2] as u64,
        })
        .collect();

        // Fresh packing (no prior build).
        let packing = basic_packing(
            &contentmeta_v0.as_slice(),
            NonZeroU32::new(6).unwrap(),
            None,
        )
        .unwrap();
        let structure: Vec<Vec<&str>> = packing
            .iter()
            .map(|bin| bin.iter().map(|pkg| &*pkg.meta.identifier).collect())
            .collect();
        // Last bin is the reserved empty one; pkg1/pkg2 (max frequency)
        // share the bin before it.
        let v0_expected_structure = vec![
            vec!["pkg3.0"],
            vec!["pkg4.0"],
            vec!["pkg6.0", "pkg7.0", "pkg11.0"],
            vec!["pkg9.0", "pkg8.0", "pkg10.0"],
            vec!["pkg1.0", "pkg2.0"],
            vec![],
        ];
        assert_eq!(structure, v0_expected_structure);

        // v1: pkg1 updated (new identifier), pkg7 removed, pkg5 added.
        let mut contentmeta_v1: Vec<ObjectSourceMetaSized> = contentmeta_v0;
        contentmeta_v1[0].meta.identifier = RcStr::from("pkg1.1");
        contentmeta_v1.remove(contentmeta_v1.len() - 1);
        contentmeta_v1.push(ObjectSourceMetaSized {
            meta: ObjectSourceMeta {
                identifier: RcStr::from("pkg5.0"),
                name: RcStr::from("pkg5"),
                srcid: RcStr::from("srcpkg5"),
                change_time_offset: 0,
                change_frequency: 42,
            },
            size: 100000,
        });

        // Packing against the v0 manifest keeps the old structure; the new
        // pkg5 lands in the previously-empty last bin.
        let image_manifest_v0 = create_manifest(v0_expected_structure);
        let packing_derived = basic_packing(
            &contentmeta_v1.as_slice(),
            NonZeroU32::new(6).unwrap(),
            Some(&image_manifest_v0),
        )
        .unwrap();
        let structure_derived: Vec<Vec<&str>> = packing_derived
            .iter()
            .map(|bin| bin.iter().map(|pkg| &*pkg.meta.identifier).collect())
            .collect();
        let v1_expected_structure = vec![
            vec!["pkg3.0"],
            vec!["pkg4.0"],
            vec!["pkg6.0", "pkg11.0"],
            vec!["pkg9.0", "pkg8.0", "pkg10.0"],
            vec!["pkg1.1", "pkg2.0"],
            vec!["pkg5.0"],
        ];

        assert_eq!(structure_derived, v1_expected_structure);

        // v2: pkg5 updated, pkg12 added.
        let mut contentmeta_v2: Vec<ObjectSourceMetaSized> = contentmeta_v1;
        contentmeta_v2[9].meta.identifier = RcStr::from("pkg5.1");
        contentmeta_v2.push(ObjectSourceMetaSized {
            meta: ObjectSourceMeta {
                identifier: RcStr::from("pkg12.0"),
                name: RcStr::from("pkg12"),
                srcid: RcStr::from("srcpkg12"),
                change_time_offset: 0,
                change_frequency: 42,
            },
            size: 100000,
        });

        // Packing against the v1 manifest again preserves structure; pkg12
        // joins pkg5 in the last used bin.
        let image_manifest_v1 = create_manifest(v1_expected_structure);
        let packing_derived = basic_packing(
            &contentmeta_v2.as_slice(),
            NonZeroU32::new(6).unwrap(),
            Some(&image_manifest_v1),
        )
        .unwrap();
        let structure_derived: Vec<Vec<&str>> = packing_derived
            .iter()
            .map(|bin| bin.iter().map(|pkg| &*pkg.meta.identifier).collect())
            .collect();
        let v2_expected_structure = vec![
            vec!["pkg3.0"],
            vec!["pkg4.0"],
            vec!["pkg6.0", "pkg11.0"],
            vec!["pkg9.0", "pkg8.0", "pkg10.0"],
            vec!["pkg1.1", "pkg2.0"],
            vec!["pkg5.1", "pkg12.0"],
        ];

        assert_eq!(structure_derived, v2_expected_structure);
        Ok(())
    }
1078
1079 fn setup_exclusive_test(
1080 component_data: &[(u32, u32, u64)],
1081 max_layers: u32,
1082 num_fake_objects: Option<usize>,
1083 ) -> Result<(
1084 Vec<ObjectSourceMetaSized>,
1085 ObjectMetaSized,
1086 BTreeMap<ContentID, Vec<(Utf8PathBuf, String)>>,
1087 Chunking,
1088 )> {
1089 let contentmeta: Vec<ObjectSourceMetaSized> = component_data
1091 .iter()
1092 .map(|&(id, freq, size)| ObjectSourceMetaSized {
1093 meta: ObjectSourceMeta {
1094 identifier: RcStr::from(format!("pkg{id}.0")),
1095 name: RcStr::from(format!("pkg{id}")),
1096 srcid: RcStr::from(format!("srcpkg{id}")),
1097 change_time_offset: 0,
1098 change_frequency: freq,
1099 },
1100 size,
1101 })
1102 .collect();
1103
1104 let mut object_map = IndexMap::new();
1106
1107 for (i, component) in contentmeta.iter().enumerate() {
1108 let checksum = format!("checksum_{i}");
1109 object_map.insert(checksum, component.meta.identifier.clone());
1110 }
1111
1112 let regular_meta = ObjectMetaSized {
1113 map: object_map,
1114 sizes: contentmeta.clone(),
1115 };
1116
1117 let specific_contentmeta = BTreeMap::new();
1119
1120 let mut chunking = Chunking::default();
1122 chunking.max = max_layers;
1123 chunking.remainder = Chunk::new("remainder");
1124
1125 if let Some(num_objects) = num_fake_objects {
1127 for i in 0..num_objects {
1128 let checksum = format!("checksum_{i}");
1129 chunking.remainder.content.insert(
1130 RcStr::from(checksum),
1131 (
1132 1000,
1133 vec![Utf8PathBuf::from(format!("/path/to/checksum_{i}"))],
1134 ),
1135 );
1136 chunking.remainder.size += 1000;
1137 }
1138 }
1139
1140 Ok((contentmeta, regular_meta, specific_contentmeta, chunking))
1141 }
1142
1143 #[test]
1144 fn test_exclusive_chunks() -> Result<()> {
1145 let component_data = [
1147 (1, 100, 50000),
1148 (2, 200, 40000),
1149 (3, 300, 30000),
1150 (4, 400, 20000),
1151 (5, 500, 10000),
1152 ];
1153
1154 let (contentmeta, regular_meta, mut specific_contentmeta, mut chunking) =
1155 setup_exclusive_test(&component_data, 8, Some(5))?;
1156
1157 specific_contentmeta.insert(
1159 contentmeta[0].meta.identifier.clone(),
1160 vec![(
1161 Utf8PathBuf::from("/path/to/checksum_0"),
1162 "checksum_0".to_string(),
1163 )],
1164 );
1165 specific_contentmeta.insert(
1166 contentmeta[1].meta.identifier.clone(),
1167 vec![(
1168 Utf8PathBuf::from("/path/to/checksum_1"),
1169 "checksum_1".to_string(),
1170 )],
1171 );
1172
1173 chunking.process_mapping(
1174 ®ular_meta,
1175 &Some(NonZeroU32::new(8).unwrap()),
1176 None,
1177 Some(&specific_contentmeta),
1178 )?;
1179
1180 assert!(chunking.chunks.len() >= 2);
1182 assert_eq!(chunking.chunks[0].name, "pkg1.0");
1183 assert_eq!(chunking.chunks[1].name, "pkg2.0");
1184 assert_eq!(chunking.chunks[0].packages, vec!["pkg1.0".to_string()]);
1185 assert_eq!(chunking.chunks[1].packages, vec!["pkg2.0".to_string()]);
1186
1187 Ok(())
1188 }
1189
1190 #[test]
1191 fn test_exclusive_chunks_with_regular_packing() -> Result<()> {
1192 let component_data = [
1194 (1, 100, 50000), (2, 200, 40000), (3, 300, 30000), (4, 400, 20000), (5, 500, 10000), (6, 600, 5000), ];
1201
1202 let (contentmeta, regular_meta, mut specific_contentmeta, mut chunking) =
1203 setup_exclusive_test(&component_data, 8, Some(6))?;
1204
1205 specific_contentmeta.insert(
1207 contentmeta[0].meta.identifier.clone(),
1208 vec![(
1209 Utf8PathBuf::from("/path/to/checksum_0"),
1210 "checksum_0".to_string(),
1211 )],
1212 );
1213 specific_contentmeta.insert(
1214 contentmeta[1].meta.identifier.clone(),
1215 vec![(
1216 Utf8PathBuf::from("/path/to/checksum_1"),
1217 "checksum_1".to_string(),
1218 )],
1219 );
1220
1221 chunking.process_mapping(
1222 ®ular_meta,
1223 &Some(NonZeroU32::new(8).unwrap()),
1224 None,
1225 Some(&specific_contentmeta),
1226 )?;
1227
1228 assert!(chunking.chunks.len() >= 2);
1230 assert_eq!(chunking.chunks[0].name, "pkg1.0");
1231 assert_eq!(chunking.chunks[1].name, "pkg2.0");
1232 assert_eq!(chunking.chunks[0].packages, vec!["pkg1.0".to_string()]);
1233 assert_eq!(chunking.chunks[1].packages, vec!["pkg2.0".to_string()]);
1234
1235 for chunk in &chunking.chunks[2..] {
1237 assert!(!chunk.packages.contains(&"pkg1.0".to_string()));
1238 assert!(!chunk.packages.contains(&"pkg2.0".to_string()));
1239 }
1240
1241 Ok(())
1242 }
1243
1244 #[test]
1245 fn test_exclusive_chunks_isolation() -> Result<()> {
1246 let component_data = [(1, 100, 50000), (2, 200, 40000), (3, 300, 30000)];
1248
1249 let (contentmeta, regular_meta, mut specific_contentmeta, mut chunking) =
1250 setup_exclusive_test(&component_data, 8, Some(3))?;
1251
1252 specific_contentmeta.insert(
1254 contentmeta[0].meta.identifier.clone(),
1255 vec![(
1256 Utf8PathBuf::from("/path/to/checksum_0"),
1257 "checksum_0".to_string(),
1258 )],
1259 );
1260
1261 chunking.process_mapping(
1262 ®ular_meta,
1263 &Some(NonZeroU32::new(8).unwrap()),
1264 None,
1265 Some(&specific_contentmeta),
1266 )?;
1267
1268 assert!(!chunking.chunks.is_empty());
1270 assert_eq!(chunking.chunks[0].name, "pkg1.0");
1271 assert_eq!(chunking.chunks[0].packages, vec!["pkg1.0".to_string()]);
1272
1273 let mut found_pkg2 = false;
1275 let mut found_pkg3 = false;
1276 for chunk in &chunking.chunks[1..] {
1277 if chunk.packages.contains(&"pkg2".to_string()) {
1278 found_pkg2 = true;
1279 assert!(!chunk.packages.contains(&"pkg1.0".to_string()));
1280 }
1281 if chunk.packages.contains(&"pkg3".to_string()) {
1282 found_pkg3 = true;
1283 assert!(!chunk.packages.contains(&"pkg1.0".to_string()));
1284 }
1285 }
1286 assert!(found_pkg2 && found_pkg3);
1287
1288 Ok(())
1289 }
1290
1291 #[test]
1292 fn test_process_mapping_specific_components_contain_correct_objects() -> Result<()> {
1293 let packages = [
1300 (1, 100, 50000), (2, 200, 40000), (3, 300, 30000), (4, 400, 20000), (5, 500, 10000), ];
1306
1307 let (contentmeta, mut system_metadata, mut specific_contentmeta, mut chunking) =
1308 setup_exclusive_test(&packages, 8, None)?;
1309
1310 let mut system_objects_map = IndexMap::new();
1313
1314 let pkg1_objects = ["checksum_1_a", "checksum_1_b", "checksum_1_c"];
1316
1317 let pkg2_objects = ["checksum_2_a", "checksum_2_b"];
1319
1320 let pkg3_objects = ["checksum_3_a", "checksum_3_b"];
1322 for obj in &pkg3_objects {
1323 system_objects_map.insert(obj.to_string(), contentmeta[2].meta.identifier.clone());
1324 }
1325
1326 let pkg4_objects = ["checksum_4_a"];
1328 for obj in &pkg4_objects {
1329 system_objects_map.insert(obj.to_string(), contentmeta[3].meta.identifier.clone());
1330 }
1331
1332 let pkg5_objects = ["checksum_5_a", "checksum_5_b", "checksum_5_c"];
1334 for obj in &pkg5_objects {
1335 system_objects_map.insert(obj.to_string(), contentmeta[4].meta.identifier.clone());
1336 }
1337
1338 system_metadata.map = system_objects_map;
1340
1341 specific_contentmeta.insert(
1343 contentmeta[0].meta.identifier.clone(),
1344 pkg1_objects
1345 .iter()
1346 .map(|obj| {
1347 (
1348 Utf8PathBuf::from(format!("/path/to/{obj}")),
1349 obj.to_string(),
1350 )
1351 })
1352 .collect(),
1353 );
1354 specific_contentmeta.insert(
1355 contentmeta[1].meta.identifier.clone(),
1356 pkg2_objects
1357 .iter()
1358 .map(|obj| {
1359 (
1360 Utf8PathBuf::from(format!("/path/to/{obj}")),
1361 obj.to_string(),
1362 )
1363 })
1364 .collect(),
1365 );
1366
1367 for (checksum, _) in &system_metadata.map {
1371 chunking.remainder.content.insert(
1372 RcStr::from(checksum.as_str()),
1373 (
1374 1000,
1375 vec![Utf8PathBuf::from(format!("/path/to/{checksum}"))],
1376 ),
1377 );
1378 chunking.remainder.size += 1000;
1379 }
1380 for paths in specific_contentmeta.values() {
1381 for (p, checksum) in paths {
1382 chunking
1383 .remainder
1384 .content
1385 .insert(RcStr::from(checksum.as_str()), (1000, vec![p.clone()]));
1386 }
1387 }
1388
1389 chunking.process_mapping(
1393 &system_metadata,
1394 &Some(NonZeroU32::new(8).unwrap()),
1395 None,
1396 Some(&specific_contentmeta),
1397 )?;
1398
1399 assert!(
1401 chunking.chunks.len() >= 2,
1402 "Should have at least 2 chunks for specific components"
1403 );
1404
1405 let specific_component_1_layer = &chunking.chunks[0];
1407 assert_eq!(specific_component_1_layer.name, "pkg1.0");
1408 assert_eq!(specific_component_1_layer.packages, vec!["pkg1.0"]);
1409 assert_eq!(specific_component_1_layer.content.len(), 3);
1410 for obj in &pkg1_objects {
1411 assert!(
1412 specific_component_1_layer.content.contains_key(*obj),
1413 "Specific component 1 layer should contain {obj}"
1414 );
1415 }
1416
1417 let specific_component_2_layer = &chunking.chunks[1];
1419 assert_eq!(specific_component_2_layer.name, "pkg2.0");
1420 assert_eq!(specific_component_2_layer.packages, vec!["pkg2.0"]);
1421 assert_eq!(specific_component_2_layer.content.len(), 2);
1422 for obj in &pkg2_objects {
1423 assert!(
1424 specific_component_2_layer.content.contains_key(*obj),
1425 "Specific component 2 layer should contain {obj}"
1426 );
1427 }
1428
1429 for specific_layer in &chunking.chunks[0..2] {
1431 for obj in pkg3_objects
1432 .iter()
1433 .chain(&pkg4_objects)
1434 .chain(&pkg5_objects)
1435 {
1436 assert!(
1437 !specific_layer.content.contains_key(*obj),
1438 "Specific component layer '{}' should NOT contain regular package object {}",
1439 specific_layer.name,
1440 obj
1441 );
1442 }
1443 }
1444
1445 let regular_package_layers = &chunking.chunks[2..];
1447 for regular_layer in regular_package_layers {
1448 for obj in pkg1_objects.iter().chain(&pkg2_objects) {
1449 assert!(
1450 !regular_layer.content.contains_key(*obj),
1451 "Regular package layer should NOT contain specific component object {obj}"
1452 );
1453 }
1454 }
1455
1456 let mut found_regular_objects = BTreeSet::new();
1458 for regular_layer in regular_package_layers {
1459 for obj in regular_layer.content.keys() {
1460 found_regular_objects.insert(obj.as_ref());
1461 }
1462 }
1463
1464 for obj in pkg3_objects
1465 .iter()
1466 .chain(&pkg4_objects)
1467 .chain(&pkg5_objects)
1468 {
1469 assert!(
1470 found_regular_objects.contains(*obj),
1471 "Regular package object {obj} should be in some regular layer"
1472 );
1473 }
1474
1475 assert_eq!(
1477 chunking.remainder.content.len(),
1478 0,
1479 "All objects should be moved from remainder"
1480 );
1481 assert_eq!(chunking.remainder.size, 0, "Remainder size should be 0");
1482
1483 Ok(())
1484 }
1485}