1use std::{
8 cell::RefCell,
9 collections::{BTreeMap, HashMap},
10 mem::size_of,
11 os::unix::ffi::OsStrExt,
12 rc::Rc,
13};
14
15use log::trace;
16use xxhash_rust::xxh32::xxh32;
17use zerocopy::{Immutable, IntoBytes};
18
19use crate::{
20 erofs::{composefs::OverlayMetacopy, format, reader::round_up},
21 fsverity::FsVerityHashValue,
22 tree,
23};
24
25#[derive(Clone, Copy, Debug)]
26enum Offset {
27 Header,
28 Superblock,
29 Inode,
30 XAttr,
31 Block,
32 End,
33}
34
35trait Output {
36 fn note_offset(&mut self, offset_type: Offset);
37 fn get(&self, offset_type: Offset, idx: usize) -> usize;
38 fn write(&mut self, data: &[u8]);
39 fn pad(&mut self, alignment: usize);
40 fn len(&self) -> usize;
41
42 fn get_div(&self, offset_type: Offset, idx: usize, div: usize) -> usize {
43 let offset = self.get(offset_type, idx);
44 assert_eq!(offset % div, 0);
45 offset / div
46 }
47
48 fn get_nid(&self, idx: usize) -> u64 {
49 self.get_div(Offset::Inode, idx, 32) as u64
50 }
51
52 fn get_xattr(&self, idx: usize) -> u32 {
53 self.get_div(Offset::XAttr, idx, 4).try_into().unwrap()
54 }
55
56 fn write_struct(&mut self, st: impl IntoBytes + Immutable) {
57 self.write(st.as_bytes());
58 }
59}
60
61#[derive(PartialOrd, PartialEq, Eq, Ord, Clone)]
62struct XAttr {
63 prefix: u8,
64 suffix: Box<[u8]>,
65 value: Box<[u8]>,
66}
67
68#[derive(Clone, Default)]
69struct InodeXAttrs {
70 shared: Vec<usize>,
71 local: Vec<XAttr>,
72 filter: u32,
73}
74
75#[derive(Debug)]
76struct DirEnt<'a> {
77 name: &'a [u8],
78 inode: usize,
79 file_type: format::FileType,
80}
81
82#[derive(Debug, Default)]
83struct Directory<'a> {
84 blocks: Box<[Box<[DirEnt<'a>]>]>,
85 inline: Box<[DirEnt<'a>]>,
86 size: u64,
87 nlink: usize,
88}
89
90#[derive(Debug)]
91struct Leaf<'a, ObjectID: FsVerityHashValue> {
92 content: &'a tree::LeafContent<ObjectID>,
93 nlink: usize,
94}
95
96#[derive(Debug)]
97enum InodeContent<'a, ObjectID: FsVerityHashValue> {
98 Directory(Directory<'a>),
99 Leaf(Leaf<'a, ObjectID>),
100}
101
102struct Inode<'a, ObjectID: FsVerityHashValue> {
103 stat: &'a tree::Stat,
104 xattrs: InodeXAttrs,
105 content: InodeContent<'a, ObjectID>,
106}
107
108impl XAttr {
109 pub fn write(&self, output: &mut impl Output) {
110 output.write_struct(format::XAttrHeader {
111 name_len: self.suffix.len() as u8,
112 name_index: self.prefix,
113 value_size: (self.value.len() as u16).into(),
114 });
115 output.write(&self.suffix);
116 output.write(&self.value);
117 output.pad(4);
118 }
119}
120
121impl InodeXAttrs {
122 fn add(&mut self, name: &[u8], value: &[u8]) {
123 for (idx, prefix) in format::XATTR_PREFIXES.iter().enumerate().rev() {
124 if let Some(suffix) = name.strip_prefix(*prefix) {
125 self.filter |= 1 << (xxh32(suffix, format::XATTR_FILTER_SEED + idx as u32) % 32);
126 self.local.push(XAttr {
127 prefix: idx as u8,
128 suffix: Box::from(suffix),
129 value: Box::from(value),
130 });
131 return;
132 }
133 }
134 unreachable!("{:?}", std::str::from_utf8(name)); }
136
137 fn write(&self, output: &mut impl Output) {
138 if self.filter != 0 {
139 trace!(" write xattrs block");
140 output.write_struct(format::InodeXAttrHeader {
141 name_filter: (!self.filter).into(),
142 shared_count: self.shared.len() as u8,
143 ..Default::default()
144 });
145 for idx in &self.shared {
146 trace!(" shared {} @{}", idx, output.len());
147 output.write(&output.get_xattr(*idx).to_le_bytes());
148 }
149 for attr in &self.local {
150 trace!(" local @{}", output.len());
151 attr.write(output);
152 }
153 }
154 }
156}
157
158impl<'a> Directory<'a> {
159 pub fn from_entries(entries: Vec<DirEnt<'a>>) -> Self {
160 let mut blocks = vec![];
161 let mut rest = vec![];
162
163 let mut n_bytes = 0u64;
164 let mut nlink = 0;
165
166 trace!("Directory with {} items", entries.len());
167
168 for entry in entries.into_iter() {
171 let entry_size: u64 = (size_of::<format::DirectoryEntryHeader>() + entry.name.len())
172 .try_into()
173 .unwrap();
174 assert!(entry_size <= 4096);
175
176 trace!(" {:?}", entry.file_type);
177
178 if matches!(entry.file_type, format::FileType::Directory) {
179 nlink += 1;
180 }
181
182 n_bytes += entry_size;
183 if n_bytes <= 4096 {
184 rest.push(entry);
185 } else {
186 trace!(" block {}", rest.len());
188 blocks.push(rest.into_boxed_slice());
189
190 rest = vec![entry];
192 n_bytes = entry_size;
193 }
194 }
195
196 if n_bytes > 2048 {
198 blocks.push(rest.into_boxed_slice());
199 rest = vec![];
200 n_bytes = 0;
201 }
202
203 trace!(
204 " blocks {} inline {} inline_size {n_bytes}",
205 blocks.len(),
206 rest.len()
207 );
208
209 let block_size: u64 = format::BLOCK_SIZE.into();
210 let size = block_size * blocks.len() as u64 + n_bytes;
211 Self {
212 blocks: blocks.into_boxed_slice(),
213 inline: rest.into_boxed_slice(),
214 size,
215 nlink,
216 }
217 }
218
219 fn write_block(&self, output: &mut impl Output, block: &[DirEnt]) {
220 trace!(" write dir block {} @{}", block.len(), output.len());
221 let mut nameofs = size_of::<format::DirectoryEntryHeader>() * block.len();
222
223 for entry in block {
224 trace!(
225 " entry {:?} name {} @{}",
226 entry.file_type,
227 nameofs,
228 output.len()
229 );
230 output.write_struct(format::DirectoryEntryHeader {
231 name_offset: (nameofs as u16).into(),
232 inode_offset: output.get_nid(entry.inode).into(),
233 file_type: entry.file_type.into(),
234 ..Default::default()
235 });
236 nameofs += entry.name.len();
237 }
238
239 for entry in block {
240 trace!(" name @{}", output.len());
241 output.write(entry.name.as_bytes());
242 }
243 }
244
245 fn write_inline(&self, output: &mut impl Output) {
246 trace!(
247 " write inline len {} expected size {} of {}",
248 self.inline.len(),
249 self.size % 4096,
250 self.size
251 );
252 self.write_block(output, &self.inline);
253 }
254
255 fn write_blocks(&self, output: &mut impl Output) {
256 let block_size: usize = format::BLOCK_SIZE.into();
257 for block in &self.blocks {
258 assert_eq!(output.len() % block_size, 0);
259 self.write_block(output, block);
260 output.pad(block_size);
261 }
262 }
263
264 fn inode_meta(&self, block_offset: usize) -> (format::DataLayout, u32, u64, usize) {
265 let (layout, u) = if self.inline.is_empty() {
266 (format::DataLayout::FlatPlain, block_offset as u32 / 4096)
267 } else if !self.blocks.is_empty() {
268 (format::DataLayout::FlatInline, block_offset as u32 / 4096)
269 } else {
270 (format::DataLayout::FlatInline, 0)
271 };
272 (layout, u, self.size, self.nlink)
273 }
274}
275
276impl<ObjectID: FsVerityHashValue> Leaf<'_, ObjectID> {
277 fn inode_meta(&self) -> (format::DataLayout, u32, u64, usize) {
278 let (layout, u, size) = match &self.content {
279 tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => {
280 if data.is_empty() {
281 (format::DataLayout::FlatPlain, 0, data.len() as u64)
282 } else {
283 (format::DataLayout::FlatInline, 0, data.len() as u64)
284 }
285 }
286 tree::LeafContent::Regular(tree::RegularFile::External(.., size)) => {
287 (format::DataLayout::ChunkBased, 31, *size)
288 }
289 tree::LeafContent::CharacterDevice(rdev) | tree::LeafContent::BlockDevice(rdev) => {
290 (format::DataLayout::FlatPlain, *rdev as u32, 0)
291 }
292 tree::LeafContent::Fifo | tree::LeafContent::Socket => {
293 (format::DataLayout::FlatPlain, 0, 0)
294 }
295 tree::LeafContent::Symlink(target) => {
296 (format::DataLayout::FlatInline, 0, target.len() as u64)
297 }
298 };
299 (layout, u, size, self.nlink)
300 }
301
302 fn write_inline(&self, output: &mut impl Output) {
303 output.write(match self.content {
304 tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => data,
305 tree::LeafContent::Regular(tree::RegularFile::External(..)) => b"\xff\xff\xff\xff", tree::LeafContent::Symlink(target) => target.as_bytes(),
307 _ => &[],
308 });
309 }
310}
311
312impl<ObjectID: FsVerityHashValue> Inode<'_, ObjectID> {
313 fn file_type(&self) -> format::FileType {
314 match &self.content {
315 InodeContent::Directory(..) => format::FileType::Directory,
316 InodeContent::Leaf(leaf) => match &leaf.content {
317 tree::LeafContent::Regular(..) => format::FileType::RegularFile,
318 tree::LeafContent::CharacterDevice(..) => format::FileType::CharacterDevice,
319 tree::LeafContent::BlockDevice(..) => format::FileType::BlockDevice,
320 tree::LeafContent::Fifo => format::FileType::Fifo,
321 tree::LeafContent::Socket => format::FileType::Socket,
322 tree::LeafContent::Symlink(..) => format::FileType::Symlink,
323 },
324 }
325 }
326
327 fn write_inode(&self, output: &mut impl Output, idx: usize) {
328 let (layout, u, size, nlink) = match &self.content {
329 InodeContent::Directory(dir) => dir.inode_meta(output.get(Offset::Block, idx)),
330 InodeContent::Leaf(leaf) => leaf.inode_meta(),
331 };
332
333 let xattr_size = {
334 let mut xattr = FirstPass::default();
335 self.xattrs.write(&mut xattr);
336 xattr.offset
337 };
338
339 output.pad(32);
341 if matches!(layout, format::DataLayout::FlatInline) {
342 let block_size = u64::from(format::BLOCK_SIZE);
343 let inode_and_xattr_size: u64 = (size_of::<format::ExtendedInodeHeader>() + xattr_size)
344 .try_into()
345 .unwrap();
346 let inline_start: u64 = output.len().try_into().unwrap();
347 let inline_start = inline_start + inode_and_xattr_size;
348 let end_of_metadata = inline_start - 1;
349 let inline_end = inline_start + (size % block_size);
350 if end_of_metadata / block_size != inline_end / block_size {
351 let pad_size = (block_size - end_of_metadata % block_size) as usize;
356 let pad = vec![0; pad_size];
357 trace!("added pad {}", pad.len());
358 output.write(&pad);
359 output.pad(32);
360 }
361 }
362
363 let format = format::InodeLayout::Extended | layout;
364
365 trace!(
366 "write inode {idx} nid {} {:?} {:?} xattrsize{xattr_size} icount{} inline{} @{}",
367 output.len() / 32,
368 format,
369 self.file_type(),
370 match xattr_size {
371 0 => 0,
372 n => (1 + (n - 12) / 4) as u16,
373 },
374 size % 4096,
375 output.len()
376 );
377
378 output.note_offset(Offset::Inode);
379 output.write_struct(format::ExtendedInodeHeader {
380 format,
381 xattr_icount: match xattr_size {
382 0 => 0,
383 n => (1 + (n - 12) / 4) as u16,
384 }
385 .into(),
386 mode: self.file_type() | self.stat.st_mode,
387 size: size.into(),
388 u: u.into(),
389 ino: ((output.len() / 32) as u32).into(),
390 uid: self.stat.st_uid.into(),
391 gid: self.stat.st_gid.into(),
392 mtime: (self.stat.st_mtim_sec as u64).into(),
393 nlink: (nlink as u32).into(),
394 ..Default::default()
395 });
396
397 self.xattrs.write(output);
398
399 match &self.content {
400 InodeContent::Directory(dir) => dir.write_inline(output),
401 InodeContent::Leaf(leaf) => leaf.write_inline(output),
402 };
403
404 output.pad(32);
405 }
406
407 fn write_blocks(&self, output: &mut impl Output) {
408 if let InodeContent::Directory(dir) = &self.content {
409 dir.write_blocks(output);
410 }
411 }
412}
413
414struct InodeCollector<'a, ObjectID: FsVerityHashValue> {
415 inodes: Vec<Inode<'a, ObjectID>>,
416 hardlinks: HashMap<*const tree::Leaf<ObjectID>, usize>,
417}
418
419impl<'a, ObjectID: FsVerityHashValue> InodeCollector<'a, ObjectID> {
420 fn push_inode(&mut self, stat: &'a tree::Stat, content: InodeContent<'a, ObjectID>) -> usize {
421 let mut xattrs = InodeXAttrs::default();
422
423 if let InodeContent::Leaf(Leaf {
425 content: tree::LeafContent::Regular(tree::RegularFile::External(id, ..)),
426 ..
427 }) = content
428 {
429 xattrs.add(
430 b"trusted.overlay.metacopy",
431 OverlayMetacopy::new(id).as_bytes(),
432 );
433
434 let redirect = format!("/{}", id.to_object_pathname());
435 xattrs.add(b"trusted.overlay.redirect", redirect.as_bytes());
436 }
437
438 for (name, value) in RefCell::borrow(&stat.xattrs).iter() {
440 let name = name.as_bytes();
441
442 if let Some(escapee) = name.strip_prefix(b"trusted.overlay.") {
443 let escaped = [b"trusted.overlay.overlay.", escapee].concat();
444 xattrs.add(&escaped, value);
445 } else {
446 xattrs.add(name, value);
447 }
448 }
449
450 let inode = self.inodes.len();
453 self.inodes.push(Inode {
454 stat,
455 xattrs,
456 content,
457 });
458 inode
459 }
460
461 fn collect_leaf(&mut self, leaf: &'a Rc<tree::Leaf<ObjectID>>) -> usize {
462 let nlink = Rc::strong_count(leaf);
463
464 if nlink > 1 {
465 if let Some(inode) = self.hardlinks.get(&Rc::as_ptr(leaf)) {
466 return *inode;
467 }
468 }
469
470 let inode = self.push_inode(
471 &leaf.stat,
472 InodeContent::Leaf(Leaf {
473 content: &leaf.content,
474 nlink,
475 }),
476 );
477
478 if nlink > 1 {
479 self.hardlinks.insert(Rc::as_ptr(leaf), inode);
480 }
481
482 inode
483 }
484
485 fn insert_sorted(
486 entries: &mut Vec<DirEnt<'a>>,
487 name: &'a [u8],
488 inode: usize,
489 file_type: format::FileType,
490 ) {
491 let entry = DirEnt {
492 name,
493 inode,
494 file_type,
495 };
496 let point = entries.partition_point(|e| e.name < entry.name);
497 entries.insert(point, entry);
498 }
499
500 fn collect_dir(&mut self, dir: &'a tree::Directory<ObjectID>, parent: usize) -> usize {
501 let me = self.push_inode(&dir.stat, InodeContent::Directory(Directory::default()));
504
505 let mut entries = vec![];
506
507 for (name, inode) in dir.sorted_entries() {
508 let child = match inode {
509 tree::Inode::Directory(dir) => self.collect_dir(dir, me),
510 tree::Inode::Leaf(leaf) => self.collect_leaf(leaf),
511 };
512 entries.push(DirEnt {
513 name: name.as_bytes(),
514 inode: child,
515 file_type: self.inodes[child].file_type(),
516 });
517 }
518
519 Self::insert_sorted(&mut entries, b".", me, format::FileType::Directory);
521 Self::insert_sorted(&mut entries, b"..", parent, format::FileType::Directory);
522
523 self.inodes[me].content = InodeContent::Directory(Directory::from_entries(entries));
525 me
526 }
527
528 pub fn collect(fs: &'a tree::FileSystem<ObjectID>) -> Vec<Inode<'a, ObjectID>> {
529 let mut this = Self {
530 inodes: vec![],
531 hardlinks: HashMap::new(),
532 };
533
534 let root_inode = this.collect_dir(&fs.root, 0);
536 assert_eq!(root_inode, 0);
537
538 this.inodes
539 }
540}
541
542fn share_xattrs(inodes: &mut [Inode<impl FsVerityHashValue>]) -> Vec<XAttr> {
545 let mut xattrs: BTreeMap<XAttr, usize> = BTreeMap::new();
546
547 for inode in inodes.iter() {
549 for attr in &inode.xattrs.local {
550 if let Some(count) = xattrs.get_mut(attr) {
551 *count += 1;
552 } else {
553 xattrs.insert(attr.clone(), 1);
554 }
555 }
556 }
557
558 xattrs.retain(|_k, v| *v > 1);
560
561 for (idx, value) in xattrs.values_mut().enumerate() {
563 *value = idx;
564 }
565
566 for inode in inodes.iter_mut() {
568 inode.xattrs.local.retain(|attr| {
569 if let Some(idx) = xattrs.get(attr) {
570 inode.xattrs.shared.push(*idx);
571 false } else {
573 true }
575 });
576 }
577
578 xattrs.into_keys().collect()
580}
581
582fn write_erofs(
583 output: &mut impl Output,
584 inodes: &[Inode<impl FsVerityHashValue>],
585 xattrs: &[XAttr],
586) {
587 output.note_offset(Offset::Header);
589 output.write_struct(format::ComposefsHeader {
590 magic: format::COMPOSEFS_MAGIC,
591 version: format::VERSION,
592 flags: 0.into(),
593 composefs_version: format::COMPOSEFS_VERSION,
594 ..Default::default()
595 });
596 output.pad(1024);
597
598 output.note_offset(Offset::Superblock);
600 output.write_struct(format::Superblock {
601 magic: format::MAGIC_V1,
602 blkszbits: format::BLOCK_BITS,
603 feature_compat: format::FEATURE_COMPAT_MTIME | format::FEATURE_COMPAT_XATTR_FILTER,
604 root_nid: (output.get_nid(0) as u16).into(),
605 inos: (inodes.len() as u64).into(),
606 blocks: ((output.get(Offset::End, 0) / usize::from(format::BLOCK_SIZE)) as u32).into(),
607 ..Default::default()
608 });
609
610 for (idx, inode) in inodes.iter().enumerate() {
612 inode.write_inode(output, idx);
614 }
615
616 for xattr in xattrs {
618 output.note_offset(Offset::XAttr);
619 xattr.write(output);
620 }
621
622 output.pad(4096);
624 for inode in inodes.iter() {
625 output.note_offset(Offset::Block);
626 inode.write_blocks(output);
627 }
628
629 output.note_offset(Offset::End);
631}
632
633#[derive(Default)]
634struct Layout {
635 offset_types: Vec<usize>,
636 offsets: Vec<usize>,
637}
638
639#[derive(Default)]
640struct FirstPass {
641 offset: usize,
642 layout: Layout,
643}
644
645struct SecondPass {
646 output: Vec<u8>,
647 layout: Layout,
648}
649
650impl Output for SecondPass {
651 fn note_offset(&mut self, _offset_type: Offset) {
652 }
654
655 fn get(&self, offset_type: Offset, idx: usize) -> usize {
656 let start = self.layout.offset_types[offset_type as usize];
657 self.layout.offsets[start + idx]
658 }
659
660 fn write(&mut self, data: &[u8]) {
661 self.output.extend_from_slice(data);
662 }
663
664 fn pad(&mut self, alignment: usize) {
665 self.output
666 .resize(round_up(self.output.len(), alignment), 0);
667 }
668
669 fn len(&self) -> usize {
670 self.output.len()
671 }
672}
673
674impl Output for FirstPass {
675 fn note_offset(&mut self, offset_type: Offset) {
676 while self.layout.offset_types.len() <= offset_type as usize {
677 self.layout.offset_types.push(self.layout.offsets.len());
678 }
679 assert_eq!(self.layout.offset_types.len(), offset_type as usize + 1);
680
681 trace!(
682 "{:?} #{} @{}",
683 offset_type,
684 self.layout.offsets.len() - self.layout.offset_types[offset_type as usize],
685 self.offset
686 );
687 self.layout.offsets.push(self.offset);
688 }
689
690 fn get(&self, _: Offset, _: usize) -> usize {
691 0 }
693
694 fn write(&mut self, data: &[u8]) {
695 self.offset += data.len();
696 }
697
698 fn pad(&mut self, alignment: usize) {
699 self.offset = round_up(self.offset, alignment);
700 }
701
702 fn len(&self) -> usize {
703 self.offset
704 }
705}
706
707pub fn mkfs_erofs<ObjectID: FsVerityHashValue>(fs: &tree::FileSystem<ObjectID>) -> Box<[u8]> {
715 let mut inodes = InodeCollector::collect(fs);
717 let xattrs = share_xattrs(&mut inodes);
718
719 let mut first_pass = FirstPass::default();
721 write_erofs(&mut first_pass, &inodes, &xattrs);
722
723 let mut second_pass = SecondPass {
725 output: vec![],
726 layout: first_pass.layout,
727 };
728 write_erofs(&mut second_pass, &inodes, &xattrs);
729
730 second_pass.output.into_boxed_slice()
732}