bootc_initramfs_setup/
lib.rs

1//! Mount helpers for bootc-initramfs
2
3use std::{
4    ffi::OsString,
5    fmt::Debug,
6    io::ErrorKind,
7    os::fd::{AsFd, AsRawFd, OwnedFd},
8    path::{Path, PathBuf},
9};
10
11use anyhow::{Context, Result};
12use clap::Parser;
13use rustix::{
14    fs::{CWD, Mode, OFlags, major, minor, mkdirat, openat, stat, symlink},
15    io::Errno,
16    mount::{
17        FsMountFlags, MountAttrFlags, OpenTreeFlags, UnmountFlags, fsconfig_create,
18        fsconfig_set_string, fsmount, open_tree, unmount,
19    },
20    path,
21};
22use serde::Deserialize;
23
24use composefs::{
25    fsverity::{FsVerityHashValue, Sha512HashValue},
26    mount::FsHandle,
27    mountcompat::{overlayfs_set_fd, overlayfs_set_lower_and_data_fds, prepare_mount},
28    repository::Repository,
29};
30use composefs_boot::cmdline::get_cmdline_composefs;
31
32use fn_error_context::context;
33
34use bootc_kernel_cmdline::utf8::Cmdline;
35
// mount_setattr syscall support
/// Attribute bit requesting a read-only mount.
///
/// `set_mount_readonly` places this value in `MountAttr::attr_set`.
const MOUNT_ATTR_RDONLY: u64 = 0x00000001;
38
/// Argument block handed by pointer to the raw `mount_setattr` syscall.
///
/// `#[repr(C)]` fixes the field order and layout, and `mount_setattr` passes
/// `size_of::<MountAttr>()` alongside the pointer.
///
/// NOTE(review): the field names appear to mirror the kernel's
/// `struct mount_attr`; confirm the per-field meaning against mount_setattr(2).
#[repr(C)]
struct MountAttr {
    /// Attribute bits to set; `set_mount_readonly` stores `MOUNT_ATTR_RDONLY` here.
    attr_set: u64,
    /// Presumably attribute bits to clear; `set_mount_readonly` leaves it at 0.
    attr_clr: u64,
    /// Presumably the mount propagation setting; `set_mount_readonly` leaves it at 0.
    propagation: u64,
    /// Presumably a user namespace fd; `set_mount_readonly` leaves it at 0.
    userns_fd: u64,
}
46
47/// Set mount attributes using mount_setattr syscall
48#[context("Setting mount attributes")]
49#[allow(unsafe_code)]
50fn mount_setattr(fd: impl AsFd, flags: libc::c_int, attr: &MountAttr) -> Result<()> {
51    let ret = unsafe {
52        libc::syscall(
53            libc::SYS_mount_setattr,
54            fd.as_fd().as_raw_fd(),
55            c"".as_ptr(),
56            flags,
57            attr as *const MountAttr,
58            std::mem::size_of::<MountAttr>(),
59        )
60    };
61    if ret == -1 {
62        Err(std::io::Error::last_os_error())?;
63    }
64    Ok(())
65}
66
67/// Set mount to readonly
68#[context("Setting mount readonly")]
69fn set_mount_readonly(fd: impl AsFd) -> Result<()> {
70    let attr = MountAttr {
71        attr_set: MOUNT_ATTR_RDONLY,
72        attr_clr: 0,
73        propagation: 0,
74        userns_fd: 0,
75    };
76    mount_setattr(fd, libc::AT_EMPTY_PATH, &attr)
77}
78
/// Types of mounts supported by the configuration
///
/// Deserialized from the lowercase variant names (`none`, `bind`, `overlay`,
/// `transient`). `mount_subdir` is where each variant is acted upon.
#[derive(Clone, Copy, Debug, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MountType {
    /// No mount: `mount_subdir` does nothing for the subdirectory
    None,
    /// Bind mount: the subdirectory of the state directory is cloned and
    /// mounted at the same name in the new root
    Bind,
    /// Overlay mount: an overlayfs over the new root's subdirectory whose
    /// `upper` and `work` directories live in the state subdirectory
    Overlay,
    /// Transient mount: an overlayfs over the new root's subdirectory whose
    /// `upper` and `work` directories live on a freshly mounted tmpfs
    Transient,
}
92
/// Settings for the root filesystem itself (the `root` field of `Config`).
#[derive(Debug, Default, Deserialize)]
struct RootConfig {
    /// When true, `setup_root` puts a transient overlay on top of the new
    /// root. Defaults to false when absent.
    #[serde(default)]
    transient: bool,
}
98
/// Configuration for mount operations
///
/// `mount_subdir` resolves the effective mount type as follows: an explicit
/// `mount` wins; otherwise `transient = true` selects `MountType::Transient`;
/// otherwise the caller-supplied default is used.
#[derive(Debug, Default, Deserialize)]
pub struct MountConfig {
    /// The type of mount to use
    pub mount: Option<MountType>,
    /// Whether this mount should be transient (temporary)
    ///
    /// Only consulted when `mount` is not set. Defaults to false when absent.
    #[serde(default)]
    pub transient: bool,
}
108
/// Top-level shape of the configuration file that `setup_root` parses as TOML.
///
/// Every field is optional in the file and falls back to its `Default`.
#[derive(Deserialize, Default)]
struct Config {
    /// Mount settings applied to the `etc` subdirectory.
    #[serde(default)]
    etc: MountConfig,
    /// Mount settings applied to the `var` subdirectory.
    #[serde(default)]
    var: MountConfig,
    /// Settings for the root filesystem itself.
    #[serde(default)]
    root: RootConfig,
}
118
/// Command-line arguments
// NOTE(review): clap's derive reads `///` doc comments as help/about text, so
// the doc comments below are deliberately kept word-for-word in sync with the
// explicit `help = ...` strings — confirm before rewording either of them.
#[derive(Parser, Debug)]
#[command(version)]
pub struct Args {
    #[arg(help = "Execute this command (for testing)")]
    /// Execute this command (for testing)
    pub cmd: Vec<OsString>,

    #[arg(
        long,
        default_value = "/sysroot",
        help = "sysroot directory in initramfs"
    )]
    /// sysroot directory in initramfs
    pub sysroot: PathBuf,

    #[arg(
        long,
        default_value = "/usr/lib/composefs/setup-root-conf.toml",
        help = "Config path (for testing)"
    )]
    /// Config path (for testing)
    pub config: PathBuf,

    // Tests run inside a user namespace, where erofs cannot be mounted, so
    // they bind mount an existing root filesystem instead of the image.
    #[arg(long, help = "Bind mount root-fs from (for testing)")]
    /// Bind mount root-fs from (for testing)
    pub root_fs: Option<PathBuf>,

    // When absent, `setup_root` reads the command line via `Cmdline::from_proc`.
    #[arg(long, help = "Kernel commandline args (for testing)")]
    /// Kernel commandline args (for testing)
    pub cmdline: Option<Cmdline<'static>>,

    // When absent, `setup_root` mounts the new root at `sysroot`.
    #[arg(long, help = "Mountpoint (don't replace sysroot, for testing)")]
    /// Mountpoint (don't replace sysroot, for testing)
    pub target: Option<PathBuf>,
}
156
157/// Wrapper around [`composefs::mount::mount_at`]
158pub fn mount_at_wrapper(
159    fs_fd: impl AsFd,
160    dirfd: impl AsFd,
161    path: impl path::Arg + Debug + Clone,
162) -> Result<()> {
163    composefs::mount::mount_at(fs_fd, dirfd, path.clone())
164        .with_context(|| format!("Mounting at path {path:?}"))
165}
166
167/// Wrapper around [`rustix::fs::openat`]
168#[context("Opening dir {name:?}")]
169pub fn open_dir(dirfd: impl AsFd, name: impl AsRef<Path> + Debug) -> Result<OwnedFd> {
170    let res = openat(
171        dirfd,
172        name.as_ref(),
173        OFlags::PATH | OFlags::DIRECTORY | OFlags::CLOEXEC,
174        Mode::empty(),
175    );
176
177    Ok(res?)
178}
179
180#[context("Ensure dir")]
181fn ensure_dir(dirfd: impl AsFd, name: &str, mode: Option<rustix::fs::Mode>) -> Result<OwnedFd> {
182    match mkdirat(dirfd.as_fd(), name, mode.unwrap_or(0o700.into())) {
183        Ok(()) | Err(Errno::EXIST) => {}
184        Err(err) => Err(err).with_context(|| format!("Creating dir {name}"))?,
185    }
186
187    open_dir(dirfd, name)
188}
189
190#[context("Bind mounting to path {path}")]
191fn bind_mount(fd: impl AsFd, path: &str) -> Result<OwnedFd> {
192    let res = open_tree(
193        fd.as_fd(),
194        path,
195        OpenTreeFlags::OPEN_TREE_CLONE
196            | OpenTreeFlags::OPEN_TREE_CLOEXEC
197            | OpenTreeFlags::AT_EMPTY_PATH,
198    );
199
200    Ok(res?)
201}
202
203#[context("Mounting tmpfs")]
204fn mount_tmpfs() -> Result<OwnedFd> {
205    let tmpfs = FsHandle::open("tmpfs")?;
206    fsconfig_create(tmpfs.as_fd())?;
207    Ok(fsmount(
208        tmpfs.as_fd(),
209        FsMountFlags::FSMOUNT_CLOEXEC,
210        MountAttrFlags::empty(),
211    )?)
212}
213
214#[context("Mounting state as overlay")]
215fn overlay_state(
216    base: impl AsFd,
217    state: impl AsFd,
218    source: &str,
219    mode: Option<rustix::fs::Mode>,
220) -> Result<()> {
221    let upper = ensure_dir(state.as_fd(), "upper", mode)?;
222    let work = ensure_dir(state.as_fd(), "work", mode)?;
223
224    let overlayfs = FsHandle::open("overlay")?;
225    fsconfig_set_string(overlayfs.as_fd(), "source", source)?;
226    overlayfs_set_fd(overlayfs.as_fd(), "workdir", work.as_fd())?;
227    overlayfs_set_fd(overlayfs.as_fd(), "upperdir", upper.as_fd())?;
228    overlayfs_set_lower_and_data_fds(&overlayfs, base.as_fd(), None::<OwnedFd>)?;
229    fsconfig_create(overlayfs.as_fd())?;
230    let fs = fsmount(
231        overlayfs.as_fd(),
232        FsMountFlags::FSMOUNT_CLOEXEC,
233        MountAttrFlags::empty(),
234    )?;
235
236    mount_at_wrapper(fs, base, ".").context("Moving mount")
237}
238
239/// Mounts a transient overlayfs with passed in fd as the lowerdir
240#[context("Mounting transient overlayfs")]
241pub fn overlay_transient(base: impl AsFd, mode: Option<rustix::fs::Mode>) -> Result<()> {
242    overlay_state(base, prepare_mount(mount_tmpfs()?)?, "transient", mode)
243}
244
245#[context("Opening rootfs")]
246fn open_root_fs(path: &Path) -> Result<OwnedFd> {
247    let rootfs = open_tree(
248        CWD,
249        path,
250        OpenTreeFlags::OPEN_TREE_CLONE | OpenTreeFlags::OPEN_TREE_CLOEXEC,
251    )?;
252
253    set_mount_readonly(&rootfs)?;
254
255    Ok(rootfs)
256}
257
258/// Prepares a floating mount for composefs and returns the fd
259///
260/// # Arguments
261/// * sysroot  - fd for /sysroot
262/// * name     - Name of the EROFS image to be mounted
263/// * insecure - Whether fsverity is optional or not
264#[context("Mounting composefs image")]
265pub fn mount_composefs_image(sysroot: &OwnedFd, name: &str, insecure: bool) -> Result<OwnedFd> {
266    let mut repo = Repository::<Sha512HashValue>::open_path(sysroot, "composefs")?;
267    repo.set_insecure(insecure);
268    let rootfs = repo
269        .mount(name)
270        .context("Failed to mount composefs image")?;
271
272    set_mount_readonly(&rootfs)?;
273
274    Ok(rootfs)
275}
276
277/// Mounts a subdirectory with the specified configuration
278#[context("Mounting subdirectory")]
279pub fn mount_subdir(
280    new_root: impl AsFd,
281    state: impl AsFd,
282    subdir: &str,
283    config: MountConfig,
284    default: MountType,
285) -> Result<()> {
286    let mount_type = match config.mount {
287        Some(mt) => mt,
288        None => match config.transient {
289            true => MountType::Transient,
290            false => default,
291        },
292    };
293
294    match mount_type {
295        MountType::None => Ok(()),
296        MountType::Bind => Ok(mount_at_wrapper(
297            bind_mount(&state, subdir)?,
298            &new_root,
299            subdir,
300        )?),
301        MountType::Overlay => overlay_state(
302            open_dir(&new_root, subdir)?,
303            open_dir(&state, subdir)?,
304            "overlay",
305            None,
306        ),
307        MountType::Transient => overlay_transient(open_dir(&new_root, subdir)?, None),
308    }
309}
310
311#[context("GPT workaround")]
312/// Workaround for /dev/gpt-auto-root
313pub fn gpt_workaround() -> Result<()> {
314    // https://github.com/systemd/systemd/issues/35017
315    let rootdev = stat("/dev/gpt-auto-root");
316
317    let rootdev = match rootdev {
318        Ok(r) => r,
319        Err(e) if e.kind() == ErrorKind::NotFound => return Ok(()),
320        Err(e) => Err(e)?,
321    };
322
323    let target = format!(
324        "/dev/block/{}:{}",
325        major(rootdev.st_rdev),
326        minor(rootdev.st_rdev)
327    );
328    symlink(target, "/run/systemd/volatile-root")?;
329    Ok(())
330}
331
332/// Sets up /sysroot for switch-root
333#[context("Setting up /sysroot")]
334pub fn setup_root(args: Args) -> Result<()> {
335    let config = match std::fs::read_to_string(args.config) {
336        Ok(text) => toml::from_str(&text)?,
337        Err(err) if err.kind() == ErrorKind::NotFound => Config::default(),
338        Err(err) => Err(err)?,
339    };
340
341    let sysroot = open_dir(CWD, &args.sysroot)
342        .with_context(|| format!("Failed to open sysroot {:?}", args.sysroot))?;
343
344    let cmdline = args
345        .cmdline
346        .unwrap_or(Cmdline::from_proc().context("Failed to read cmdline")?);
347
348    let (image, insecure) = get_cmdline_composefs::<Sha512HashValue>(&cmdline)?;
349
350    let new_root = match args.root_fs {
351        Some(path) => open_root_fs(&path).context("Failed to clone specified root fs")?,
352        None => mount_composefs_image(&sysroot, &image.to_hex(), insecure)?,
353    };
354
355    // we need to clone this before the next step to make sure we get the old one
356    let sysroot_clone = bind_mount(&sysroot, "")?;
357
358    set_mount_readonly(&sysroot_clone)?;
359
360    let mount_target = args.target.unwrap_or(args.sysroot.clone());
361
362    // Ideally we build the new root filesystem together before we mount it, but that only works on
363    // 6.15 and later.  Before 6.15 we can't mount into a floating tree, so mount it first.  This
364    // will leave an abandoned clone of the sysroot mounted under it, but that's OK for now.
365    if cfg!(feature = "pre-6.15") {
366        mount_at_wrapper(&new_root, CWD, &mount_target)?;
367    }
368
369    if config.root.transient {
370        overlay_transient(&new_root, None)?;
371    }
372
373    match composefs::mount::mount_at(&sysroot_clone, &new_root, "sysroot") {
374        Ok(()) | Err(Errno::NOENT) => {}
375        Err(err) => Err(err)?,
376    }
377
378    // etc + var
379    let state = open_dir(open_dir(&sysroot, "state/deploy")?, image.to_hex())?;
380    mount_subdir(&new_root, &state, "etc", config.etc, MountType::Bind)?;
381    mount_subdir(&new_root, &state, "var", config.var, MountType::Bind)?;
382
383    if cfg!(not(feature = "pre-6.15")) {
384        // Replace the /sysroot with the new composed root filesystem
385        unmount(&args.sysroot, UnmountFlags::DETACH)?;
386        mount_at_wrapper(&new_root, CWD, &mount_target)?;
387    }
388
389    Ok(())
390}