bootc_internal_blockdev/
blockdev.rs

1use std::collections::HashMap;
2use std::env;
3use std::path::Path;
4use std::process::{Command, Stdio};
5use std::sync::OnceLock;
6
7use anyhow::{Context, Result, anyhow};
8use camino::{Utf8Path, Utf8PathBuf};
9use fn_error_context::context;
10use regex::Regex;
11use serde::Deserialize;
12
13use bootc_utils::CommandRunExt;
14
/// MBR partition type IDs that this module accepts as an EFI System Partition (ESP).
///
/// A `dos`-labelled partition whose type parses (as hexadecimal) to either
/// `0x06` or `0xEF` is treated as the ESP by
/// [`PartitionTable::find_partition_of_esp`].
///
/// NOTE(review): `0xEF` is the ID conventionally assigned to the ESP, while
/// `0x06` is usually listed as FAT16 — presumably it is accepted for platforms
/// whose firmware boots from a plain FAT partition; confirm.
///
/// Refer to <https://en.wikipedia.org/wiki/Partition_type>
pub const ESP_ID_MBR: &[u8] = &[0x06, 0xEF];
18
/// Partition type GUID of the EFI System Partition (ESP) for UEFI boot on GPT.
///
/// Written here in lowercase; [`Partition::parttype_matches`] compares type
/// GUIDs ASCII-case-insensitively, so tool output in either case matches.
pub const ESP: &str = "c12a7328-f81f-11d2-ba4b-00a0c93ec93b";
21
/// Top-level JSON document produced by `lsblk -J`, as parsed by [`list_dev`].
#[derive(Debug, Deserialize)]
struct DevicesOutput {
    /// The devices listed under the `blockdevices` key.
    blockdevices: Vec<Device>,
}
26
/// A block device (or partition) entry deserialized from `lsblk -J` output.
///
/// Field names mirror the lsblk JSON keys. Optional fields are `None` when
/// lsblk omits the key or reports it as `null`.
#[allow(dead_code)]
#[derive(Debug, Deserialize)]
pub struct Device {
    /// Device name as reported by lsblk; used to build the `/dev/<name>`
    /// fallback in [`Device::path`].
    pub name: String,
    /// The `serial` column, if reported.
    pub serial: Option<String>,
    /// The `model` column, if reported.
    pub model: Option<String>,
    /// The `partlabel` column, if reported.
    pub partlabel: Option<String>,
    /// The `parttype` column, if reported.
    pub parttype: Option<String>,
    /// The `partuuid` column, if reported.
    pub partuuid: Option<String>,
    /// Nested child devices, if any.
    pub children: Option<Vec<Device>>,
    /// The `size` column.
    /// NOTE(review): presumably bytes, since `list_dev` passes `-b` to lsblk — confirm.
    pub size: u64,
    /// The `maj:min` column; used to locate `/sys/dev/block/<maj:min>/start`
    /// when backfilling `start`.
    #[serde(rename = "maj:min")]
    pub maj_min: Option<String>,
    /// The `start` column.
    // NOTE this one is not available on older util-linux, and
    // will also not exist for whole blockdevs (as opposed to partitions).
    pub start: Option<u64>,

    // Filesystem-related properties
    /// The `label` column, if reported.
    pub label: Option<String>,
    /// The `fstype` column, if reported.
    pub fstype: Option<String>,
    /// The `uuid` column, if reported.
    pub uuid: Option<String>,
    /// The `path` column; may be absent on older lsblk, see [`Device::path`].
    pub path: Option<String>,
}
50
51impl Device {
52    #[allow(dead_code)]
53    // RHEL8's lsblk doesn't have PATH, so we do it
54    pub fn path(&self) -> String {
55        self.path.clone().unwrap_or(format!("/dev/{}", &self.name))
56    }
57
58    #[allow(dead_code)]
59    pub fn has_children(&self) -> bool {
60        self.children.as_ref().is_some_and(|v| !v.is_empty())
61    }
62
63    // The "start" parameter was only added in a version of util-linux that's only
64    // in Fedora 40 as of this writing.
65    fn backfill_start(&mut self) -> Result<()> {
66        let Some(majmin) = self.maj_min.as_deref() else {
67            // This shouldn't happen
68            return Ok(());
69        };
70        let sysfs_start_path = format!("/sys/dev/block/{majmin}/start");
71        if Utf8Path::new(&sysfs_start_path).try_exists()? {
72            let start = std::fs::read_to_string(&sysfs_start_path)
73                .with_context(|| format!("Reading {sysfs_start_path}"))?;
74            tracing::debug!("backfilled start to {start}");
75            self.start = Some(
76                start
77                    .trim()
78                    .parse()
79                    .context("Parsing sysfs start property")?,
80            );
81        }
82        Ok(())
83    }
84
85    /// Older versions of util-linux may be missing some properties. Backfill them if they're missing.
86    pub fn backfill_missing(&mut self) -> Result<()> {
87        // Add new properties to backfill here
88        self.backfill_start()?;
89        // And recurse to child devices
90        for child in self.children.iter_mut().flatten() {
91            child.backfill_missing()?;
92        }
93        Ok(())
94    }
95}
96
97#[context("Listing device {dev}")]
98pub fn list_dev(dev: &Utf8Path) -> Result<Device> {
99    let mut devs: DevicesOutput = Command::new("lsblk")
100        .args(["-J", "-b", "-O"])
101        .arg(dev)
102        .log_debug()
103        .run_and_parse_json()?;
104    for dev in devs.blockdevices.iter_mut() {
105        dev.backfill_missing()?;
106    }
107    devs.blockdevices
108        .into_iter()
109        .next()
110        .ok_or_else(|| anyhow!("no device output from lsblk for {dev}"))
111}
112
/// Top-level JSON document produced by `sfdisk -J`, as parsed by [`partitions_of`].
#[derive(Debug, Deserialize)]
struct SfDiskOutput {
    /// The table found under the `partitiontable` key.
    partitiontable: PartitionTable,
}
117
/// A single partition entry from the `partitions` array of `sfdisk -J` output.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
pub struct Partition {
    /// Device node of the partition (e.g. `/dev/loop0p1`).
    pub node: String,
    /// The `start` value.
    /// NOTE(review): presumably in the table's `unit` (sectors in the test fixtures) — confirm.
    pub start: u64,
    /// The `size` value.
    /// NOTE(review): presumably in the table's `unit` (sectors in the test fixtures) — confirm.
    pub size: u64,
    /// Partition type from the JSON `type` key: a type GUID on GPT, or a
    /// hexadecimal type ID string (e.g. `ef`) on MBR.
    #[serde(rename = "type")]
    pub parttype: String,
    /// Partition UUID, if reported.
    pub uuid: Option<String>,
    /// Partition name, if reported.
    pub name: Option<String>,
    /// The `bootable` flag, if reported; see [`Partition::is_bootable`].
    pub bootable: Option<bool>,
}
130
131#[derive(Debug, Deserialize, PartialEq, Eq)]
132#[serde(rename_all = "kebab-case")]
133pub enum PartitionType {
134    Dos,
135    Gpt,
136    Unknown(String),
137}
138
/// The `partitiontable` object from `sfdisk -J` output.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
pub struct PartitionTable {
    /// Partition table label (`dos`, `gpt`, ...).
    pub label: PartitionType,
    /// Table identifier as reported by sfdisk (a GUID on GPT, a `0x…` value on
    /// MBR in the test fixtures).
    pub id: String,
    /// Path of the block device the table belongs to (e.g. `/dev/loop0`).
    pub device: String,
    // We're not using these fields
    // pub unit: String,
    // pub firstlba: u64,
    // pub lastlba: u64,
    // pub sectorsize: u64,
    /// The partitions in the order sfdisk lists them.
    pub partitions: Vec<Partition>,
}
152
153impl PartitionTable {
154    /// Find the partition with the given device name
155    #[allow(dead_code)]
156    pub fn find<'a>(&'a self, devname: &str) -> Option<&'a Partition> {
157        self.partitions.iter().find(|p| p.node.as_str() == devname)
158    }
159
160    pub fn path(&self) -> &Utf8Path {
161        self.device.as_str().into()
162    }
163
164    // Find the partition with the given offset (starting at 1)
165    #[allow(dead_code)]
166    pub fn find_partno(&self, partno: u32) -> Result<&Partition> {
167        let r = self
168            .partitions
169            .get(partno.checked_sub(1).expect("1 based partition offset") as usize)
170            .ok_or_else(|| anyhow::anyhow!("Missing partition for index {partno}"))?;
171        Ok(r)
172    }
173
174    /// Find the partition with the given type UUID (case-insensitive).
175    ///
176    /// Partition type UUIDs are compared case-insensitively per the GPT specification,
177    /// as different tools may report them in different cases.
178    pub fn find_partition_of_type(&self, uuid: &str) -> Option<&Partition> {
179        self.partitions.iter().find(|p| p.parttype_matches(uuid))
180    }
181
182    /// Find the partition with bootable is 'true'.
183    pub fn find_partition_of_bootable(&self) -> Option<&Partition> {
184        self.partitions.iter().find(|p| p.is_bootable())
185    }
186
187    /// Find the esp partition.
188    pub fn find_partition_of_esp(&self) -> Result<Option<&Partition>> {
189        match &self.label {
190            PartitionType::Dos => Ok(self.partitions.iter().find(|b| {
191                u8::from_str_radix(&b.parttype, 16)
192                    .map(|pt| ESP_ID_MBR.contains(&pt))
193                    .unwrap_or(false)
194            })),
195            PartitionType::Gpt => Ok(self.find_partition_of_type(ESP)),
196            _ => Err(anyhow::anyhow!("Unsupported partition table type")),
197        }
198    }
199}
200
201impl Partition {
202    #[allow(dead_code)]
203    pub fn path(&self) -> &Utf8Path {
204        self.node.as_str().into()
205    }
206
207    /// Check if this partition's type matches the given UUID (case-insensitive).
208    ///
209    /// Partition type UUIDs are compared case-insensitively per the GPT specification,
210    /// as different tools may report them in different cases.
211    pub fn parttype_matches(&self, uuid: &str) -> bool {
212        self.parttype.eq_ignore_ascii_case(uuid)
213    }
214
215    /// Check this partition's bootable property.
216    pub fn is_bootable(&self) -> bool {
217        self.bootable.unwrap_or(false)
218    }
219}
220
221#[context("Listing partitions of {dev}")]
222pub fn partitions_of(dev: &Utf8Path) -> Result<PartitionTable> {
223    let o: SfDiskOutput = Command::new("sfdisk")
224        .args(["-J", dev.as_str()])
225        .run_and_parse_json()?;
226    Ok(o.partitiontable)
227}
228
/// An allocated loopback block device that is detached again on `close` or drop.
pub struct LoopbackDevice {
    /// Path of the loopback device; `Some` from creation until the device is
    /// closed (the close path takes the value).
    pub dev: Option<Utf8PathBuf>,
    // Handle to the cleanup helper process
    // (`None` when the helper could not be spawned, or after close).
    cleanup_handle: Option<LoopbackCleanupHandle>,
}
234
/// Handle to manage the cleanup helper process for loopback devices
///
/// Wraps the spawned helper so the owning [`LoopbackDevice`] can terminate it
/// when the device is closed normally.
struct LoopbackCleanupHandle {
    /// Child process handle
    child: std::process::Child,
}
240
241impl LoopbackDevice {
242    // Create a new loopback block device targeting the provided file path.
243    pub fn new(path: &Path) -> Result<Self> {
244        let direct_io = match env::var("BOOTC_DIRECT_IO") {
245            Ok(val) => {
246                if val == "on" {
247                    "on"
248                } else {
249                    "off"
250                }
251            }
252            Err(_e) => "off",
253        };
254
255        let dev = Command::new("losetup")
256            .args([
257                "--show",
258                format!("--direct-io={direct_io}").as_str(),
259                "-P",
260                "--find",
261            ])
262            .arg(path)
263            .run_get_string()?;
264        let dev = Utf8PathBuf::from(dev.trim());
265        tracing::debug!("Allocated loopback {dev}");
266
267        // Try to spawn cleanup helper, but don't fail if it doesn't work
268        let cleanup_handle = match Self::spawn_cleanup_helper(dev.as_str()) {
269            Ok(handle) => Some(handle),
270            Err(e) => {
271                tracing::warn!(
272                    "Failed to spawn loopback cleanup helper for {}: {}. \
273                     Loopback device may not be cleaned up if process is interrupted.",
274                    dev,
275                    e
276                );
277                None
278            }
279        };
280
281        Ok(Self {
282            dev: Some(dev),
283            cleanup_handle,
284        })
285    }
286
287    // Access the path to the loopback block device.
288    pub fn path(&self) -> &Utf8Path {
289        // SAFETY: The option cannot be destructured until we are dropped
290        self.dev.as_deref().unwrap()
291    }
292
293    /// Spawn a cleanup helper process that will clean up the loopback device
294    /// if the parent process dies unexpectedly
295    fn spawn_cleanup_helper(device_path: &str) -> Result<LoopbackCleanupHandle> {
296        // Try multiple strategies to find the bootc binary
297        let bootc_path = bootc_utils::reexec::executable_path()
298            .context("Failed to locate bootc binary for cleanup helper")?;
299
300        // Create the helper process
301        let mut cmd = Command::new(bootc_path);
302        cmd.args([
303            "internals",
304            "loopback-cleanup-helper",
305            "--device",
306            device_path,
307        ]);
308
309        // Set environment variable to indicate this is a cleanup helper
310        cmd.env("BOOTC_LOOPBACK_CLEANUP_HELPER", "1");
311
312        // Set up stdio to redirect to /dev/null
313        cmd.stdin(Stdio::null());
314        cmd.stdout(Stdio::null());
315        // Don't redirect stderr so we can see error messages
316
317        // Spawn the process
318        let child = cmd
319            .spawn()
320            .context("Failed to spawn loopback cleanup helper")?;
321
322        Ok(LoopbackCleanupHandle { child })
323    }
324
325    // Shared backend for our `close` and `drop` implementations.
326    fn impl_close(&mut self) -> Result<()> {
327        // SAFETY: This is the only place we take the option
328        let Some(dev) = self.dev.take() else {
329            tracing::trace!("loopback device already deallocated");
330            return Ok(());
331        };
332
333        // Kill the cleanup helper since we're cleaning up normally
334        if let Some(mut cleanup_handle) = self.cleanup_handle.take() {
335            // Send SIGTERM to the child process and let it do the cleanup
336            let _ = cleanup_handle.child.kill();
337        }
338
339        Command::new("losetup")
340            .args(["-d", dev.as_str()])
341            .run_capture_stderr()
342    }
343
344    /// Consume this device, unmounting it.
345    pub fn close(mut self) -> Result<()> {
346        self.impl_close()
347    }
348}
349
350impl Drop for LoopbackDevice {
351    fn drop(&mut self) {
352        // Best effort to unmount if we're dropped without invoking `close`
353        let _ = self.impl_close();
354    }
355}
356
357/// Main function for the loopback cleanup helper process
358/// This function does not return - it either exits normally or via signal
359pub async fn run_loopback_cleanup_helper(device_path: &str) -> Result<()> {
360    // Check if we're running as a cleanup helper
361    if std::env::var("BOOTC_LOOPBACK_CLEANUP_HELPER").is_err() {
362        anyhow::bail!("This function should only be called as a cleanup helper");
363    }
364
365    // Set up death signal notification - we want to be notified when parent dies
366    rustix::process::set_parent_process_death_signal(Some(rustix::process::Signal::TERM))
367        .context("Failed to set parent death signal")?;
368
369    // Wait for SIGTERM (either from parent death or normal cleanup)
370    tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
371        .expect("Failed to create signal stream")
372        .recv()
373        .await;
374
375    // Clean up the loopback device
376    let output = std::process::Command::new("losetup")
377        .args(["-d", device_path])
378        .output();
379
380    match output {
381        Ok(output) if output.status.success() => {
382            // Log to systemd journal instead of stderr
383            tracing::info!("Cleaned up leaked loopback device {}", device_path);
384            std::process::exit(0);
385        }
386        Ok(output) => {
387            let stderr = String::from_utf8_lossy(&output.stderr);
388            tracing::error!(
389                "Failed to clean up loopback device {}: {}. Stderr: {}",
390                device_path,
391                output.status,
392                stderr.trim()
393            );
394            std::process::exit(1);
395        }
396        Err(e) => {
397            tracing::error!(
398                "Error executing losetup to clean up loopback device {}: {}",
399                device_path,
400                e
401            );
402            std::process::exit(1);
403        }
404    }
405}
406
407/// Parse key-value pairs from lsblk --pairs.
408/// Newer versions of lsblk support JSON but the one in CentOS 7 doesn't.
409fn split_lsblk_line(line: &str) -> HashMap<String, String> {
410    static REGEX: OnceLock<Regex> = OnceLock::new();
411    let regex = REGEX.get_or_init(|| Regex::new(r#"([A-Z-_]+)="([^"]+)""#).unwrap());
412    let mut fields: HashMap<String, String> = HashMap::new();
413    for cap in regex.captures_iter(line) {
414        fields.insert(cap[1].to_string(), cap[2].to_string());
415    }
416    fields
417}
418
419/// This is a bit fuzzy, but... this function will return every block device in the parent
420/// hierarchy of `device` capable of containing other partitions. So e.g. parent devices of type
421/// "part" doesn't match, but "disk" and "mpath" does.
422pub fn find_parent_devices(device: &str) -> Result<Vec<String>> {
423    let output = Command::new("lsblk")
424        // Older lsblk, e.g. in CentOS 7.6, doesn't support PATH, but --paths option
425        .arg("--pairs")
426        .arg("--paths")
427        .arg("--inverse")
428        .arg("--output")
429        .arg("NAME,TYPE")
430        .arg(device)
431        .run_get_string()?;
432    let mut parents = Vec::new();
433    // skip first line, which is the device itself
434    for line in output.lines().skip(1) {
435        let dev = split_lsblk_line(line);
436        let name = dev
437            .get("NAME")
438            .with_context(|| format!("device in hierarchy of {device} missing NAME"))?;
439        let kind = dev
440            .get("TYPE")
441            .with_context(|| format!("device in hierarchy of {device} missing TYPE"))?;
442        if kind == "disk" || kind == "loop" {
443            parents.push(name.clone());
444        } else if kind == "mpath" {
445            parents.push(name.clone());
446            // we don't need to know what disks back the multipath
447            break;
448        }
449    }
450    Ok(parents)
451}
452
453/// Parse a string into mibibytes
454pub fn parse_size_mib(mut s: &str) -> Result<u64> {
455    let suffixes = [
456        ("MiB", 1u64),
457        ("M", 1u64),
458        ("GiB", 1024),
459        ("G", 1024),
460        ("TiB", 1024 * 1024),
461        ("T", 1024 * 1024),
462    ];
463    let mut mul = 1u64;
464    for (suffix, imul) in suffixes {
465        if let Some((sv, rest)) = s.rsplit_once(suffix) {
466            if !rest.is_empty() {
467                anyhow::bail!("Trailing text after size: {rest}");
468            }
469            s = sv;
470            mul = imul;
471        }
472    }
473    let v = s.parse::<u64>()?;
474    Ok(v * mul)
475}
476
/// Unit tests for size parsing and for deserializing `lsblk` / `sfdisk` JSON.
#[cfg(test)]
mod test {
    use super::*;

    /// Bare numbers are returned unchanged; `M`/`MiB`, `G` and `T` suffixes scale by
    /// 1, 1024 and 1024 * 1024 respectively.
    #[test]
    fn test_parse_size_mib() {
        let ident_cases = [0, 10, 9, 1024].into_iter().map(|k| (k.to_string(), k));
        let cases = [
            ("0M", 0),
            ("10M", 10),
            ("10MiB", 10),
            ("1G", 1024),
            ("9G", 9216),
            ("11T", 11 * 1024 * 1024),
        ]
        .into_iter()
        .map(|(k, v)| (k.to_string(), v));
        for (s, v) in ident_cases.chain(cases) {
            assert_eq!(parse_size_mib(&s).unwrap(), v as u64, "Parsing {s}");
        }
    }

    /// The lsblk JSON fixture parses, and the first child of the first device
    /// carries the expected partition type and partition UUID.
    #[test]
    fn test_parse_lsblk() {
        let fixture = include_str!("../tests/fixtures/lsblk.json");
        let devs: DevicesOutput = serde_json::from_str(fixture).unwrap();
        let dev = devs.blockdevices.into_iter().next().unwrap();
        let children = dev.children.as_deref().unwrap();
        assert_eq!(children.len(), 3);
        let first_child = &children[0];
        assert_eq!(
            first_child.parttype.as_deref().unwrap(),
            "21686148-6449-6e6f-744e-656564454649"
        );
        assert_eq!(
            first_child.partuuid.as_deref().unwrap(),
            "3979e399-262f-4666-aabc-7ab5d3add2f0"
        );
    }

    /// A GPT `sfdisk -J` document parses (keys not modelled by the structs are
    /// ignored) and partitions can be looked up by device node.
    #[test]
    fn test_parse_sfdisk() -> Result<()> {
        let fixture = indoc::indoc! { r#"
        {
            "partitiontable": {
               "label": "gpt",
               "id": "A67AA901-2C72-4818-B098-7F1CAC127279",
               "device": "/dev/loop0",
               "unit": "sectors",
               "firstlba": 34,
               "lastlba": 20971486,
               "sectorsize": 512,
               "partitions": [
                  {
                     "node": "/dev/loop0p1",
                     "start": 2048,
                     "size": 8192,
                     "type": "9E1A2D38-C612-4316-AA26-8B49521E5A8B",
                     "uuid": "58A4C5F0-BD12-424C-B563-195AC65A25DD",
                     "name": "PowerPC-PReP-boot"
                  },{
                     "node": "/dev/loop0p2",
                     "start": 10240,
                     "size": 20961247,
                     "type": "0FC63DAF-8483-4772-8E79-3D69D8477DE4",
                     "uuid": "F51ABB0D-DA16-4A21-83CB-37F4C805AAA0",
                     "name": "root"
                  }
               ]
            }
         }
        "# };
        let table: SfDiskOutput = serde_json::from_str(fixture).unwrap();
        assert_eq!(
            table.partitiontable.find("/dev/loop0p2").unwrap().size,
            20961247
        );
        Ok(())
    }

    /// `parttype_matches` ignores ASCII case and rejects a different UUID.
    #[test]
    fn test_parttype_matches() {
        let partition = Partition {
            node: "/dev/loop0p1".to_string(),
            start: 2048,
            size: 8192,
            parttype: "c12a7328-f81f-11d2-ba4b-00a0c93ec93b".to_string(), // lowercase ESP UUID
            uuid: Some("58A4C5F0-BD12-424C-B563-195AC65A25DD".to_string()),
            name: Some("EFI System".to_string()),
            bootable: None,
        };

        // Test exact match (lowercase)
        assert!(partition.parttype_matches("c12a7328-f81f-11d2-ba4b-00a0c93ec93b"));

        // Test case-insensitive match (uppercase)
        assert!(partition.parttype_matches("C12A7328-F81F-11D2-BA4B-00A0C93EC93B"));

        // Test case-insensitive match (mixed case)
        assert!(partition.parttype_matches("C12a7328-F81f-11d2-Ba4b-00a0C93ec93b"));

        // Test non-match
        assert!(!partition.parttype_matches("0FC63DAF-8483-4772-8E79-3D69D8477DE4"));
    }

    /// Lookups by type UUID on a GPT table are case-insensitive, and the ESP is
    /// found through `find_partition_of_esp`.
    #[test]
    fn test_find_partition_of_type() -> Result<()> {
        let fixture = indoc::indoc! { r#"
        {
            "partitiontable": {
               "label": "gpt",
               "id": "A67AA901-2C72-4818-B098-7F1CAC127279",
               "device": "/dev/loop0",
               "unit": "sectors",
               "firstlba": 34,
               "lastlba": 20971486,
               "sectorsize": 512,
               "partitions": [
                  {
                     "node": "/dev/loop0p1",
                     "start": 2048,
                     "size": 8192,
                     "type": "C12A7328-F81F-11D2-BA4B-00A0C93EC93B",
                     "uuid": "58A4C5F0-BD12-424C-B563-195AC65A25DD",
                     "name": "EFI System"
                  },{
                     "node": "/dev/loop0p2",
                     "start": 10240,
                     "size": 20961247,
                     "type": "0FC63DAF-8483-4772-8E79-3D69D8477DE4",
                     "uuid": "F51ABB0D-DA16-4A21-83CB-37F4C805AAA0",
                     "name": "root"
                  }
               ]
            }
         }
        "# };
        let table: SfDiskOutput = serde_json::from_str(fixture).unwrap();

        // Find ESP partition using lowercase UUID (should match uppercase in fixture)
        let esp = table
            .partitiontable
            .find_partition_of_type("c12a7328-f81f-11d2-ba4b-00a0c93ec93b");
        assert!(esp.is_some());
        assert_eq!(esp.unwrap().node, "/dev/loop0p1");

        // Find root partition using lowercase UUID (should match the uppercase
        // UUID in the fixture case-insensitively)
        let root = table
            .partitiontable
            .find_partition_of_type("0fc63daf-8483-4772-8e79-3d69d8477de4");
        assert!(root.is_some());
        assert_eq!(root.unwrap().node, "/dev/loop0p2");

        // Try to find non-existent partition type
        let nonexistent = table
            .partitiontable
            .find_partition_of_type("00000000-0000-0000-0000-000000000000");
        assert!(nonexistent.is_none());

        // Find esp partition on GPT
        let esp = table.partitiontable.find_partition_of_esp()?.unwrap();
        assert_eq!(esp.node, "/dev/loop0p1");

        Ok(())
    }
    /// On an MBR ("dos") table, the bootable partition and the ESP are both found.
    #[test]
    fn test_find_partition_of_type_mbr() -> Result<()> {
        let fixture = indoc::indoc! { r#"
        {
            "partitiontable": {
                "label": "dos",
                "id": "0xc1748067",
                "device": "/dev/mmcblk0",
                "unit": "sectors",
                "sectorsize": 512,
                "partitions": [
                    {
                        "node": "/dev/mmcblk0p1",
                        "start": 2048,
                        "size": 1026048,
                        "type": "6",
                        "bootable": true
                    },{
                        "node": "/dev/mmcblk0p2",
                        "start": 1028096,
                        "size": 2097152,
                        "type": "83"
                    },{
                        "node": "/dev/mmcblk0p3",
                        "start": 3125248,
                        "size": 121610240,
                        "type": "ef"
                    }
                ]
            }
        }
        "# };
        let table: SfDiskOutput = serde_json::from_str(fixture).unwrap();

        // Find the partition whose `bootable` flag is true
        assert_eq!(table.partitiontable.label, PartitionType::Dos);
        let esp = table
            .partitiontable
            .find_partition_of_bootable()
            .expect("bootable partition not found");
        assert_eq!(esp.node, "/dev/mmcblk0p1");

        // Find esp partition on MBR: the first partition (type "6") is returned
        // because 0x06 is listed in ESP_ID_MBR and it precedes the "ef" partition
        let esp1 = table.partitiontable.find_partition_of_esp()?.unwrap();
        assert_eq!(esp1.node, "/dev/mmcblk0p1");
        Ok(())
    }
}