diff --git a/lib/src/install.rs b/lib/src/install.rs
index 98857edf..6a35cd57 100644
--- a/lib/src/install.rs
+++ b/lib/src/install.rs
@@ -1269,6 +1269,8 @@ async fn prepare_install(
     tracing::debug!("Target image reference: {target_imgref}");
 
     // A bit of basic global state setup
+    crate::mount::ensure_mirrored_host_mount("/dev")?;
+    crate::mount::ensure_mirrored_host_mount("/var/lib/containers")?;
     ensure_var()?;
     setup_tmp_mounts()?;
     // Allocate a temporary directory we can use in various places to avoid
@@ -1454,12 +1456,6 @@ async fn install_to_filesystem_impl(state: &State, rootfs: &mut RootSetup) -> Re
         .ok_or_else(|| anyhow!("No uuid for boot/root"))?;
     tracing::debug!("boot uuid={boot_uuid}");
 
-    // If we're doing an alongside install, then the /dev bootupd sees needs to be the host's.
-    ensure!(
-        crate::mount::is_same_as_host(Utf8Path::new("/dev"))?,
-        "Missing /dev mount to host /dev"
-    );
-
     let bound_images = BoundImages::from_state(state).await?;
 
     // Initialize the ostree sysroot (repo, stateroot, etc.)
@@ -1514,9 +1510,6 @@ pub(crate) async fn install_to_disk(mut opts: InstallToDiskOpts) -> Result<()> {
                 block_opts.device
             );
         }
-        if !crate::mount::is_same_as_host(Utf8Path::new("/dev"))? {
-            anyhow::bail!("Loopback mounts (--via-loopback) require host devices (-v /dev:/dev)");
-        }
     } else if !target_blockdev_meta.file_type().is_block_device() {
         anyhow::bail!("Not a block device: {}", block_opts.device);
     }
diff --git a/lib/src/mount.rs b/lib/src/mount.rs
index 82522933..d4785b17 100644
--- a/lib/src/mount.rs
+++ b/lib/src/mount.rs
@@ -1,11 +1,24 @@
 //! Helpers for interacting with mountpoints
 
-use std::process::Command;
+use std::{
+    fs,
+    os::fd::{AsFd, OwnedFd},
+    process::Command,
+};
 
-use anyhow::{anyhow, Result};
+use anyhow::{anyhow, Context, Result};
 use bootc_utils::CommandRunExt;
 use camino::Utf8Path;
 use fn_error_context::context;
+use rustix::{
+    mount::{MoveMountFlags, OpenTreeFlags},
+    net::{
+        AddressFamily, RecvFlags, SendAncillaryBuffer, SendAncillaryMessage, SendFlags,
+        SocketFlags, SocketType,
+    },
+    process::WaitOptions,
+    thread::Pid,
+};
 use serde::Deserialize;
 
 use crate::task::Task;
@@ -124,3 +137,166 @@ pub(crate) fn is_same_as_host(path: &Utf8Path) -> Result<bool> {
     );
     Ok(devstat.f_fsid == hostdevstat.f_fsid)
 }
+
+/// Given a pid, enter its mount namespace and acquire a file descriptor
+/// for a mount from that namespace.
+///
+/// `setns` on a mount namespace needs a single-threaded process, so the work
+/// happens in a forked child: it enters the namespace of `pid`, clones the
+/// mount at `path` via `open_tree` (including submounts when `recursive` is
+/// set) and passes the descriptor back to us over a socketpair.
+///
+/// Errors raised inside the child are printed to its stderr; the caller then
+/// gets an error saying that no file descriptor was sent.
+#[allow(unsafe_code)]
+#[context("Opening mount tree from pid")]
+pub(crate) fn open_tree_from_pidns(
+    pid: rustix::process::Pid,
+    path: &Utf8Path,
+    recursive: bool,
+) -> Result<OwnedFd> {
+    // Allocate a socket pair to use for sending file descriptors.
+    let (sock_parent, sock_child) = rustix::net::socketpair(
+        AddressFamily::UNIX,
+        SocketType::STREAM,
+        SocketFlags::CLOEXEC,
+        None,
+    )
+    .context("socketpair")?;
+    const DUMMY_DATA: &[u8] = &[b'!'];
+    match unsafe { libc::fork() } {
+        0 => {
+            // We're in the child. At this point we know we don't have multiple threads, so we
+            // can safely `setns`.
+
+            // Only the parent reads from this end.
+            drop(sock_parent);
+
+            // Do the fallible work in a closure. A bare `?` in this arm would return the
+            // error up the caller's stack *inside the forked child*, which would then keep
+            // running as a second copy of the parent instead of exiting.
+            let sent = (|| -> Result<()> {
+                // Open up the namespace of the target process as a file descriptor, and enter it.
+                let pidlink = fs::File::open(format!("/proc/{}/ns/mnt", pid.as_raw_nonzero()))?;
+                rustix::thread::move_into_link_name_space(
+                    pidlink.as_fd(),
+                    Some(rustix::thread::LinkNameSpaceType::Mount),
+                )
+                .context("setns")?;
+
+                // Open the target mount path as a file descriptor.
+                let recursive = if recursive {
+                    OpenTreeFlags::AT_RECURSIVE
+                } else {
+                    OpenTreeFlags::empty()
+                };
+                let fd = rustix::mount::open_tree(
+                    rustix::fs::CWD,
+                    path.as_std_path(),
+                    OpenTreeFlags::OPEN_TREE_CLOEXEC | OpenTreeFlags::OPEN_TREE_CLONE | recursive,
+                )
+                .context("open_tree")?;
+
+                // And send that file descriptor via fd passing over the socketpair.
+                let fd = fd.as_fd();
+                let fds = [fd];
+                let mut buffer = [0u8; rustix::cmsg_space!(ScmRights(1))];
+                let mut control = SendAncillaryBuffer::new(&mut buffer);
+                let pushed = control.push(SendAncillaryMessage::ScmRights(&fds));
+                assert!(pushed);
+                let ios = std::io::IoSlice::new(DUMMY_DATA);
+                rustix::net::sendmsg(sock_child, &[ios], &mut control, SendFlags::empty())
+                    .context("sendmsg")?;
+                Ok(())
+            })();
+            // Then we're done.
+            match sent {
+                Ok(()) => std::process::exit(0),
+                Err(e) => {
+                    eprintln!("error: failed to open mount tree in child: {e:#}");
+                    std::process::exit(1)
+                }
+            }
+        }
+        -1 => {
+            // fork failed
+            let e = std::io::Error::last_os_error();
+            anyhow::bail!("failed to fork: {e}");
+        }
+        n => {
+            // We're in the parent; create a pid (checking that n > 0).
+            let pid = rustix::process::Pid::from_raw(n).unwrap();
+            // Close our copy of the child's end. While it stays open the socket never
+            // reaches EOF, so `recvmsg` would block forever if the child died early.
+            drop(sock_child);
+            // Receive the mount file descriptor from the child
+            let mut cmsg_space = vec![0; rustix::cmsg_space!(ScmRights(1))];
+            let mut cmsg_buffer = rustix::net::RecvAncillaryBuffer::new(&mut cmsg_space);
+            let mut buf = [0u8; DUMMY_DATA.len()];
+            let iov = std::io::IoSliceMut::new(buf.as_mut());
+            let mut iov = [iov];
+            let received = rustix::net::recvmsg(
+                sock_parent,
+                &mut iov,
+                &mut cmsg_buffer,
+                RecvFlags::CMSG_CLOEXEC,
+            )
+            .context("recvmsg");
+            // Reap the child before inspecting the result, so that none of the error
+            // paths below can leave a zombie behind.
+            rustix::process::waitpid(Some(pid), WaitOptions::empty()).context("waitpid")?;
+            let nread = received?.bytes;
+            if nread != DUMMY_DATA.len() || buf != DUMMY_DATA {
+                anyhow::bail!("Child process exited without sending a file descriptor");
+            }
+            // And extract the file descriptor
+            let r = cmsg_buffer
+                .drain()
+                .filter_map(|m| match m {
+                    rustix::net::RecvAncillaryMessage::ScmRights(f) => Some(f),
+                    _ => None,
+                })
+                .flatten()
+                .next()
+                .ok_or_else(|| anyhow::anyhow!("Did not receive a file descriptor"))?;
+            Ok(r)
+        }
+    }
+}
+
+/// Create a bind mount from the mount namespace of the target pid
+/// into our mount namespace.
+///
+/// `src` is resolved inside the namespace of `pid`, `target` inside ours.
+pub(crate) fn bind_mount_from_pidns(
+    pid: Pid,
+    src: &Utf8Path,
+    target: &Utf8Path,
+    recursive: bool,
+) -> Result<()> {
+    let src = open_tree_from_pidns(pid, src, recursive)?;
+    rustix::mount::move_mount(
+        src.as_fd(),
+        "",
+        rustix::fs::CWD,
+        target.as_std_path(),
+        MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH,
+    )
+    .context("Moving mount")?;
+    Ok(())
+}
+
+/// If the target path is not already mirrored from the host (e.g. via -v /dev:/dev)
+/// then recursively mount it from the mount namespace of pid 1.
+pub(crate) fn ensure_mirrored_host_mount(path: impl AsRef<Utf8Path>) -> Result<()> {
+    let path = path.as_ref();
+    // If we didn't have this in our filesystem already (e.g. for /var/lib/containers)
+    // then create it now.
+    std::fs::create_dir_all(path).with_context(|| format!("Creating {path}"))?;
+    if is_same_as_host(path)? {
+        tracing::debug!("Already mounted from host: {path}");
+        return Ok(());
+    }
+    tracing::debug!("Propagating host mount: {path}");
+    bind_mount_from_pidns(Pid::from_raw(1).unwrap(), path, path, true)
+}