Skip to content

Commit

Permalink
Merge pull request #919 from cgwalters/dynamic-mount-dev
Browse files Browse the repository at this point in the history
install: Automatically set up /dev and /var/lib/containers
  • Loading branch information
cgwalters authored Dec 4, 2024
2 parents d866f5c + 3261203 commit 8d4bf5c
Show file tree
Hide file tree
Showing 7 changed files with 187 additions and 98 deletions.
2 changes: 1 addition & 1 deletion hack/lldb/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ sudo podman build --build-arg "sshpubkey=$(cat ~/.ssh/id_rsa.pub)" -f Containerf
mkdir -p ~/.cache/bootc-dev/disks
rm -f ~/.cache/bootc-dev/disks/lldb.raw
truncate -s 10G ~/.cache/bootc-dev/disks/lldb.raw
sudo podman run --pid=host --network=host --privileged --security-opt label=type:unconfined_t -v /dev:/dev -v /var/lib/containers:/var/lib/containers -v ~/.cache/bootc-dev/disks:/output -v /dev:/dev localhost/bootc-lldb bootc install to-disk --via-loopback --generic-image --skip-fetch-check /output/lldb.raw
sudo podman run --pid=host --network=host --privileged --security-opt label=type:unconfined_t -v ~/.cache/bootc-dev/disks:/output localhost/bootc-lldb bootc install to-disk --via-loopback --generic-image --skip-fetch-check /output/lldb.raw

# create a new VM in libvirt
set +e
Expand Down
110 changes: 28 additions & 82 deletions lib/src/install.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ use crate::utils::sigpolicy_from_opts;
const BOOT: &str = "boot";
/// Directory for transient runtime state
const RUN_BOOTC: &str = "/run/bootc";
/// The default path for the host rootfs
const ALONGSIDE_ROOT_MOUNT: &str = "/target";
/// This is an ext4 special directory we need to ignore.
const LOST_AND_FOUND: &str = "lost+found";
/// The filename of the composefs EROFS superblock; TODO move this into ostree
Expand Down Expand Up @@ -316,9 +318,10 @@ pub(crate) struct InstallToExistingRootOpts {
#[clap(long)]
pub(crate) acknowledge_destructive: bool,

/// Path to the mounted root; it's expected to invoke podman with
/// `-v /:/target`, then supplying this argument is unnecessary.
#[clap(default_value = "/target")]
/// Path to the mounted root; providing this is no longer necessary.
/// Historically it was necessary to ensure the host rootfs was mounted here
/// via e.g. `-v /:/target`.
#[clap(default_value = ALONGSIDE_ROOT_MOUNT)]
pub(crate) root_path: Utf8PathBuf,
}

Expand All @@ -333,8 +336,6 @@ pub(crate) struct SourceInfo {
pub(crate) selinux: bool,
/// Whether the source is available in the host mount namespace
pub(crate) in_host_mountns: bool,
/// Whether we were invoked with -v /var/lib/containers:/var/lib/containers
pub(crate) have_host_container_storage: bool,
}

// Shared read-only global state
Expand Down Expand Up @@ -516,38 +517,13 @@ impl SourceInfo {
tracing::debug!("Finding digest for image ID {}", container_info.imageid);
let digest = crate::podman::imageid_to_digest(&container_info.imageid)?;

let have_host_container_storage = Utf8Path::new(crate::podman::CONTAINER_STORAGE)
.try_exists()?
&& ostree_ext::mountutil::is_mountpoint(
&root,
crate::podman::CONTAINER_STORAGE.trim_start_matches('/'),
)?
.unwrap_or_default();

// Verify up front we can do the fetch
if have_host_container_storage {
tracing::debug!("Host container storage found");
} else {
tracing::debug!(
"No {} mount available, checking skopeo",
crate::podman::CONTAINER_STORAGE
);
require_skopeo_with_containers_storage()?;
}

Self::new(
imageref,
Some(digest),
root,
true,
have_host_container_storage,
)
Self::new(imageref, Some(digest), root, true)
}

#[context("Creating source info from a given imageref")]
pub(crate) fn from_imageref(imageref: &str, root: &Dir) -> Result<Self> {
let imageref = ostree_container::ImageReference::try_from(imageref)?;
Self::new(imageref, None, root, false, false)
Self::new(imageref, None, root, false)
}

fn have_selinux_from_repo(root: &Dir) -> Result<bool> {
Expand All @@ -573,7 +549,6 @@ impl SourceInfo {
digest: Option<String>,
root: &Dir,
in_host_mountns: bool,
have_host_container_storage: bool,
) -> Result<Self> {
let selinux = if Path::new("/ostree/repo").try_exists()? {
Self::have_selinux_from_repo(root)?
Expand All @@ -585,7 +560,6 @@ impl SourceInfo {
digest,
selinux,
in_host_mountns,
have_host_container_storage,
})
}
}
Expand Down Expand Up @@ -716,19 +690,7 @@ async fn install_container(
}
};

// We need to fetch the container image from the root mount namespace. If
// we don't have /var/lib/containers mounted in this image, fork off skopeo
// in the host mountns.
let skopeo_cmd = if !state.source.have_host_container_storage {
Some(run_in_host_mountns("skopeo"))
} else {
None
};
let proxy_cfg = ostree_container::store::ImageProxyConfig {
skopeo_cmd,
..Default::default()
};

let proxy_cfg = ostree_container::store::ImageProxyConfig::default();
(src_imageref, Some(proxy_cfg))
};
let src_imageref = ostree_container::OstreeImageReference {
Expand Down Expand Up @@ -895,32 +857,6 @@ pub(crate) fn exec_in_host_mountns(args: &[std::ffi::OsString]) -> Result<()> {
Err(Command::new(cmd).args(args).exec()).context("exec")?
}

/// Verify that the `skopeo` found via `run_in_host_mountns` is version 1.11 or newer.
///
/// Runs `skopeo --version` and parses the `skopeo version MAJOR.MINOR...`
/// line it prints. Only the major and minor components are inspected.
///
/// # Errors
///
/// Returns an error if skopeo cannot be run, if its output does not start with
/// `skopeo version `, if the major or minor component is missing or not an
/// unsigned integer, or if the parsed version is older than 1.11.
#[context("Querying skopeo version")]
fn require_skopeo_with_containers_storage() -> Result<()> {
    let out = Task::new_cmd("skopeo --version", run_in_host_mountns("skopeo"))
        .args(["--version"])
        .quiet()
        .read()
        .context("Failed to run skopeo (it currently must be installed in the host root)")?;
    // Trim surrounding whitespace so a trailing newline cannot end up glued to
    // the last version component we parse.
    let mut components = out
        .trim()
        .strip_prefix("skopeo version ")
        .map(|v| v.split('.'))
        .ok_or_else(|| anyhow::anyhow!("Unexpected output from skopeo version"))?;
    let major = components
        .next()
        .ok_or_else(|| anyhow::anyhow!("Missing major version"))?
        .parse::<u64>()?;
    let minor = components
        .next()
        .ok_or_else(|| anyhow::anyhow!("Missing minor version"))?
        .parse::<u64>()?;
    // Tuples compare lexicographically, so the minor component is only
    // consulted when the major is exactly 1. The previous check
    // (`major > 1 || minor > 10`) would have accepted e.g. 0.11.
    if (major, minor) >= (1, 11) {
        Ok(())
    } else {
        anyhow::bail!("skopeo >= 1.11 is required on host")
    }
}

pub(crate) struct RootSetup {
luks_device: Option<String>,
device_info: crate::blockdev::PartitionTable,
Expand Down Expand Up @@ -1269,6 +1205,8 @@ async fn prepare_install(
tracing::debug!("Target image reference: {target_imgref}");

// A bit of basic global state setup
crate::mount::ensure_mirrored_host_mount("/dev")?;
crate::mount::ensure_mirrored_host_mount("/var/lib/containers")?;
ensure_var()?;
setup_tmp_mounts()?;
// Allocate a temporary directory we can use in various places to avoid
Expand Down Expand Up @@ -1454,12 +1392,6 @@ async fn install_to_filesystem_impl(state: &State, rootfs: &mut RootSetup) -> Re
.ok_or_else(|| anyhow!("No uuid for boot/root"))?;
tracing::debug!("boot uuid={boot_uuid}");

// If we're doing an alongside install, then the /dev bootupd sees needs to be the host's.
ensure!(
crate::mount::is_same_as_host(Utf8Path::new("/dev"))?,
"Missing /dev mount to host /dev"
);

let bound_images = BoundImages::from_state(state).await?;

// Initialize the ostree sysroot (repo, stateroot, etc.)
Expand Down Expand Up @@ -1514,9 +1446,6 @@ pub(crate) async fn install_to_disk(mut opts: InstallToDiskOpts) -> Result<()> {
block_opts.device
);
}
if !crate::mount::is_same_as_host(Utf8Path::new("/dev"))? {
anyhow::bail!("Loopback mounts (--via-loopback) require host devices (-v /dev:/dev)");
}
} else if !target_blockdev_meta.file_type().is_block_device() {
anyhow::bail!("Not a block device: {}", block_opts.device);
}
Expand Down Expand Up @@ -1705,6 +1634,23 @@ pub(crate) async fn install_to_filesystem(
// And the last bit of state here is the fsopts, which we also destructure now.
let mut fsopts = opts.filesystem_opts;

// If we're doing an alongside install, automatically set up the host rootfs
// mount if it wasn't done already. This only applies when the caller left the
// root path at its default and nothing exists at that path yet.
if targeting_host_root
    && fsopts.root_path.as_str() == ALONGSIDE_ROOT_MOUNT
    && !fsopts.root_path.try_exists()?
{
    tracing::debug!("Mounting host / to {ALONGSIDE_ROOT_MOUNT}");
    std::fs::create_dir(ALONGSIDE_ROOT_MOUNT)?;
    crate::mount::bind_mount_from_pidns(
        crate::mount::PID1,
        "/".into(),
        ALONGSIDE_ROOT_MOUNT.into(),
        true,
    )
    // `context()` takes its argument verbatim, so a `{...}` placeholder in a
    // plain string literal is never interpolated; format the message explicitly.
    .with_context(|| format!("Mounting host / to {ALONGSIDE_ROOT_MOUNT}"))?;
}

// Check that the target is a directory
{
let root_path = &fsopts.root_path;
Expand Down
156 changes: 154 additions & 2 deletions lib/src/mount.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,36 @@
//! Helpers for interacting with mountpoints
use std::process::Command;
use std::{
fs,
os::fd::{AsFd, OwnedFd},
process::Command,
};

use anyhow::{anyhow, Result};
use anyhow::{anyhow, Context, Result};
use bootc_utils::CommandRunExt;
use camino::Utf8Path;
use fn_error_context::context;
use rustix::{
mount::{MoveMountFlags, OpenTreeFlags},
net::{
AddressFamily, RecvFlags, SendAncillaryBuffer, SendAncillaryMessage, SendFlags,
SocketFlags, SocketType,
},
process::WaitOptions,
thread::Pid,
};
use serde::Deserialize;

use crate::task::Task;

/// The [`Pid`] value for process ID 1, constructed at compile time.
pub(crate) const PID1: Pid = const {
    // `Pid::from_raw` yields `None` for values it does not accept; 1 is
    // expected to be accepted, so the `else` arm is a build-time failure.
    if let Some(pid1) = Pid::from_raw(1) {
        pid1
    } else {
        panic!("Expected to parse pid1")
    }
};

#[derive(Deserialize, Debug)]
#[serde(rename_all = "kebab-case")]
#[allow(dead_code)]
Expand Down Expand Up @@ -124,3 +145,134 @@ pub(crate) fn is_same_as_host(path: &Utf8Path) -> Result<bool> {
);
Ok(devstat.f_fsid == hostdevstat.f_fsid)
}

/// Given a pid, enter its mount namespace and acquire a file descriptor
/// for a mount from that namespace.
///
/// A child process is forked to do the work: it enters the mount namespace of
/// `pid` via `/proc/<pid>/ns/mnt`, calls `open_tree` on `path` with
/// `OPEN_TREE_CLONE` (plus `AT_RECURSIVE` when `recursive` is set), and passes
/// the resulting file descriptor back to the parent over a Unix socket pair.
///
/// # Errors
///
/// Returns an error if the socket pair or the fork cannot be created, if the
/// child does not deliver the expected message and file descriptor (the
/// child's own failure reason is printed to its stderr), or if waiting for the
/// child fails.
#[allow(unsafe_code)]
#[context("Opening mount tree from pid")]
pub(crate) fn open_tree_from_pidns(
    pid: rustix::process::Pid,
    path: &Utf8Path,
    recursive: bool,
) -> Result<OwnedFd> {
    // Allocate a socket pair to use for sending file descriptors.
    let (sock_parent, sock_child) = rustix::net::socketpair(
        AddressFamily::UNIX,
        SocketType::STREAM,
        SocketFlags::CLOEXEC,
        None,
    )
    .context("socketpair")?;
    const DUMMY_DATA: &[u8] = &[b'!'];
    // SAFETY: fork() takes no arguments; every success and error path of the
    // child arm below ends in `std::process::exit` rather than returning to
    // our caller.
    match unsafe { libc::fork() } {
        0 => {
            // We're in the child. At this point we know we don't have multiple threads, so we
            // can safely `setns`.

            // The child only sends; close the inherited copy of the parent's end.
            drop(sock_parent);

            // Do the fallible work inside a closure: a bare `?` here would
            // return an error *into the caller's code inside the forked
            // child*, which would then keep running as a duplicate of the
            // parent process.
            let sent = (|| -> Result<()> {
                // Open up the namespace of the target process as a file descriptor, and enter it.
                let pidlink = fs::File::open(format!("/proc/{}/ns/mnt", pid.as_raw_nonzero()))?;
                rustix::thread::move_into_link_name_space(
                    pidlink.as_fd(),
                    Some(rustix::thread::LinkNameSpaceType::Mount),
                )
                .context("setns")?;

                // Open the target mount path as a file descriptor.
                let recursive = if recursive {
                    OpenTreeFlags::AT_RECURSIVE
                } else {
                    OpenTreeFlags::empty()
                };
                let tree = rustix::mount::open_tree(
                    rustix::fs::CWD,
                    path.as_std_path(),
                    OpenTreeFlags::OPEN_TREE_CLOEXEC | OpenTreeFlags::OPEN_TREE_CLONE | recursive,
                )
                .context("open_tree")?;

                // And send that file descriptor via fd passing over the socketpair.
                let fds = [tree.as_fd()];
                let mut buffer = [0u8; rustix::cmsg_space!(ScmRights(1))];
                let mut control = SendAncillaryBuffer::new(&mut buffer);
                anyhow::ensure!(
                    control.push(SendAncillaryMessage::ScmRights(&fds)),
                    "Failed to queue file descriptor for sending"
                );
                let ios = std::io::IoSlice::new(DUMMY_DATA);
                rustix::net::sendmsg(&sock_child, &[ios], &mut control, SendFlags::empty())
                    .context("sendmsg")?;
                Ok(())
            })();
            // Then we're done; always terminate the child here.
            let code = match sent {
                Ok(()) => 0,
                Err(e) => {
                    eprintln!("error: opening mount tree in child: {e:#}");
                    1
                }
            };
            std::process::exit(code)
        }
        -1 => {
            // fork failed
            let e = std::io::Error::last_os_error();
            anyhow::bail!("failed to fork: {e}");
        }
        n => {
            // We're in the parent; create a pid (checking that n > 0).
            let pid = rustix::process::Pid::from_raw(n).unwrap();
            // Close our copy of the child's end. Otherwise, if the child exits
            // without sending anything, the socket never reaches end-of-file
            // and the receive below would block forever.
            drop(sock_child);
            // Receive the mount file descriptor from the child
            let received = (|| -> Result<OwnedFd> {
                let mut cmsg_space = vec![0; rustix::cmsg_space!(ScmRights(1))];
                let mut cmsg_buffer = rustix::net::RecvAncillaryBuffer::new(&mut cmsg_space);
                let mut buf = [0u8; DUMMY_DATA.len()];
                let iov = std::io::IoSliceMut::new(buf.as_mut());
                let mut iov = [iov];
                let nread = rustix::net::recvmsg(
                    &sock_parent,
                    &mut iov,
                    &mut cmsg_buffer,
                    RecvFlags::CMSG_CLOEXEC,
                )
                .context("recvmsg")?
                .bytes;
                // A short or mismatched read means the child failed before
                // sending; report it as an error instead of panicking.
                anyhow::ensure!(
                    nread == DUMMY_DATA.len() && &buf[..] == DUMMY_DATA,
                    "Child process did not send the expected message"
                );
                // And extract the file descriptor
                let fd = cmsg_buffer
                    .drain()
                    .filter_map(|m| match m {
                        rustix::net::RecvAncillaryMessage::ScmRights(f) => Some(f),
                        _ => None,
                    })
                    .flatten()
                    .next()
                    .ok_or_else(|| anyhow::anyhow!("Did not receive a file descriptor"))?;
                Ok(fd)
            })();
            // Reap the child whether or not receiving succeeded, so that a
            // failure does not leave a zombie behind. A receive error takes
            // precedence over a wait error.
            let waited = rustix::process::waitpid(Some(pid), WaitOptions::empty());
            let fd = received?;
            waited?;
            Ok(fd)
        }
    }
}

/// Bind mount `src`, as seen from the mount namespace of process `pid`,
/// onto `target` in our own mount namespace.
///
/// `recursive` is forwarded unchanged to [`open_tree_from_pidns`].
///
/// # Errors
///
/// Fails if the source tree cannot be opened from the other namespace, or if
/// attaching it at `target` fails.
pub(crate) fn bind_mount_from_pidns(
    pid: Pid,
    src: &Utf8Path,
    target: &Utf8Path,
    recursive: bool,
) -> Result<()> {
    // File descriptor for the source tree, obtained from the other namespace.
    let tree_fd = open_tree_from_pidns(pid, src, recursive)?;
    let destination = target.as_std_path();
    // The empty source path plus MOVE_MOUNT_F_EMPTY_PATH makes the source
    // the file descriptor itself rather than a path relative to it.
    rustix::mount::move_mount(
        tree_fd.as_fd(),
        "",
        rustix::fs::CWD,
        destination,
        MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH,
    )
    .context("Moving mount")
}

/// Make sure `path` in our filesystem mirrors the same path on the host.
///
/// The directory is created first if it is missing (e.g. for
/// `/var/lib/containers`). If `is_same_as_host` already reports a match
/// (e.g. because of `-v /dev:/dev`) nothing more is done; otherwise the path
/// is recursively bind mounted from the mount namespace of pid 1.
pub(crate) fn ensure_mirrored_host_mount(path: impl AsRef<Utf8Path>) -> Result<()> {
    let path = path.as_ref();
    // The mount target has to exist before anything can be mounted on it.
    std::fs::create_dir_all(path)?;
    if !is_same_as_host(path)? {
        tracing::debug!("Propagating host mount: {path}");
        return bind_mount_from_pidns(PID1, path, path, true);
    }
    tracing::debug!("Already mounted from host: {path}");
    Ok(())
}
2 changes: 1 addition & 1 deletion ostree-ext/.github/workflows/bootc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
- name: Integration tests
run: |
set -xeuo pipefail
sudo podman run --rm -ti --privileged -v /:/target -v /var/lib/containers:/var/lib/containers -v ./usr/bin/bootc:/usr/bin/bootc --pid=host --security-opt label=disable \
sudo podman run --rm -ti --privileged -v ./usr/bin/bootc:/usr/bin/bootc --pid=host --security-opt label=disable \
quay.io/centos-bootc/centos-bootc-dev:stream9 bootc install to-filesystem \
--karg=foo=bar --disable-selinux --replace=alongside /target
Loading

0 comments on commit 8d4bf5c

Please sign in to comment.