From 99a73b535e0da7b0a276d95a3778c930ead223fe Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Mon, 9 Sep 2024 09:49:27 -0400
Subject: [PATCH] Automatically manage FEX rootfs

This adds a mechanism for automatically managing the FEX rootfs, mounting
and overlayfs'ing the relevant erofs images from the host (as many as
desired).

Depends on:

* libkrun https://github.com/containers/libkrun/pull/217
* libkrunfw https://github.com/containers/libkrunfw/pull/65

Signed-off-by: Alyssa Rosenzweig
---
 crates/krun/src/bin/krun.rs    |  51 ++++++++++++--
 crates/krun/src/cli_options.rs |  11 ++++
 crates/krun/src/guest/mount.rs | 117 +++++++++++++++++++++++++++++----
 3 files changed, 162 insertions(+), 17 deletions(-)

diff --git a/crates/krun/src/bin/krun.rs b/crates/krun/src/bin/krun.rs
index a573378..a3536a5 100644
--- a/crates/krun/src/bin/krun.rs
+++ b/crates/krun/src/bin/krun.rs
@@ -11,10 +11,10 @@ use krun::launch::{launch_or_lock, LaunchResult};
 use krun::net::{connect_to_passt, start_passt};
 use krun::types::MiB;
 use krun_sys::{
-    krun_add_vsock_port, krun_create_ctx, krun_set_exec, krun_set_gpu_options, krun_set_log_level,
-    krun_set_passt_fd, krun_set_root, krun_set_vm_config, krun_set_workdir, krun_start_enter,
-    VIRGLRENDERER_DRM, VIRGLRENDERER_THREAD_SYNC, VIRGLRENDERER_USE_ASYNC_FENCE_CB,
-    VIRGLRENDERER_USE_EGL,
+    krun_add_disk, krun_add_vsock_port, krun_create_ctx, krun_set_exec, krun_set_gpu_options,
+    krun_set_log_level, krun_set_passt_fd, krun_set_root, krun_set_vm_config, krun_set_workdir,
+    krun_start_enter, VIRGLRENDERER_DRM, VIRGLRENDERER_THREAD_SYNC,
+    VIRGLRENDERER_USE_ASYNC_FENCE_CB, VIRGLRENDERER_USE_EGL,
 };
 use log::debug;
 use nix::sys::sysinfo::sysinfo;
@@ -24,6 +24,23 @@ use rustix::process::{
     geteuid, getgid, getrlimit, getuid, sched_setaffinity, setrlimit, CpuSet, Resource,
 };
 
+/// Registers the image at `path` with libkrun as a read-only disk identified by `label`.
+fn add_ro_disk(ctx_id: u32, label: &str, path: &str) -> Result<()> {
+    // The path may come straight from the command line, so report an interior NUL byte as an
+    // error instead of panicking.
+    let path_cstr = CString::new(path).context("Disk path contains a NUL byte")?;
+    let label_cstr = CString::new(label).context("Disk label contains a NUL byte")?;
+
+    // SAFETY: both pointers come from `CString`s that outlive the call.
+    let err = unsafe { krun_add_disk(ctx_id, label_cstr.as_ptr(), path_cstr.as_ptr(), true) };
+
+    if err < 0 {
+        Err(Errno::from_raw_os_error(-err).into())
+    } else {
+        Ok(())
+    }
+}
+
 fn main() -> Result<()> {
     env_logger::init();
 
@@ -122,6 +139,32 @@ fn main() -> Result<()> {
         setrlimit(Resource::Nofile, rlim).context("Failed to raise `RLIMIT_NOFILE`")?;
     }
 
+    // If the user specified a disk image, we want to load and fail if it's missing. If the user
+    // did not specify a disk image, we want to load the system images if installed but fail
+    // gracefully if missing. This follows the principle of least surprise.
+    //
+    // What we don't want is a clever autodiscovery mechanism that searches $HOME for images.
+    // That's liable to blow up in exciting ways. Instead we require images to be selected
+    // explicitly, either on the CLI or hardcoded here.
+    let disks: Vec<String> = if !options.fex_images.is_empty() {
+        options.fex_images
+    } else {
+        let default_disks = vec![
+            "/usr/share/fex-emu/RootFS/default.erofs",
+            "/usr/share/fex-emu/overlays/mesa.erofs",
+        ];
+
+        default_disks
+            .iter()
+            .map(|x| x.to_string())
+            .filter(|x| Path::new(x).exists())
+            .collect()
+    };
+
+    for path in disks {
+        add_ro_disk(ctx_id, &path, &path).context("Failed to configure disk")?;
+    }
+
     {
         // SAFETY: `root_path` is a pointer to a C-string literal.
         let err = unsafe { krun_set_root(ctx_id, c"/".as_ptr()) };
diff --git a/crates/krun/src/cli_options.rs b/crates/krun/src/cli_options.rs
index deac936..1de0677 100644
--- a/crates/krun/src/cli_options.rs
+++ b/crates/krun/src/cli_options.rs
@@ -13,6 +13,7 @@ pub struct Options {
     pub mem: Option<MiB>,
     pub passt_socket: Option<PathBuf>,
     pub server_port: u32,
+    pub fex_images: Vec<String>,
     pub command: PathBuf,
     pub command_args: Vec<String>,
 }
@@ -66,6 +67,15 @@ pub fn options() -> OptionParser<Options> {
     )
     .argument("MEM")
     .optional();
+    let fex_images = long("fex-image")
+        .short('f')
+        .help(
+            "Adds an erofs file to be mounted as a FEX rootfs.
+        May be specified multiple times.
+        First the base image, then overlays in order.",
+        )
+        .argument::<String>("FEX_IMAGE")
+        .many();
     let passt_socket = long("passt-socket")
         .help("Instead of starting passt, connect to passt socket at PATH")
         .argument("PATH")
@@ -89,6 +99,7 @@ pub fn options() -> OptionParser<Options> {
         mem,
         passt_socket,
         server_port,
+        fex_images,
         // positionals
         command,
         command_args,
diff --git a/crates/krun/src/guest/mount.rs b/crates/krun/src/guest/mount.rs
index 8286d64..a4d3f34 100644
--- a/crates/krun/src/guest/mount.rs
+++ b/crates/krun/src/guest/mount.rs
@@ -1,22 +1,120 @@
-use std::fs::File;
+use std::ffi::CString;
+use std::fs::{read_dir, File};
+use std::io::Write;
 use std::os::fd::AsFd;
 use std::path::Path;
 
 use anyhow::{Context, Result};
-use rustix::fs::CWD;
+use rustix::fs::{mkdir, symlink, Mode, CWD};
 use rustix::mount::{
     mount2, mount_bind, move_mount, open_tree, MountFlags, MoveMountFlags, OpenTreeFlags,
 };
 
-pub fn mount_filesystems() -> Result<()> {
+/// Mounts a fresh tmpfs (`noexec`, `nosuid`, `relatime`) on top of `dir`.
+fn make_tmpfs(dir: &str) -> Result<()> {
     mount2(
         Some("tmpfs"),
-        "/var/run",
+        dir,
         Some("tmpfs"),
         MountFlags::NOEXEC | MountFlags::NOSUID | MountFlags::RELATIME,
         None,
     )
-    .context("Failed to mount `/var/run`")?;
+    .with_context(|| format!("Failed to mount tmpfs on `{dir}`"))
+}
+
+/// Creates the directory `dir` with mode 0555.
+fn mkdir_fex(dir: &str) -> Result<()> {
+    mkdir(
+        dir,
+        Mode::RUSR | Mode::XUSR | Mode::RGRP | Mode::XGRP | Mode::ROTH | Mode::XOTH,
+    )
+    .with_context(|| format!("Failed to create `{dir}`"))
+}
+
+/// Mounts every `/dev/vd*` device as a read-only erofs image below `/run/fex-emu/` and exposes
+/// the result at `/run/fex-emu/rootfs`: an overlay when there are several images, a symlink when
+/// there is exactly one. The FEX share directories are then covered by a tmpfs holding a
+/// `Config.json` that points FEX at that rootfs.
+fn mount_fex_rootfs() -> Result<()> {
+    let dir = "/run/fex-emu/";
+    let dir_rootfs = dir.to_string() + "rootfs";
+
+    // Make base directories
+    mkdir_fex(dir)?;
+
+    let flags = MountFlags::RDONLY;
+
+    // Find /dev/vd*. `read_dir` returns entries in no particular order, but the overlay below
+    // depends on the order of the images (base first, then overlays), so sort the device names:
+    // shorter names first, so that `vdz` comes before `vdaa`.
+    let mut names = Vec::new();
+    for entry in read_dir("/dev").context("Failed to list `/dev`")? {
+        let entry = entry.context("Failed to read an entry of `/dev`")?;
+        // Skip names that are not valid UTF-8 instead of panicking on them.
+        if let Ok(name) = entry.file_name().into_string() {
+            if name.starts_with("vd") {
+                names.push(name);
+            }
+        }
+    }
+    names.sort_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)));
+
+    let mut images = Vec::new();
+    for name in names {
+        let path = format!("/dev/{name}");
+        let dir = dir.to_string() + &name;
+
+        // Mount the erofs images.
+        mkdir_fex(&dir)?;
+        mount2(Some(&path), &dir, Some("erofs"), flags, None)
+            .with_context(|| format!("Failed to mount erofs image `{path}`"))?;
+        images.push(dir);
+    }
+
+    if images.len() >= 2 {
+        // Overlay the mounts together. overlayfs stacks `lowerdir` entries with the leftmost
+        // one on top, so the base image has to come last.
+        let opts = format!(
+            "lowerdir={}",
+            images.into_iter().rev().collect::<Vec<String>>().join(":")
+        );
+        let opts = CString::new(opts).context("Overlay options contain a NUL byte")?;
+        let overlay = "overlay".to_string();
+        let overlay_ = Some(&overlay);
+
+        mkdir_fex(&dir_rootfs)?;
+        mount2(overlay_, &dir_rootfs, overlay_, flags, Some(&opts)).context("Failed to overlay")?;
+    } else if images.len() == 1 {
+        // Just expose the one mount
+        symlink(&images[0], &dir_rootfs)?;
+    }
+
+    // Now we need to tell FEX about this. One of the FEX share directories has an unmounted rootfs
+    // and a Config.json telling FEX to use FUSE. Neither should be visible to the guest. Instead,
+    // we want to replace the folders and tell FEX to use our mounted rootfs
+    let json = format!("{{\"Config\":{{\"RootFS\":\"{dir_rootfs}\"}}}}\n");
+    for base in ["/usr/share/fex-emu", "/usr/local/share/fex-emu"] {
+        // Skip share directories that do not exist: there is nothing to hide there.
+        if !Path::new(base).exists() {
+            continue;
+        }
+
+        let path = base.to_string() + "/Config.json";
+
+        make_tmpfs(base)?;
+        File::create(Path::new(&path))?.write_all(json.as_bytes())?;
+    }
+
+    Ok(())
+}
+
+pub fn mount_filesystems() -> Result<()> {
+    make_tmpfs("/var/run")?;
+
+    // The FEX rootfs is optional: report why it could not be set up and keep going.
+    if let Err(err) = mount_fex_rootfs() {
+        println!("Failed to mount FEX rootfs, carrying on without: {err:#}")
+    }
 
     let _ = File::options()
         .write(true)
@@ -60,14 +158,7 @@ pub fn mount_filesystems() -> Result<()> {
     if Path::new("/tmp/.X11-unix").exists() {
         // Mount a tmpfs for X11 sockets, so the guest doesn't clobber host X server
         // sockets
-        mount2(
-            Some("tmpfs"),
-            "/tmp/.X11-unix",
-            Some("tmpfs"),
-            MountFlags::NOEXEC | MountFlags::NOSUID | MountFlags::RELATIME,
-            None,
-        )
-        .context("Failed to mount `/tmp/.X11-unix`")?;
+        make_tmpfs("/tmp/.X11-unix")?;
     }
 
     Ok(())