From 3bdb09c2d596c18867a2a1a6badabb9f9b6ba11a Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Wed, 4 Oct 2023 13:27:34 +0000 Subject: [PATCH] Back /var/fm/fmd with a dataset from the boot M.2 --- illumos-utils/src/zfs.rs | 41 +++++++++--- sled-agent/src/backing_fs.rs | 92 ++++++++++++++++++++++++++ sled-agent/src/bootstrap/pre_server.rs | 1 + sled-agent/src/lib.rs | 1 + sled-agent/src/sled_agent.rs | 22 ++++-- sled-agent/src/storage_manager.rs | 1 + sled-agent/src/swap_device.rs | 3 - sled-hardware/src/disk.rs | 47 +++++++++++-- 8 files changed, 186 insertions(+), 22 deletions(-) create mode 100644 sled-agent/src/backing_fs.rs diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index ba8cd8c84a8..e5d5fd8feb1 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -61,6 +61,9 @@ enum EnsureFilesystemErrorRaw { #[error("Failed to mount encrypted filesystem: {0}")] MountEncryptedFsFailed(crate::ExecutionError), + + #[error("Failed to mount overlay filesystem: {0}")] + MountOverlayFsFailed(crate::ExecutionError), } /// Error returned by [`Zfs::ensure_filesystem`]. @@ -202,6 +205,7 @@ impl Zfs { /// Creates a new ZFS filesystem named `name`, unless one already exists. /// /// Applies an optional quota, provided _in bytes_. + #[allow(clippy::too_many_arguments)] pub fn ensure_filesystem( name: &str, mountpoint: Mountpoint, @@ -209,6 +213,7 @@ impl Zfs { do_format: bool, encryption_details: Option, size_details: Option, + options: Option>, ) -> Result<(), EnsureFilesystemError> { let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?; if exists { @@ -261,7 +266,14 @@ impl Zfs { ]); } + if let Some(opts) = options { + for o in &opts { + cmd.args(&["-o", &o]); + } + } + cmd.args(&["-o", &format!("mountpoint={}", mountpoint), name]); + execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), mountpoint: mountpoint.clone(), @@ -322,6 +334,20 @@ impl Zfs { Ok(()) } + pub fn mount_overlay_dataset( + name: &str, + mountpoint: &Mountpoint, + ) -> Result<(), EnsureFilesystemError> { + let mut command = std::process::Command::new(PFEXEC); + let cmd = command.args(&[ZFS, "mount", "-O", name]); + execute(cmd).map_err(|err| EnsureFilesystemError { + name: name.to_string(), + mountpoint: mountpoint.clone(), + err: EnsureFilesystemErrorRaw::MountOverlayFsFailed(err), + })?; + Ok(()) + } + // Return (true, mounted) if the dataset exists, (false, false) otherwise, // where mounted is if the dataset is mounted. fn dataset_exists( @@ -385,7 +411,7 @@ impl Zfs { Zfs::get_value(filesystem_name, &format!("oxide:{}", name)) } - fn get_value( + pub fn get_value( filesystem_name: &str, name: &str, ) -> Result { @@ -422,13 +448,12 @@ pub fn get_all_omicron_datasets_for_delete() -> anyhow::Result> { let internal = pool.kind() == crate::zpool::ZpoolKind::Internal; let pool = pool.to_string(); for dataset in &Zfs::list_datasets(&pool)? { - // Avoid erasing crashdump datasets on internal pools - if dataset == "crash" && internal { - continue; - } - - // The swap device might be in use, so don't assert that it can be deleted. - if dataset == "swap" && internal { + // Avoid erasing crashdump, backing data and swap datasets on + // internal pools. The swap device may be in use. + if internal + && (["crash", "backing", "swap"].contains(&dataset.as_str()) + || dataset.starts_with("backing/")) + { continue; } diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs new file mode 100644 index 00000000000..dbca7351d87 --- /dev/null +++ b/sled-agent/src/backing_fs.rs @@ -0,0 +1,92 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Operations for dealing with persistent backing mounts for OS data + +use camino::Utf8PathBuf; +use illumos_utils::zfs::{ + EnsureFilesystemError, GetValueError, Mountpoint, Zfs, +}; + +#[derive(Debug, thiserror::Error)] +pub enum BackingFsError { + #[error("Error administering service: {0}")] + Adm(#[from] smf::AdmError), + + #[error("Error retrieving dataset property: {0}")] + DatasetProperty(#[from] GetValueError), + + #[error("Error mounting dataset: {0}")] + Mount(#[from] EnsureFilesystemError), +} + +struct BackingFs { + // Dataset name + name: &'static str, + // Linked service + service: Option<&'static str>, +} + +impl BackingFs { + const fn new(name: &'static str) -> Self { + Self { name, service: None } + } + + const fn service(mut self, service: &'static str) -> Self { + self.service = Some(service); + self + } +} + +const BACKING_FS_COUNT: usize = 1; +static BACKING_FS: [BackingFs; BACKING_FS_COUNT] = + [BackingFs::new(sled_hardware::disk::M2_BACKING_FM_DATASET) + .service("svc:/system/fmd:default")]; + +pub(crate) fn ensure_backing_fs( + log: &slog::Logger, + boot_zpool_name: &illumos_utils::zpool::ZpoolName, +) -> Result<(), BackingFsError> { + let log = log.new(o!( + "component" => "BackingFs", + )); + for bfs in BACKING_FS.iter().into_iter() { + info!(log, "Processing backing FS {}", bfs.name); + + let dataset = format!("{}/{}", boot_zpool_name, bfs.name); + + if Zfs::get_value(&dataset, "mounted")? == "yes" { + info!(log, "backing FS {} is already mounted", bfs.name); + return Ok(()); + } + + let mountpoint = Zfs::get_value(&dataset, "mountpoint")?; + + if let Some(service) = bfs.service { + info!(log, "Stopping service {}", service); + smf::Adm::new() + .disable() + .temporary() + .synchronous() + .run(smf::AdmSelection::ByPattern(&[service]))?; + } + + info!(log, "Mounting backing FS {} on {}", bfs.name, mountpoint); + + Zfs::mount_overlay_dataset( + &dataset, + &Mountpoint::Path(Utf8PathBuf::from(mountpoint)), + )?; + + if let Some(service) = bfs.service { + info!(log, "Starting service {}", service); + smf::Adm::new() + .enable() + .synchronous() + .run(smf::AdmSelection::ByPattern(&[service]))?; + } + } + + Ok(()) +} diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 0899bdd82fe..71325fef3d9 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -381,6 +381,7 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { do_format, encryption_details, quota, + None, ) .map_err(StartError::EnsureZfsRamdiskDataset) } diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 5c4dbd83107..4e7921c605d 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -17,6 +17,7 @@ pub mod sim; pub mod common; // Modules for the non-simulated sled agent. +mod backing_fs; pub mod bootstrap; pub mod config; mod http_entrypoints; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 7e62f6a8a70..5574edca55a 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -59,9 +59,15 @@ use illumos_utils::{dladm::MockDladm as Dladm, zone::MockZones as Zones}; #[derive(thiserror::Error, Debug)] pub enum Error { + #[error("Could not find boot disk")] + BootDiskNotFound, + #[error("Configuration error: {0}")] Config(#[from] crate::config::ConfigError), + #[error("Error setting up backing filesystems: {0}")] + BackingFs(#[from] crate::backing_fs::BackingFsError), + #[error("Error setting up swap device: {0}")] SwapDevice(#[from] crate::swap_device::SwapDeviceError), @@ -268,14 +274,17 @@ impl SledAgent { )); info!(&log, "SledAgent::new(..) starting"); - // Configure a swap device of the configured size before other system setup. + let boot_disk = storage + .resources() + .boot_disk() + .await + .ok_or_else(|| Error::BootDiskNotFound)?; + + // Configure a swap device of the configured size before other system + // setup. match config.swap_device_size_gb { Some(sz) if sz > 0 => { info!(log, "Requested swap device of size {} GiB", sz); - let boot_disk = - storage.resources().boot_disk().await.ok_or_else(|| { - crate::swap_device::SwapDeviceError::BootDiskNotFound - })?; crate::swap_device::ensure_swap_device( &parent_log, &boot_disk.1, @@ -290,6 +299,9 @@ impl SledAgent { } } + info!(log, "Mounting backing filesystems"); + crate::backing_fs::ensure_backing_fs(&parent_log, &boot_disk.1)?; + // Ensure we have a thread that automatically reaps process contracts // when they become empty. See the comments in // illumos-utils/src/running_zone.rs for more detail. diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index bd713713963..c31a4dc0bc4 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -417,6 +417,7 @@ impl StorageWorker { do_format, encryption_details, size_details, + None, )?; // Ensure the dataset has a usable UUID. if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { diff --git a/sled-agent/src/swap_device.rs b/sled-agent/src/swap_device.rs index 5a8f40adbd1..6a00b42672b 100644 --- a/sled-agent/src/swap_device.rs +++ b/sled-agent/src/swap_device.rs @@ -9,9 +9,6 @@ use zeroize::Zeroize; #[derive(Debug, thiserror::Error)] pub enum SwapDeviceError { - #[error("Could not find boot disk")] - BootDiskNotFound, - #[error("Error running ZFS command: {0}")] Zfs(illumos_utils::ExecutionError), diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index aec99ae3f84..4b8019bf314 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -228,11 +228,21 @@ struct ExpectedDataset { wipe: bool, // Optional compression mode compression: Option<&'static str>, + // Optional mountpoint. Setting this also configures the dataset so that it + // does not mount automatically, and software is responsible for mounting a + // single one chosen from the available M.2s drives. + mountpoint: Option<&'static str>, } impl ExpectedDataset { const fn new(name: &'static str) -> Self { - ExpectedDataset { name, quota: None, wipe: false, compression: None } + ExpectedDataset { + name, + quota: None, + wipe: false, + compression: None, + mountpoint: None, + } } const fn quota(mut self, quota: usize) -> Self { @@ -249,6 +259,11 @@ impl ExpectedDataset { self.compression = Some(compression); self } + + const fn mountpoint(mut self, mountpoint: &'static str) -> Self { + self.mountpoint = Some(mountpoint); + self + } } pub const INSTALL_DATASET: &'static str = "install"; @@ -256,6 +271,9 @@ pub const CRASH_DATASET: &'static str = "crash"; pub const CLUSTER_DATASET: &'static str = "cluster"; pub const CONFIG_DATASET: &'static str = "config"; pub const M2_DEBUG_DATASET: &'static str = "debug"; +pub const M2_BACKING_DATASET: &'static str = "backing"; +pub const M2_BACKING_FM_DATASET: &'static str = "backing/varfm"; +pub const M2_BACKING_FM_DATASET_MP: &'static str = "/var/fm/fmd"; // TODO-correctness: This value of 100GiB is a pretty wild guess, and should be // tuned as needed. pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30); @@ -282,7 +300,7 @@ static U2_EXPECTED_DATASETS: [ExpectedDataset; U2_EXPECTED_DATASET_COUNT] = [ .compression(DUMP_DATASET_COMPRESSION), ]; -const M2_EXPECTED_DATASET_COUNT: usize = 5; +const M2_EXPECTED_DATASET_COUNT: usize = 7; static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ // Stores software images. // @@ -290,7 +308,13 @@ static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ ExpectedDataset::new(INSTALL_DATASET), // Stores crash dumps. ExpectedDataset::new(CRASH_DATASET), - // Stores cluter configuration information. + // Backing store for OS data that should be persisted across reboots. + // Its children are selectively overlay mounted onto parts of the ramdisk + // root. + ExpectedDataset::new(M2_BACKING_DATASET), + ExpectedDataset::new(M2_BACKING_FM_DATASET) + .mountpoint(M2_BACKING_FM_DATASET_MP), + // Stores cluster configuration information. // // Should be duplicated to both M.2s. ExpectedDataset::new(CLUSTER_DATASET), @@ -524,6 +548,7 @@ impl Disk { do_format, Some(encryption_details), None, + None, ); keyfile.zero_and_unlink().await.map_err(|error| { @@ -534,7 +559,15 @@ impl Disk { }; for dataset in datasets.into_iter() { - let mountpoint = zpool_name.dataset_mountpoint(dataset.name); + let mut options = vec![]; + + let mountpoint = match dataset.mountpoint { + Some(explicit_mountpoint) => { + options.push("canmount=noauto".to_string()); + Utf8PathBuf::from(explicit_mountpoint) + } + _ => zpool_name.dataset_mountpoint(dataset.name), + }; let name = &format!("{}/{}", zpool_name, dataset.name); // Use a value that's alive for the duration of this sled agent @@ -562,8 +595,8 @@ impl Disk { "Automatically destroying dataset: {}", name ); Zfs::destroy_dataset(name).or_else(|err| { - // If we can't find the dataset, that's fine -- it might - // not have been formatted yet. + // If we can't find the dataset, that's fine -- it + // might not have been formatted yet. if let DestroyDatasetErrorVariant::NotFound = err.err { @@ -581,6 +614,7 @@ impl Disk { quota: dataset.quota, compression: dataset.compression, }); + Zfs::ensure_filesystem( name, Mountpoint::Path(mountpoint), @@ -588,6 +622,7 @@ impl Disk { do_format, encryption_details, size_details, + Some(options), )?; if dataset.wipe {