From 7facfdf224c7e8fd069b58a4221313874dceb629 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 28 Sep 2023 05:12:21 +0000 Subject: [PATCH 01/66] wip --- Cargo.lock | 23 + Cargo.toml | 2 + sled-storage/Cargo.toml | 27 ++ sled-storage/src/dataset.rs | 120 +++++ sled-storage/src/disk.rs | 57 +++ sled-storage/src/dump_setup.rs | 795 +++++++++++++++++++++++++++++++++ sled-storage/src/error.rs | 0 sled-storage/src/lib.rs | 9 + 8 files changed, 1033 insertions(+) create mode 100644 sled-storage/Cargo.toml create mode 100644 sled-storage/src/dataset.rs create mode 100644 sled-storage/src/disk.rs create mode 100644 sled-storage/src/dump_setup.rs create mode 100644 sled-storage/src/error.rs create mode 100644 sled-storage/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index e5130b6b33..bdf2d44ea4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5265,6 +5265,29 @@ dependencies = [ "zone", ] +[[package]] +name = "omicron-sled-storage" +version = "0.1.0" +dependencies = [ + "async-trait", + "camino", + "derive_more", + "glob", + "illumos-utils", + "key-manager", + "nexus-client 0.1.0", + "omicron-common 0.1.0", + "schemars", + "serde", + "serde_json", + "sled-agent-client", + "sled-hardware", + "slog", + "thiserror", + "tokio", + "uuid", +] + [[package]] name = "omicron-test-utils" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 9498157b28..0b63984ea7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,6 +54,7 @@ members = [ "sled-agent-client", "sled-agent", "sled-hardware", + "sled-storage", "sp-sim", "test-utils", "tufaceous-lib", @@ -115,6 +116,7 @@ default-members = [ "sled-agent", "sled-agent-client", "sled-hardware", + "sled-storage", "sp-sim", "test-utils", "tufaceous", diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml new file mode 100644 index 0000000000..03f0f608de --- /dev/null +++ b/sled-storage/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "omicron-sled-storage" +version = "0.1.0" +edition = "2021" + +[dependencies] +async-trait.workspace = true +camino.workspace = true +derive_more.workspace = true +glob.workspace = true +illumos-utils.workspace = true +key-manager.workspace = true +# Needed strictly for parameter type conversion +# We could put this in the nexus-client instead +nexus-client.workspace = true +omicron-common.workspace = true +schemars = { workspace = true, features = [ "chrono", "uuid1" ] } +serde.workspace = true +serde_json.workspace = true +# Needed strictly for parameter type conversion +# We could put this in the sled-agent-client instead +sled-agent-client.workspace = true +sled-hardware.workspace = true +slog.workspace = true +thiserror.workspace = true +tokio.workspace = true +uuid.workspace = true diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs new file mode 100644 index 0000000000..e521dd963a --- /dev/null +++ b/sled-storage/src/dataset.rs @@ -0,0 +1,120 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use illumos_utils::zpool::ZpoolName; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::str::FromStr; + +/// The type of a dataset, and an auxiliary information necessary +/// to successfully launch a zone managing the associated data. 
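// [Editorial sketch, not part of this patch] The serde attributes on the enum
// just below ("type" tag, snake_case variant names) make a DatasetKind
// serialize as nothing but its tag. A self-contained stand-in enum (not the
// crate's type) showing the resulting JSON shape:
use serde::Serialize;

#[derive(Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum MiniDatasetKind {
    CockroachDb,
    Crucible,
}

fn serde_shape_example() {
    // Unit variants of an internally tagged enum serialize to the tag alone.
    assert_eq!(
        serde_json::to_string(&MiniDatasetKind::CockroachDb).unwrap(),
        r#"{"type":"cockroach_db"}"#
    );
    assert_eq!(
        serde_json::to_string(&MiniDatasetKind::Crucible).unwrap(),
        r#"{"type":"crucible"}"#
    );
}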
+#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum DatasetKind { + CockroachDb, + Crucible, + Clickhouse, + ClickhouseKeeper, + ExternalDns, + InternalDns, +} + +impl From for sled_agent_client::types::DatasetKind { + fn from(k: DatasetKind) -> Self { + use DatasetKind::*; + match k { + CockroachDb => Self::CockroachDb, + Crucible => Self::Crucible, + Clickhouse => Self::Clickhouse, + ClickhouseKeeper => Self::ClickhouseKeeper, + ExternalDns => Self::ExternalDns, + InternalDns => Self::InternalDns, + } + } +} + +impl From for nexus_client::types::DatasetKind { + fn from(k: DatasetKind) -> Self { + use DatasetKind::*; + match k { + CockroachDb => Self::Cockroach, + Crucible => Self::Crucible, + Clickhouse => Self::Clickhouse, + ClickhouseKeeper => Self::ClickhouseKeeper, + ExternalDns => Self::ExternalDns, + InternalDns => Self::InternalDns, + } + } +} + +impl std::fmt::Display for DatasetKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use DatasetKind::*; + let s = match self { + Crucible => "crucible", + CockroachDb { .. } => "cockroachdb", + Clickhouse => "clickhouse", + ClickhouseKeeper => "clickhouse_keeper", + ExternalDns { .. } => "external_dns", + InternalDns { .. } => "internal_dns", + }; + write!(f, "{}", s) + } +} + +#[derive( + Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, +)] +pub struct DatasetName { + // A unique identifier for the Zpool on which the dataset is stored. + pool_name: ZpoolName, + // A name for the dataset within the Zpool. + kind: DatasetKind, +} + +impl DatasetName { + pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self { + Self { pool_name, kind } + } + + pub fn pool(&self) -> &ZpoolName { + &self.pool_name + } + + pub fn dataset(&self) -> &DatasetKind { + &self.kind + } + + pub fn full(&self) -> String { + format!("{}/{}", self.pool_name, self.kind) + } +} + +impl From for sled_agent_client::types::DatasetName { + fn from(n: DatasetName) -> Self { + Self { + pool_name: sled_agent_client::types::ZpoolName::from_str( + &n.pool().to_string(), + ) + .unwrap(), + kind: n.dataset().clone().into(), + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use uuid::Uuid; + + #[test] + fn serialize_dataset_name() { + let pool = ZpoolName::new_internal(Uuid::new_v4()); + let kind = DatasetKind::Crucible; + let name = DatasetName::new(pool, kind); + serde_json::to_string(&name).unwrap(); + } +} diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs new file mode 100644 index 0000000000..b78bd57af9 --- /dev/null +++ b/sled-storage/src/disk.rs @@ -0,0 +1,57 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Disk related types + +use camino::Utf8PathBuf; +use illumos_utils::zpool::{ZpoolKind, ZpoolName}; +use omicron_common::disk::DiskIdentity; +use sled_hardware::{Disk, DiskVariant}; + +/// A wrapper around real disks or synthetic disks backed by a file +#[derive(PartialEq, Eq, Clone)] +pub(crate) enum DiskWrapper { + Real { disk: Disk, devfs_path: Utf8PathBuf }, + Synthetic { zpool_name: ZpoolName }, +} + +impl From for DiskWrapper { + fn from(disk: Disk) -> Self { + let devfs_path = disk.devfs_path().clone(); + Self::Real { disk, devfs_path } + } +} + +impl DiskWrapper { + fn identity(&self) -> DiskIdentity { + match self { + DiskWrapper::Real { disk, .. } => disk.identity().clone(), + DiskWrapper::Synthetic { zpool_name } => { + let id = zpool_name.id(); + DiskIdentity { + vendor: "synthetic-vendor".to_string(), + serial: format!("synthetic-serial-{id}"), + model: "synthetic-model".to_string(), + } + } + } + } + + fn variant(&self) -> DiskVariant { + match self { + DiskWrapper::Real { disk, .. } => disk.variant(), + DiskWrapper::Synthetic { zpool_name } => match zpool_name.kind() { + ZpoolKind::External => DiskVariant::U2, + ZpoolKind::Internal => DiskVariant::M2, + }, + } + } + + fn zpool_name(&self) -> &ZpoolName { + match self { + DiskWrapper::Real { disk, .. } => disk.zpool_name(), + DiskWrapper::Synthetic { zpool_name } => zpool_name, + } + } +} diff --git a/sled-storage/src/dump_setup.rs b/sled-storage/src/dump_setup.rs new file mode 100644 index 0000000000..ea51251f84 --- /dev/null +++ b/sled-storage/src/dump_setup.rs @@ -0,0 +1,795 @@ +use crate::disk::DiskWrapper; +use camino::Utf8PathBuf; +use derive_more::{AsRef, Deref, From}; +use illumos_utils::dumpadm::DumpAdmError; +use illumos_utils::zone::{AdmError, Zones}; +use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; +use omicron_common::disk::DiskIdentity; +use sled_hardware::DiskVariant; +use slog::{debug, error, info, o, warn, Logger}; +use std::collections::{HashMap, HashSet}; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, Weak}; +use std::time::{Duration, SystemTime, SystemTimeError, UNIX_EPOCH}; +use tokio::sync::MutexGuard; + +pub struct DumpSetup { + worker: Arc>, + _poller: std::thread::JoinHandle<()>, + log: Logger, +} + +impl DumpSetup { + pub fn new(log: &Logger) -> Self { + let worker = Arc::new(std::sync::Mutex::new(DumpSetupWorker::new( + log.new(o!("component" => "DumpSetup-worker")), + ))); + let worker_weak = Arc::downgrade(&worker); + let log_poll = log.new(o!("component" => "DumpSetup-archival")); + let _poller = std::thread::spawn(move || { + Self::poll_file_archival(worker_weak, log_poll) + }); + let log = log.new(o!("component" => "DumpSetup")); + Self { worker, _poller, log } + } +} + +// we sure are passing a lot of Utf8PathBufs around, let's be careful about it +#[derive( + AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd, +)] +struct DumpSlicePath(Utf8PathBuf); +#[derive( + AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd, +)] +struct DebugDataset(Utf8PathBuf); +#[derive( + AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd, +)] +struct CoreDataset(Utf8PathBuf); + +#[derive(Deref)] +struct CoreZpool(ZpoolName); +#[derive(Deref)] +struct DebugZpool(ZpoolName); + +// only want to access these directories after they're mounted! 
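// [Editorial sketch, not part of this patch] The mountpoint guard below keys
// off the ZFS "mounted" property. A standalone equivalent of that check,
// assuming only that a `zfs` binary is on the PATH (the crate itself goes
// through illumos_utils and the zfs_get_prop helper defined later in this file):
fn dataset_is_mounted(dataset: &str) -> std::io::Result<bool> {
    // `zfs get -Hpo value mounted <dataset>` prints "yes" or "no".
    let output = std::process::Command::new("zfs")
        .args(["get", "-Hpo", "value", "mounted", dataset])
        .output()?;
    Ok(String::from_utf8_lossy(&output.stdout).trim() == "yes")
}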
+trait GetMountpoint: std::ops::Deref { + type NewType: From; + const MOUNTPOINT: &'static str; + fn mountpoint(&self) -> Result, ZfsGetError> { + if zfs_get_prop(self.to_string(), "mounted")? == "yes" { + Ok(Some(Self::NewType::from( + self.dataset_mountpoint(Self::MOUNTPOINT), + ))) + } else { + Ok(None) + } + } +} +impl GetMountpoint for DebugZpool { + type NewType = DebugDataset; + const MOUNTPOINT: &'static str = sled_hardware::disk::DUMP_DATASET; +} +impl GetMountpoint for CoreZpool { + type NewType = CoreDataset; + const MOUNTPOINT: &'static str = sled_hardware::disk::CRASH_DATASET; +} + +struct DumpSetupWorker { + core_dataset_names: Vec, + debug_dataset_names: Vec, + + chosen_dump_slice: Option, + chosen_debug_dir: Option, + chosen_core_dir: Option, + + known_dump_slices: Vec, + known_debug_dirs: Vec, + known_core_dirs: Vec, + + savecored_slices: HashSet, + + log: Logger, +} + +const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300); + +impl DumpSetup { + pub(crate) async fn update_dumpdev_setup( + &self, + disks: &mut MutexGuard<'_, HashMap>, + ) { + let log = &self.log; + let mut m2_dump_slices = Vec::new(); + let mut u2_debug_datasets = Vec::new(); + let mut m2_core_datasets = Vec::new(); + for (_id, disk_wrapper) in disks.iter() { + match disk_wrapper { + DiskWrapper::Real { disk, .. } => match disk.variant() { + DiskVariant::M2 => { + match disk.dump_device_devfs_path(false) { + Ok(path) => { + m2_dump_slices.push(DumpSlicePath(path)) + } + Err(err) => { + warn!(log, "Error getting dump device devfs path: {err:?}"); + } + } + let name = disk.zpool_name(); + if let Ok(info) = illumos_utils::zpool::Zpool::get_info( + &name.to_string(), + ) { + if info.health() == ZpoolHealth::Online { + m2_core_datasets.push(CoreZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); + } + } + } + DiskVariant::U2 => { + let name = disk.zpool_name(); + if let Ok(info) = illumos_utils::zpool::Zpool::get_info( + &name.to_string(), + ) { + if info.health() == ZpoolHealth::Online { + u2_debug_datasets + .push(DebugZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); + } + } + } + }, + DiskWrapper::Synthetic { .. } => {} + } + } + + let savecore_lock = self.worker.clone(); + let log_tmp = log.new(o!("component" => "DumpSetup-mutex")); + tokio::task::spawn_blocking(move || match savecore_lock.lock() { + Ok(mut guard) => { + guard.update_disk_loadout( + m2_dump_slices, + u2_debug_datasets, + m2_core_datasets, + ); + } + Err(err) => { + error!(log_tmp, "DumpSetup mutex poisoned: {err:?}"); + } + }); + } + + fn poll_file_archival( + worker: Weak>, + log: Logger, + ) { + info!(log, "DumpSetup poll loop started."); + loop { + if let Some(mutex) = worker.upgrade() { + match mutex.lock() { + Ok(mut guard) => { + guard.reevaluate_choices(); + if let Err(err) = guard.archive_files() { + error!( + log, + "Failed to archive debug/dump files: {err:?}" + ); + } + } + Err(err) => { + error!( + log, + "DumpSetup mutex poisoned in poll thread: {err:?}" + ); + break; + } + } + } else { + info!( + log, + "DumpSetup weak pointer dropped, leaving poll loop." 
+ ); + break; + } + std::thread::sleep(ARCHIVAL_INTERVAL); + } + } +} + +#[derive(Debug, thiserror::Error)] +enum ZfsGetError { + #[error("Error executing 'zfs get' command: {0}")] + IoError(#[from] std::io::Error), + #[error("Output of 'zfs get' was not only not an integer string, it wasn't even UTF-8: {0}")] + Utf8(#[from] std::string::FromUtf8Error), + #[error("Error parsing output of 'zfs get' command as integer: {0}")] + Parse(#[from] std::num::ParseIntError), +} + +const ZFS_PROP_USED: &str = "used"; +const ZFS_PROP_AVAILABLE: &str = "available"; + +fn zfs_get_integer( + mountpoint_or_name: impl AsRef, + property: &str, +) -> Result { + zfs_get_prop(mountpoint_or_name, property)?.parse().map_err(Into::into) +} + +fn zfs_get_prop( + mountpoint_or_name: impl AsRef + Sized, + property: &str, +) -> Result { + let mountpoint = mountpoint_or_name.as_ref(); + let mut cmd = std::process::Command::new(illumos_utils::zfs::ZFS); + cmd.arg("get").arg("-Hpo").arg("value"); + cmd.arg(property); + cmd.arg(mountpoint); + let output = cmd.output()?; + Ok(String::from_utf8(output.stdout)?.trim().to_string()) +} + +const DATASET_USAGE_PERCENT_CHOICE: u64 = 70; +const DATASET_USAGE_PERCENT_CLEANUP: u64 = 80; + +fn below_thresh( + mountpoint: &Utf8PathBuf, + percent: u64, +) -> Result<(bool, u64), ZfsGetError> { + let used = zfs_get_integer(mountpoint, ZFS_PROP_USED)?; + let available = zfs_get_integer(mountpoint, ZFS_PROP_AVAILABLE)?; + let capacity = used + available; + let below = (used * 100) / capacity < percent; + Ok((below, used)) +} + +impl DumpSetupWorker { + fn new(log: Logger) -> Self { + Self { + core_dataset_names: vec![], + debug_dataset_names: vec![], + chosen_dump_slice: None, + chosen_debug_dir: None, + chosen_core_dir: None, + known_dump_slices: vec![], + known_debug_dirs: vec![], + known_core_dirs: vec![], + savecored_slices: Default::default(), + log, + } + } + + fn update_disk_loadout( + &mut self, + dump_slices: Vec, + debug_datasets: Vec, + core_datasets: Vec, + ) { + self.core_dataset_names = core_datasets; + self.debug_dataset_names = debug_datasets; + + self.known_dump_slices = dump_slices; + + self.reevaluate_choices(); + } + + // only allow mounted zfs datasets into 'known_*_dirs', + // such that we don't render them non-auto-mountable by zfs + fn update_mounted_dirs(&mut self) { + self.known_debug_dirs = self + .debug_dataset_names + .iter() + .flat_map(|ds| ds.mountpoint()) + .flatten() + .collect(); + self.known_core_dirs = self + .core_dataset_names + .iter() + .flat_map(|ds| ds.mountpoint()) + .flatten() + .collect(); + } + + fn reevaluate_choices(&mut self) { + self.update_mounted_dirs(); + + self.known_dump_slices.sort(); + // sort key: prefer to choose a dataset where there's already other + // dumps so we don't shotgun them across every U.2, but only if they're + // below a certain usage threshold. + self.known_debug_dirs.sort_by_cached_key( + |mountpoint: &DebugDataset| { + match below_thresh(mountpoint.as_ref(), DATASET_USAGE_PERCENT_CHOICE) { + Ok((below, used)) => { + let priority = if below { 0 } else { 1 }; + (priority, used, mountpoint.clone()) + } + Err(err) => { + error!(self.log, "Could not query zfs properties of debug dump dir: {err:?}"); + // deprioritize anything we get errors querying. 
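// [Editorial worked example, not part of this patch] How the sort key above
// plays out with DATASET_USAGE_PERCENT_CHOICE = 70:
//   used 30 GiB of 100 GiB -> (30 * 100) / 100 = 30 < 70 -> priority 0
//   used 80 GiB of 100 GiB -> 80 >= 70                   -> priority 1 (sorts after all priority-0 dirs)
//   zfs query error        -> (usize::MAX, u64::MAX, _)  -> sorts last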
+ (usize::MAX, u64::MAX, mountpoint.clone()) + } + } + }, + ); + self.known_core_dirs.sort_by_cached_key(|mnt| { + // these get archived periodically anyway, pick one with room + let available = zfs_get_integer(&**mnt, "available").unwrap_or(0); + (u64::MAX - available, mnt.clone()) + }); + + if let Some(x) = &self.chosen_debug_dir { + if !self.known_debug_dirs.contains(x) { + warn!(self.log, "Previously-chosen debug/dump dir {x:?} no longer exists in our view of reality"); + self.chosen_debug_dir = None; + } else { + match below_thresh(x.as_ref(), DATASET_USAGE_PERCENT_CLEANUP) { + Ok((true, _)) => {} + Ok((false, _)) => { + if self.known_debug_dirs.iter().any(|x| { + below_thresh( + x.as_ref(), + DATASET_USAGE_PERCENT_CHOICE, + ) + .unwrap_or((false, 0)) + .0 + }) { + info!(self.log, "Previously-chosen debug/dump dir {x:?} is over usage threshold, choosing a more vacant disk"); + self.chosen_debug_dir = None; + } else { + warn!(self.log, "All candidate debug/dump dirs are over usage threshold, removing older archived files"); + if let Err(err) = self.cleanup() { + error!(self.log, "Couldn't clean up any debug/dump dirs, may hit dataset quota in {x:?}: {err:?}"); + } else { + self.chosen_debug_dir = None; + } + } + } + Err(err) => { + error!(self.log, "Previously-chosen debug/dump dir {x:?} couldn't be queried for zfs properties! Choosing another. {err:?}"); + self.chosen_debug_dir = None; + } + } + } + } + if let Some(x) = &self.chosen_dump_slice { + if !self.known_dump_slices.contains(x) { + warn!(self.log, "Previously-chosen dump slice {x:?} no longer exists in our view of reality"); + self.chosen_dump_slice = None; + } + } + if let Some(x) = &self.chosen_core_dir { + if !self.known_core_dirs.contains(x) { + warn!(self.log, "Previously-chosen core dir {x:?} no longer exists in our view of reality"); + self.chosen_core_dir = None; + } + } + + if self.chosen_debug_dir.is_none() { + self.chosen_debug_dir = self.known_debug_dirs.first().cloned(); + } + + if self.chosen_core_dir.is_none() { + for core_dir in &self.known_core_dirs { + // tell the system to write *userspace process* cores here. + match illumos_utils::coreadm::coreadm(core_dir) { + Ok(()) => { + self.chosen_core_dir = Some(core_dir.clone()); + info!( + self.log, + "Set process core dump directory to {core_dir:?}" + ); + break; + } + Err(err) => { + error!(self.log, "Couldn't configure process core dump directory to {core_dir:?}: {err:?}"); + } + } + } + } + + if self.chosen_dump_slice.is_none() { + if self.chosen_debug_dir.is_some() { + for dump_slice in self.known_dump_slices.clone() { + // Let's try to see if it appears to have a kernel dump already + match illumos_utils::dumpadm::dump_flag_is_valid( + &dump_slice, + ) { + Ok(true) => { + debug!(self.log, "Dump slice {dump_slice:?} appears to have a valid header; will attempt to savecore"); + } + Ok(false) => { + info!(self.log, "Dump slice {dump_slice:?} appears to have already been saved"); + } + Err(err) => { + debug!(self.log, "Dump slice {dump_slice:?} appears to be unused: {err:?}"); + } + } + if let Ok(saved) = self.dumpadm_and_savecore(&dump_slice) { + if let Some(out) = saved { + info!(self.log, "Previous dump on slice {dump_slice:?} saved, configured slice as target for new dumps. {out:?}"); + } + self.chosen_dump_slice = Some(dump_slice); + break; + } + } + } else { + // Don't risk overwriting an existing kernel dump if there's + // already one there until we can attempt to savecore(8) + // it away and clear the flag to make room. 
+ for dump_slice in &self.known_dump_slices { + match illumos_utils::dumpadm::dump_flag_is_valid(dump_slice) + { + Ok(false) => { + // Have dumpadm write the config for crash dumps to be + // on this slice, at least, until a U.2 comes along. + match illumos_utils::dumpadm::dumpadm( + dump_slice, None, + ) { + Ok(_) => { + info!(self.log, "Using dump device {dump_slice:?} with no savecore destination (no U.2 debug zvol yet)"); + self.chosen_dump_slice = + Some(dump_slice.clone()); + break; + } + Err(err) => { + warn!(self.log, "Could not configure {dump_slice:?} as dump device: {err:?}"); + } + } + } + Ok(true) => { + warn!(self.log, "Not configuring {dump_slice:?} as it appears to contain a dump we cannot yet send to a U.2 debug zvol"); + } + Err(err) => { + debug!( + self.log, + "Dump slice {dump_slice:?} appears to be unused : {err:?}", + ); + } + } + } + } + } + + if let Some(debug_dir) = self.chosen_debug_dir.clone() { + let mut changed_slice = false; + for dump_slice in self.known_dump_slices.clone() { + if !self.savecored_slices.contains(&dump_slice) { + changed_slice = true; + // temporarily changes the system's dump slice so savecore(8) + // can update the header in the slice when it finishes... + match self.dumpadm_and_savecore(&dump_slice) { + Ok(saved) => { + if let Some(stdout) = &saved { + info!( + self.log, + "Saved dump from {dump_slice:?} to {debug_dir:?}: {stdout:?}" + ); + } else { + info!( + self.log, + "Set {dump_slice:?} as system dump slice", + ); + } + } + Err(err) => { + warn!(self.log, "Could not configure {dump_slice:?} as dump device with {debug_dir:?} as savecore destination: {err:?}"); + } + } + } + } + + // ...so then we restore the chosen dump slice for the system to use + // in the event of a kernel crash + if changed_slice { + if let Some(dump_slice) = &self.chosen_dump_slice { + if let Err(err) = + illumos_utils::dumpadm::dumpadm(dump_slice, None) + { + error!(self.log, "Could not restore dump slice to {dump_slice:?}: {err:?}"); + } + } + } + } + } + + fn archive_files(&self) -> std::io::Result<()> { + if let Some(debug_dir) = &self.chosen_debug_dir { + if self.known_core_dirs.is_empty() { + info!(self.log, "No core dump locations yet known."); + } + for core_dir in &self.known_core_dirs { + if let Ok(dir) = core_dir.read_dir() { + for entry in dir.flatten() { + if let Some(path) = entry.file_name().to_str() { + let dest = debug_dir.join(path); + + if let Err(err) = + Self::copy_sync_and_remove(&entry.path(), &dest) + { + error!( + self.log, + "Failed to archive {entry:?}: {err:?}" + ); + } else { + info!( + self.log, + "Relocated {entry:?} to {dest:?}" + ); + } + } else { + error!(self.log, "Non-UTF8 path found while archiving core dumps: {entry:?}"); + } + } + } + } + } else { + info!( + self.log, + "No archival destination for crash dumps yet chosen." 
+ ); + } + + if let Err(err) = self.archive_logs() { + if !matches!(err, ArchiveLogsError::NoDebugDirYet) { + error!( + self.log, + "Failure while trying to archive logs to debug dataset: {err:?}" + ); + } + } + + Ok(()) + } + + fn copy_sync_and_remove( + source: impl AsRef, + dest: impl AsRef, + ) -> std::io::Result<()> { + let source = source.as_ref(); + let dest = dest.as_ref(); + let mut dest_f = std::fs::File::create(&dest)?; + let mut src_f = std::fs::File::open(&source)?; + + std::io::copy(&mut src_f, &mut dest_f)?; + + dest_f.sync_all()?; + + drop(src_f); + drop(dest_f); + + std::fs::remove_file(source)?; + Ok(()) + } + + fn archive_logs(&self) -> Result<(), ArchiveLogsError> { + let debug_dir = self + .chosen_debug_dir + .as_ref() + .ok_or(ArchiveLogsError::NoDebugDirYet)?; + // zone crate's 'deprecated' functions collide if you try to enable + // its 'sync' and 'async' features simultaneously :( + let rt = + tokio::runtime::Runtime::new().map_err(ArchiveLogsError::Tokio)?; + let oxz_zones = rt.block_on(Zones::get())?; + self.archive_logs_inner( + debug_dir, + PathBuf::from("/var/svc/log"), + "global", + )?; + for zone in oxz_zones { + let logdir = zone.path().join("root/var/svc/log"); + let zone_name = zone.name(); + self.archive_logs_inner(debug_dir, logdir, zone_name)?; + } + Ok(()) + } + + fn archive_logs_inner( + &self, + debug_dir: &DebugDataset, + logdir: PathBuf, + zone_name: &str, + ) -> Result<(), ArchiveLogsError> { + let mut rotated_log_files = Vec::new(); + // patterns matching archived logs, e.g. foo.log.3 + // keep checking for greater numbers of digits until we don't find any + for n in 1..9 { + let pattern = logdir + .join(format!("*.log.{}", "[0-9]".repeat(n))) + .to_str() + .ok_or_else(|| ArchiveLogsError::Utf8(zone_name.to_string()))? + .to_string(); + rotated_log_files.extend(glob::glob(&pattern)?.flatten()); + } + let dest_dir = debug_dir.join(zone_name).into_std_path_buf(); + if !rotated_log_files.is_empty() { + std::fs::create_dir_all(&dest_dir)?; + let count = rotated_log_files.len(); + info!( + self.log, + "Archiving {count} log files from {zone_name} zone" + ); + } + for entry in rotated_log_files { + let src_name = entry.file_name().unwrap(); + // as we archive them, logadm will keep resetting to .log.0, + // so we need to maintain our own numbering in the dest dataset. + // we'll use the modified date of the rotated log file, or try + // falling back to the time of archival if that fails, and + // falling back to counting up from 0 if *that* somehow fails. + let mut n = entry + .metadata() + .and_then(|m| m.modified()) + .unwrap_or_else(|_| SystemTime::now()) + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + let mut dest; + loop { + dest = dest_dir.join(src_name).with_extension(format!("{n}")); + if dest.exists() { + n += 1; + } else { + break; + } + } + if let Err(err) = Self::copy_sync_and_remove(&entry, dest) { + warn!(self.log, "Failed to archive {entry:?}: {err:?}"); + } + } + Ok(()) + } + + // Have dumpadm write the config for crash dumps to be + // on this slice, and then invoke savecore(8) to save any + // dump that's already present there. + // + // NOTE: because of the need to have dumpadm change the global + // state of which slice the system is using for dumps in order + // for savecore to behave the way we want (i.e. clear the flag + // after succeeding), we could hypothetically miss a dump if + // the kernel crashes again while savecore is still running. 
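// [Editorial note, not part of this patch] At the command level, the sequence
// this function and its caller wrap is roughly (flags shown from memory of
// dumpadm(8)/savecore(8), as an assumption rather than verified here):
//   dumpadm -d /dev/dsk/<slice>    # make <slice> the system dump device
//   savecore <debug_dataset_dir>   # archive any pending dump, clearing the slice's valid flag
//   dumpadm -d /dev/dsk/<chosen>   # the caller later restores the preferred slice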
+ fn dumpadm_and_savecore( + &mut self, + dump_slice: &DumpSlicePath, + ) -> Result, DumpAdmError> { + // TODO: untangle savecore from illumos_utils::dumpadm + assert!(self.chosen_debug_dir.is_some()); + + let savecore_dir = self.chosen_debug_dir.clone().unwrap().0; + + match illumos_utils::dumpadm::dumpadm(&dump_slice, Some(&savecore_dir)) + { + Ok(saved) => { + self.savecored_slices.insert(dump_slice.clone()); + Ok(saved) + } + Err(err) => Err(err), + } + } + + fn cleanup(&self) -> Result<(), CleanupError> { + let mut dir_info = Vec::new(); + for dir in &self.known_debug_dirs { + match Self::scope_dir_for_cleanup(dir) { + Ok(info) => { + dir_info.push((info, dir)); + } + Err(err) => { + error!(self.log, "Could not analyze {dir:?} for debug dataset cleanup task: {err:?}"); + } + } + } + if dir_info.is_empty() { + return Err(CleanupError::NoDatasetsToClean); + } + // find dir with oldest average time of files that must be deleted + // to achieve desired threshold, and reclaim that space. + dir_info.sort(); + 'outer: for (dir_info, dir) in dir_info { + let CleanupDirInfo { average_time: _, num_to_delete, file_list } = + dir_info; + for (_time, _bytes, path) in &file_list[..num_to_delete as usize] { + // if we are unable to remove a file, we cannot guarantee + // that we will reach our target size threshold, and suspect + // the i/o error *may* be an issue with the underlying disk, so + // we continue to the dataset with the next-oldest average age + // of files-to-delete in the sorted list. + if let Err(err) = std::fs::remove_file(&path) { + error!(self.log, "Couldn't delete {path:?} from debug dataset, skipping {dir:?}. {err:?}"); + continue 'outer; + } + } + // we made it through all the files we planned to remove, thereby + // freeing up enough space on one of the debug datasets for it to + // be chosen when reevaluating targets. + break; + } + Ok(()) + } + + fn scope_dir_for_cleanup( + debug_dir: &DebugDataset, + ) -> Result { + let used = zfs_get_integer(&**debug_dir, ZFS_PROP_USED)?; + let available = zfs_get_integer(&**debug_dir, ZFS_PROP_AVAILABLE)?; + let capacity = used + available; + + let target_used = capacity * DATASET_USAGE_PERCENT_CHOICE / 100; + + let mut file_list = Vec::new(); + // find all files in the debug dataset and sort by modified time + for path in glob::glob(debug_dir.join("**/*").as_str())?.flatten() { + let meta = std::fs::metadata(&path)?; + // we need this to be a Duration rather than SystemTime so we can + // do math to it later. + let time = meta.modified()?.duration_since(UNIX_EPOCH)?; + let size = meta.len(); + + file_list.push((time, size, path)) + } + file_list.sort(); + + // find how many old files must be deleted to get the dataset under + // the limit, and what the average age of that set is. 
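// [Editorial worked example, not part of this patch] e.g. used = 90 GiB and
// available = 10 GiB gives capacity = 100 GiB and target_used = 70 GiB, so the
// loop below keeps marking files oldest-first until just over 20 GiB
// (used - target_used) is slated for deletion.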
+ let mut possible_bytes = 0; + let mut total_time = Duration::ZERO; + let mut num_to_delete = 0; + for (time, size, _path) in &file_list { + if used - possible_bytes < target_used { + break; + } else { + total_time += *time; + num_to_delete += 1; + possible_bytes += size; + } + } + let average_time = + total_time.checked_div(num_to_delete).unwrap_or(Duration::MAX); + + Ok(CleanupDirInfo { average_time, num_to_delete, file_list }) + } +} + +#[derive(thiserror::Error, Debug)] +enum ArchiveLogsError { + #[error("Couldn't make an async runtime to get zone info: {0}")] + Tokio(std::io::Error), + #[error("I/O error: {0}")] + IoError(#[from] std::io::Error), + #[error("Error calling zoneadm: {0}")] + Zoneadm(#[from] AdmError), + #[error("Non-UTF8 zone path for zone {0}")] + Utf8(String), + #[error("Glob pattern invalid: {0}")] + Glob(#[from] glob::PatternError), + #[error( + "No debug dir into which we should archive logs has yet been chosen" + )] + NoDebugDirYet, +} + +#[derive(thiserror::Error, Debug)] +enum CleanupError { + #[error("No debug datasets were successfully evaluated for cleanup")] + NoDatasetsToClean, + #[error("Failed to query ZFS properties: {0}")] + ZfsError(#[from] ZfsGetError), + #[error("I/O error: {0}")] + IoError(#[from] std::io::Error), + #[error("Glob pattern invalid: {0}")] + Glob(#[from] glob::PatternError), + #[error("A file's observed modified time was before the Unix epoch: {0}")] + TimelineWentSideways(#[from] SystemTimeError), +} + +#[derive(Ord, PartialOrd, Eq, PartialEq)] +struct CleanupDirInfo { + average_time: Duration, + num_to_delete: u32, + file_list: Vec<(Duration, u64, PathBuf)>, +} diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs new file mode 100644 index 0000000000..48f335a36a --- /dev/null +++ b/sled-storage/src/lib.rs @@ -0,0 +1,9 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Local storage abstraction for use by sled-agent + +pub(crate) mod dataset; +pub(crate) mod disk; +pub(crate) mod dump_setup; From f927d023e9bf470e50cf5bb08bc53906a17ec1f7 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 28 Sep 2023 06:40:30 +0000 Subject: [PATCH 02/66] wip --- sled-agent/src/storage_manager.rs | 25 ------- sled-storage/src/disk.rs | 10 +-- sled-storage/src/error.rs | 80 ++++++++++++++++++++ sled-storage/src/lib.rs | 3 + sled-storage/src/pool.rs | 36 +++++++++ sled-storage/src/state.rs | 118 ++++++++++++++++++++++++++++++ 6 files changed, 242 insertions(+), 30 deletions(-) create mode 100644 sled-storage/src/pool.rs create mode 100644 sled-storage/src/state.rs diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index bd71371396..e43f2d841d 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -38,7 +38,6 @@ use tokio::task::JoinHandle; use tokio::time::{interval, MissedTickBehavior}; use uuid::Uuid; -use illumos_utils::dumpadm::DumpHdrError; #[cfg(test)] use illumos_utils::{zfs::MockZfs as Zfs, zpool::MockZpool as Zpool}; #[cfg(not(test))] @@ -114,32 +113,8 @@ pub enum Error { #[error("Zpool Not Found: {0}")] ZpoolNotFound(String), - #[error("Failed to serialize toml (intended for {path:?}): {err}")] - Serialize { - path: Utf8PathBuf, - #[source] - err: toml::ser::Error, - }, - - #[error("Failed to deserialize toml from {path:?}: {err}")] - Deserialize { - path: Utf8PathBuf, - #[source] - err: toml::de::Error, - }, - - #[error("Failed to perform I/O: {message}: {err}")] - Io { - message: String, - #[source] - err: std::io::Error, - }, - #[error("Underlay not yet initialized")] UnderlayNotInitialized, - - #[error("Encountered error checking dump device flags: {0}")] - DumpHdr(#[from] DumpHdrError), } /// A ZFS storage pool. diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index b78bd57af9..aef68528bf 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -10,8 +10,8 @@ use omicron_common::disk::DiskIdentity; use sled_hardware::{Disk, DiskVariant}; /// A wrapper around real disks or synthetic disks backed by a file -#[derive(PartialEq, Eq, Clone)] -pub(crate) enum DiskWrapper { +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum DiskWrapper { Real { disk: Disk, devfs_path: Utf8PathBuf }, Synthetic { zpool_name: ZpoolName }, } @@ -24,7 +24,7 @@ impl From for DiskWrapper { } impl DiskWrapper { - fn identity(&self) -> DiskIdentity { + pub fn identity(&self) -> DiskIdentity { match self { DiskWrapper::Real { disk, .. } => disk.identity().clone(), DiskWrapper::Synthetic { zpool_name } => { @@ -38,7 +38,7 @@ impl DiskWrapper { } } - fn variant(&self) -> DiskVariant { + pub fn variant(&self) -> DiskVariant { match self { DiskWrapper::Real { disk, .. } => disk.variant(), DiskWrapper::Synthetic { zpool_name } => match zpool_name.kind() { @@ -48,7 +48,7 @@ impl DiskWrapper { } } - fn zpool_name(&self) -> &ZpoolName { + pub fn zpool_name(&self) -> &ZpoolName { match self { DiskWrapper::Real { disk, .. } => disk.zpool_name(), DiskWrapper::Synthetic { zpool_name } => zpool_name, diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs index e69de29bb2..d2a2a473b1 100644 --- a/sled-storage/src/error.rs +++ b/sled-storage/src/error.rs @@ -0,0 +1,80 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Storage related errors + +use crate::dataset::DatasetName; +use camino::Utf8PathBuf; +use omicron_common::api::external::ByteCountRangeError; +use uuid::Uuid; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error(transparent)] + DiskError(#[from] sled_hardware::DiskError), + + // TODO: We could add the context of "why are we doint this op", maybe? + #[error(transparent)] + ZfsListDataset(#[from] illumos_utils::zfs::ListDatasetsError), + + #[error(transparent)] + ZfsEnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), + + #[error(transparent)] + ZfsSetValue(#[from] illumos_utils::zfs::SetValueError), + + #[error(transparent)] + ZfsGetValue(#[from] illumos_utils::zfs::GetValueError), + + #[error(transparent)] + GetZpoolInfo(#[from] illumos_utils::zpool::GetInfoError), + + #[error(transparent)] + Fstyp(#[from] illumos_utils::fstyp::Error), + + #[error(transparent)] + ZoneCommand(#[from] illumos_utils::running_zone::RunCommandError), + + #[error(transparent)] + ZoneBoot(#[from] illumos_utils::running_zone::BootError), + + #[error(transparent)] + ZoneEnsureAddress(#[from] illumos_utils::running_zone::EnsureAddressError), + + #[error(transparent)] + ZoneInstall(#[from] illumos_utils::running_zone::InstallZoneError), + + #[error("No U.2 Zpools found")] + NoU2Zpool, + + #[error("Failed to parse UUID from {path}: {err}")] + ParseUuid { + path: Utf8PathBuf, + #[source] + err: uuid::Error, + }, + + #[error("Dataset {name:?} exists with a different uuid (has {old}, requested {new})")] + UuidMismatch { name: Box, old: Uuid, new: Uuid }, + + #[error("Error parsing pool {name}'s size: {err}")] + BadPoolSize { + name: String, + #[source] + err: ByteCountRangeError, + }, + + #[error("Failed to parse the dataset {name}'s UUID: {err}")] + ParseDatasetUuid { + name: String, + #[source] + err: uuid::Error, + }, + + #[error("Zpool Not Found: {0}")] + ZpoolNotFound(String), + + #[error("Underlay not yet initialized")] + UnderlayNotInitialized, +} diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs index 48f335a36a..a1bd4eecfb 100644 --- a/sled-storage/src/lib.rs +++ b/sled-storage/src/lib.rs @@ -7,3 +7,6 @@ pub(crate) mod dataset; pub(crate) mod disk; pub(crate) mod dump_setup; +pub mod error; +pub(crate) mod pool; +pub mod state; diff --git a/sled-storage/src/pool.rs b/sled-storage/src/pool.rs new file mode 100644 index 0000000000..4a9960da4c --- /dev/null +++ b/sled-storage/src/pool.rs @@ -0,0 +1,36 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! ZFS storage pool + +use crate::error::Error; +use illumos_utils::zpool::{ZpoolInfo, ZpoolName}; +use omicron_common::disk::DiskIdentity; + +#[cfg(test)] +use illumos_utils::{zfs::MockZfs as Zfs, zpool::MockZpool as Zpool}; +#[cfg(not(test))] +use illumos_utils::{zfs::Zfs, zpool::Zpool}; + +/// A ZFS storage pool +#[derive(Debug, Clone)] +pub struct Pool { + name: ZpoolName, + info: ZpoolInfo, + parent: DiskIdentity, +} + +impl Pool { + /// Queries for an existing Zpool by name. + /// + /// Returns Ok if the pool exists. 
+ fn new(name: ZpoolName, parent: DiskIdentity) -> Result { + let info = Zpool::get_info(&name.to_string())?; + Ok(Pool { name, info, parent }) + } + + fn parent(&self) -> &DiskIdentity { + &self.parent + } +} diff --git a/sled-storage/src/state.rs b/sled-storage/src/state.rs new file mode 100644 index 0000000000..a7de70999e --- /dev/null +++ b/sled-storage/src/state.rs @@ -0,0 +1,118 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The internal state of the storage manager task + +use crate::disk::DiskWrapper; +use crate::pool::Pool; +use camino::Utf8PathBuf; +use illumos_utils::zpool::ZpoolName; +use omicron_common::disk::DiskIdentity; +use sled_hardware::DiskVariant; +use std::collections::BTreeMap; +use std::sync::Arc; +use uuid::Uuid; + +// The directory within the debug dataset in which bundles are created. +const BUNDLE_DIRECTORY: &str = "bundle"; + +// The directory for zone bundles. +const ZONE_BUNDLE_DIRECTORY: &str = "zone"; + +/// Storage related state +/// +/// This state is internal to the [`crate::StorageManager`] task. Clones +/// of this state, or subsets of it, can be retrieved by requests to the +/// `StorageManager` task from the [`crate::StorageManagerHandle`]. This state +/// is not `Sync`, and as such does not require any mutexes. However, we do +/// expect to share it relatively frequently, and we want copies of it to be +/// as cheaply made as possible. So any large state is stored inside `Arc`s. On +/// the other hand, we expect infrequent updates to this state, and as such, we +/// use [`std::sync::Arc::make_mut`] to implement clone on write functionality +/// inside the `StorageManager` task if there are any outstanding copies. +/// Therefore, we only pay the cost to update infrequently, and no locks are +/// required by callers when operating on cloned data. The only contention here +/// is for the refrence counters of the internal Arcs when `State` gets cloned +/// or dropped. +#[derive(Debug, Clone)] +pub struct State { + // All disks, real and synthetic, being managed by this sled + disks: Arc>, + + // A map of "Uuid" to "pool". + pools: Arc>, +} + +impl State { + /// Returns the identity of the boot disk. + /// + /// If this returns `None`, we have not processed the boot disk yet. + pub fn boot_disk(&self) -> Option<(DiskIdentity, ZpoolName)> { + self.disks.iter().find_map(|(id, disk)| { + match disk { + // This is the "real" use-case: if we have real disks, query + // their properties to identify if they truly are the boot disk. + DiskWrapper::Real { disk, .. } => { + if disk.is_boot_disk() { + return Some((id.clone(), disk.zpool_name().clone())); + } + } + // This is the "less real" use-case: if we have synthetic disks, + // just label the first M.2-looking one as a "boot disk". + DiskWrapper::Synthetic { .. } => { + if matches!(disk.variant(), DiskVariant::M2) { + return Some((id.clone(), disk.zpool_name().clone())); + } + } + }; + None + }) + } + /// Returns all M.2 zpools + pub fn all_m2_zpools(&self) -> Vec { + self.all_zpools(DiskVariant::M2) + } + + /// Returns all U.2 zpools + pub fn all_u2_zpools(&self) -> Vec { + self.all_zpools(DiskVariant::U2) + } + + /// Returns all mountpoints within all M.2s for a particular dataset. 
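// [Editorial sketch, not part of this patch] A minimal illustration of the
// clone-on-write pattern described in the `State` docs above: readers hold
// cheap `Arc` clones, and `Arc::make_mut` deep-copies only while such a clone
// is still outstanding. The names here are illustrative, not the crate's.
fn cow_insert(
    disks: &mut std::sync::Arc<std::collections::BTreeMap<String, String>>,
    key: String,
    value: String,
) {
    // Copies the inner map only if another Arc still references it;
    // otherwise it mutates in place.
    std::sync::Arc::make_mut(disks).insert(key, value);
}

fn cow_example() {
    let mut state = std::sync::Arc::new(std::collections::BTreeMap::new());
    let reader_snapshot = state.clone(); // cheap: bumps a reference count
    cow_insert(&mut state, "disk0".to_string(), "oxp_example".to_string());
    assert!(reader_snapshot.is_empty()); // the reader's snapshot is unchanged
    assert_eq!(state.len(), 1);
}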
+ pub fn all_m2_mountpoints(&self, dataset: &str) -> Vec { + self.all_m2_zpools() + .iter() + .map(|zpool| zpool.dataset_mountpoint(dataset)) + .collect() + } + + /// Returns all mountpoints within all U.2s for a particular dataset. + pub fn all_u2_mountpoints(&self, dataset: &str) -> Vec { + self.all_u2_zpools() + .iter() + .map(|zpool| zpool.dataset_mountpoint(dataset)) + .collect() + } + + /// Returns all zpools of a particular variant + pub fn all_zpools(&self, variant: DiskVariant) -> Vec { + self.disks + .values() + .filter_map(|disk| { + if disk.variant() == variant { + return Some(disk.zpool_name().clone()); + } + None + }) + .collect() + } + + /// Return the directories for storing zone service bundles. + pub fn all_zone_bundle_directories(&self) -> Vec { + self.all_m2_mountpoints(sled_hardware::disk::M2_DEBUG_DATASET) + .into_iter() + .map(|p| p.join(BUNDLE_DIRECTORY).join(ZONE_BUNDLE_DIRECTORY)) + .collect() + } +} From 9818c05473bd431017950498f613671d18e0998e Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 28 Sep 2023 20:00:04 +0000 Subject: [PATCH 03/66] wip --- Cargo.lock | 2 + sled-agent/src/storage_manager.rs | 14 +- sled-hardware/src/disk.rs | 470 ++---------------------- sled-hardware/src/illumos/partitions.rs | 26 +- sled-storage/Cargo.toml | 5 + sled-storage/src/dataset.rs | 274 ++++++++++++++ sled-storage/src/disk.rs | 111 +++++- sled-storage/src/dump_setup.rs | 11 +- sled-storage/src/error.rs | 2 +- sled-storage/src/keyfile.rs | 68 ++++ sled-storage/src/lib.rs | 5 + sled-storage/src/pool.rs | 8 +- sled-storage/src/state.rs | 3 +- 13 files changed, 536 insertions(+), 463 deletions(-) create mode 100644 sled-storage/src/keyfile.rs diff --git a/Cargo.lock b/Cargo.lock index bdf2d44ea4..a448600863 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5277,6 +5277,8 @@ dependencies = [ "key-manager", "nexus-client 0.1.0", "omicron-common 0.1.0", + "omicron-test-utils", + "rand 0.8.5", "schemars", "serde", "serde_json", diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index e43f2d841d..3d3e544573 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -50,7 +50,7 @@ static KEY_MANAGER_READY: OnceLock<()> = OnceLock::new(); #[derive(thiserror::Error, Debug)] pub enum Error { #[error(transparent)] - DiskError(#[from] sled_hardware::DiskError), + DiskError(#[from] sled_hardware::PooledDiskError), // TODO: We could add the context of "why are we doint this op", maybe? 
#[error(transparent)] @@ -610,7 +610,7 @@ impl StorageWorker { &mut self, unparsed_disk: UnparsedDisk, queued_u2_drives: &mut Option>, - ) -> Result { + ) -> Result { match sled_hardware::Disk::new( &self.log, unparsed_disk.clone(), @@ -619,7 +619,7 @@ impl StorageWorker { .await { Ok(disk) => Ok(disk), - Err(sled_hardware::DiskError::KeyManager(err)) => { + Err(sled_hardware::PooledDiskError::KeyManager(err)) => { warn!( self.log, "Transient error: {err} - queuing disk {:?}", unparsed_disk @@ -630,7 +630,7 @@ impl StorageWorker { *queued_u2_drives = Some(HashSet::from([unparsed_disk.into()])); } - Err(sled_hardware::DiskError::KeyManager(err)) + Err(sled_hardware::PooledDiskError::KeyManager(err)) } Err(err) => { error!( @@ -651,7 +651,7 @@ impl StorageWorker { &mut self, zpool_name: ZpoolName, queued_u2_drives: &mut Option>, - ) -> Result<(), sled_hardware::DiskError> { + ) -> Result<(), sled_hardware::PooledDiskError> { let synthetic_id = DiskIdentity { vendor: "fake_vendor".to_string(), serial: "fake_serial".to_string(), @@ -666,7 +666,7 @@ impl StorageWorker { .await { Ok(()) => Ok(()), - Err(sled_hardware::DiskError::KeyManager(err)) => { + Err(sled_hardware::PooledDiskError::KeyManager(err)) => { warn!( self.log, "Transient error: {err} - queuing synthetic disk: {:?}", @@ -678,7 +678,7 @@ impl StorageWorker { *queued_u2_drives = Some(HashSet::from([zpool_name.into()])); } - Err(sled_hardware::DiskError::KeyManager(err)) + Err(sled_hardware::PooledDiskError::KeyManager(err)) } Err(err) => { error!( diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index aec99ae3f8..bea7e23c73 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -4,34 +4,14 @@ use camino::{Utf8Path, Utf8PathBuf}; use illumos_utils::fstyp::Fstyp; -use illumos_utils::zfs; -use illumos_utils::zfs::DestroyDatasetErrorVariant; -use illumos_utils::zfs::EncryptionDetails; -use illumos_utils::zfs::Keypath; -use illumos_utils::zfs::Mountpoint; -use illumos_utils::zfs::SizeDetails; -use illumos_utils::zfs::Zfs; use illumos_utils::zpool::Zpool; use illumos_utils::zpool::ZpoolKind; use illumos_utils::zpool::ZpoolName; -use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; -use rand::distributions::{Alphanumeric, DistString}; use slog::Logger; use slog::{info, warn}; -use std::sync::OnceLock; -use tokio::fs::{remove_file, File}; -use tokio::io::{AsyncSeekExt, AsyncWriteExt, SeekFrom}; use uuid::Uuid; -/// This path is intentionally on a `tmpfs` to prevent copy-on-write behavior -/// and to ensure it goes away on power off. -/// -/// We want minimize the time the key files are in memory, and so we rederive -/// the keys and recreate the files on demand when creating and mounting -/// encrypted filesystems. We then zero them and unlink them. -pub const KEYPATH_ROOT: &str = "/var/run/oxide/"; - cfg_if::cfg_if! { if #[cfg(target_os = "illumos")] { use crate::illumos::*; @@ -41,7 +21,7 @@ cfg_if::cfg_if! 
{ } #[derive(Debug, thiserror::Error)] -pub enum DiskError { +pub enum PooledDiskError { #[error("Cannot open {path} due to {error}")] IoError { path: Utf8PathBuf, error: std::io::Error }, #[error("Failed to open partition at {path} due to {error}")] @@ -51,10 +31,6 @@ pub enum DiskError { #[error("Requested partition {partition:?} not found on device {path}")] NotFound { path: Utf8PathBuf, partition: Partition }, #[error(transparent)] - DestroyFilesystem(#[from] illumos_utils::zfs::DestroyDatasetError), - #[error(transparent)] - EnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), - #[error(transparent)] ZpoolCreate(#[from] illumos_utils::zpool::CreateError), #[error("Cannot import zpool: {0}")] ZpoolImport(illumos_utils::zpool::Error), @@ -62,18 +38,6 @@ pub enum DiskError { CannotFormatMissingDevPath { path: Utf8PathBuf }, #[error("Formatting M.2 devices is not yet implemented")] CannotFormatM2NotImplemented, - #[error("KeyManager error: {0}")] - KeyManager(#[from] key_manager::Error), - #[error("Missing StorageKeyRequester when creating U.2 disk")] - MissingStorageKeyRequester, - #[error("Encrypted filesystem '{0}' missing 'oxide:epoch' property")] - CannotParseEpochProperty(String), - #[error("Encrypted dataset '{dataset}' cannot set 'oxide:agent' property: {err}")] - CannotSetAgentProperty { - dataset: String, - #[source] - err: Box, - }, } /// A partition (or 'slice') of a disk. @@ -126,17 +90,17 @@ impl DiskPaths { } // Finds the first 'variant' partition, and returns the path to it. - fn partition_device_path( + pub fn partition_device_path( &self, partitions: &[Partition], expected_partition: Partition, raw: bool, - ) -> Result { + ) -> Result { for (index, partition) in partitions.iter().enumerate() { if &expected_partition == partition { let path = self.partition_path(index, raw).ok_or_else(|| { - DiskError::NotFound { + PooledDiskError::NotFound { path: self.devfs_path.clone(), partition: expected_partition, } @@ -144,7 +108,7 @@ impl DiskPaths { return Ok(path); } } - Err(DiskError::NotFound { + Err(PooledDiskError::NotFound { path: self.devfs_path.clone(), partition: expected_partition, }) @@ -202,122 +166,33 @@ impl UnparsedDisk { } } -/// A physical disk conforming to the expected partition layout. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Disk { - paths: DiskPaths, - slot: i64, - variant: DiskVariant, - identity: DiskIdentity, - is_boot_disk: bool, - partitions: Vec, - +/// A physical disk that is partitioned to contain exactly one zpool +/// +/// A PooledDisk relies on hardware specific information to be constructed +/// and is the highest level disk structure in the `sled-hardware` package. +/// The `sled-storage` package contains `Disk`s whose zpool and datasets can be +/// manipulated. This separation exists to remove the hardware dependent logic +/// from the ZFS related logic which can also operate on file backed zpools. +/// Doing things this way allows us to not put higher level concepts like +/// storage keys into this hardware related package. +pub struct PooledDisk { + pub paths: DiskPaths, + pub slot: i64, + pub variant: DiskVariant, + pub identity: DiskIdentity, + pub is_boot_disk: bool, + pub partitions: Vec, // This embeds the assumtion that there is exactly one parsed zpool per // disk. - zpool_name: ZpoolName, + pub zpool_name: ZpoolName, } -// Helper type for describing expected datasets and their optional quota. 
-#[derive(Clone, Copy, Debug)] -struct ExpectedDataset { - // Name for the dataset - name: &'static str, - // Optional quota, in _bytes_ - quota: Option, - // Identifies if the dataset should be deleted on boot - wipe: bool, - // Optional compression mode - compression: Option<&'static str>, -} - -impl ExpectedDataset { - const fn new(name: &'static str) -> Self { - ExpectedDataset { name, quota: None, wipe: false, compression: None } - } - - const fn quota(mut self, quota: usize) -> Self { - self.quota = Some(quota); - self - } - - const fn wipe(mut self) -> Self { - self.wipe = true; - self - } - - const fn compression(mut self, compression: &'static str) -> Self { - self.compression = Some(compression); - self - } -} - -pub const INSTALL_DATASET: &'static str = "install"; -pub const CRASH_DATASET: &'static str = "crash"; -pub const CLUSTER_DATASET: &'static str = "cluster"; -pub const CONFIG_DATASET: &'static str = "config"; -pub const M2_DEBUG_DATASET: &'static str = "debug"; -// TODO-correctness: This value of 100GiB is a pretty wild guess, and should be -// tuned as needed. -pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30); -// ditto. -pub const DUMP_DATASET_QUOTA: usize = 100 * (1 << 30); -// passed to zfs create -o compression= -pub const DUMP_DATASET_COMPRESSION: &'static str = "gzip-9"; - -// U.2 datasets live under the encrypted dataset and inherit encryption -pub const ZONE_DATASET: &'static str = "crypt/zone"; -pub const DUMP_DATASET: &'static str = "crypt/debug"; -pub const U2_DEBUG_DATASET: &'static str = "crypt/debug"; - -// This is the root dataset for all U.2 drives. Encryption is inherited. -pub const CRYPT_DATASET: &'static str = "crypt"; - -const U2_EXPECTED_DATASET_COUNT: usize = 2; -static U2_EXPECTED_DATASETS: [ExpectedDataset; U2_EXPECTED_DATASET_COUNT] = [ - // Stores filesystems for zones - ExpectedDataset::new(ZONE_DATASET).wipe(), - // For storing full kernel RAM dumps - ExpectedDataset::new(DUMP_DATASET) - .quota(DUMP_DATASET_QUOTA) - .compression(DUMP_DATASET_COMPRESSION), -]; - -const M2_EXPECTED_DATASET_COUNT: usize = 5; -static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ - // Stores software images. - // - // Should be duplicated to both M.2s. - ExpectedDataset::new(INSTALL_DATASET), - // Stores crash dumps. - ExpectedDataset::new(CRASH_DATASET), - // Stores cluter configuration information. - // - // Should be duplicated to both M.2s. - ExpectedDataset::new(CLUSTER_DATASET), - // Stores configuration data, including: - // - What services should be launched on this sled - // - Information about how to initialize the Sled Agent - // - (For scrimlets) RSS setup information - // - // Should be duplicated to both M.2s. - ExpectedDataset::new(CONFIG_DATASET), - // Store debugging data, such as service bundles. - ExpectedDataset::new(M2_DEBUG_DATASET).quota(DEBUG_DATASET_QUOTA), -]; - -impl Disk { - /// Create a new Disk - /// - /// WARNING: In all cases where a U.2 is a possible `DiskVariant`, a - /// `StorageKeyRequester` must be passed so that disk encryption can - /// be used. The `StorageManager` for the sled-agent always has a - /// `StorageKeyRequester` available, and so the only place we should pass - /// `None` is for the M.2s touched by the Installinator. 
- pub async fn new( +impl PooledDisk { + /// Create a new PooledDisk + pub fn new( log: &Logger, unparsed_disk: UnparsedDisk, - key_requester: Option<&StorageKeyRequester>, - ) -> Result { + ) -> Result { let paths = &unparsed_disk.paths; let variant = unparsed_disk.variant; // Ensure the GPT has the right format. This does not necessarily @@ -335,13 +210,8 @@ impl Disk { )?; let zpool_name = Self::ensure_zpool_exists(log, variant, &zpool_path)?; - Self::ensure_zpool_ready( - log, - &zpool_name, - &unparsed_disk.identity, - key_requester, - ) - .await?; + Self::ensure_zpool_imported(log, &zpool_name)?; + Self::ensure_zpool_failmode_is_continue(log, &zpool_name)?; Ok(Self { paths: unparsed_disk.paths, @@ -354,29 +224,11 @@ impl Disk { }) } - pub async fn ensure_zpool_ready( - log: &Logger, - zpool_name: &ZpoolName, - disk_identity: &DiskIdentity, - key_requester: Option<&StorageKeyRequester>, - ) -> Result<(), DiskError> { - Self::ensure_zpool_imported(log, &zpool_name)?; - Self::ensure_zpool_failmode_is_continue(log, &zpool_name)?; - Self::ensure_zpool_has_datasets( - log, - &zpool_name, - disk_identity, - key_requester, - ) - .await?; - Ok(()) - } - fn ensure_zpool_exists( log: &Logger, variant: DiskVariant, zpool_path: &Utf8Path, - ) -> Result { + ) -> Result { let zpool_name = match Fstyp::get_zpool(&zpool_path) { Ok(zpool_name) => zpool_name, Err(_) => { @@ -407,7 +259,7 @@ impl Disk { }; Zpool::import(zpool_name.clone()).map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); - DiskError::ZpoolImport(e) + PooledDiskError::ZpoolImport(e) })?; Ok(zpool_name) @@ -416,10 +268,10 @@ impl Disk { fn ensure_zpool_imported( log: &Logger, zpool_name: &ZpoolName, - ) -> Result<(), DiskError> { + ) -> Result<(), PooledDiskError> { Zpool::import(zpool_name.clone()).map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); - DiskError::ZpoolImport(e) + PooledDiskError::ZpoolImport(e) })?; Ok(()) } @@ -427,7 +279,7 @@ impl Disk { fn ensure_zpool_failmode_is_continue( log: &Logger, zpool_name: &ZpoolName, - ) -> Result<(), DiskError> { + ) -> Result<(), PooledDiskError> { // Ensure failmode is set to `continue`. See // https://github.com/oxidecomputer/omicron/issues/2766 for details. The // short version is, each pool is only backed by one vdev. There is no @@ -440,212 +292,10 @@ impl Disk { log, "Failed to set failmode=continue on zpool {zpool_name}: {e}" ); - DiskError::ZpoolImport(e) + PooledDiskError::ZpoolImport(e) })?; Ok(()) } - - // Ensure that the zpool contains all the datasets we would like it to - // contain. 
- async fn ensure_zpool_has_datasets( - log: &Logger, - zpool_name: &ZpoolName, - disk_identity: &DiskIdentity, - key_requester: Option<&StorageKeyRequester>, - ) -> Result<(), DiskError> { - let (root, datasets) = match zpool_name.kind().into() { - DiskVariant::M2 => (None, M2_EXPECTED_DATASETS.iter()), - DiskVariant::U2 => { - (Some(CRYPT_DATASET), U2_EXPECTED_DATASETS.iter()) - } - }; - - let zoned = false; - let do_format = true; - - // Ensure the root encrypted filesystem exists - // Datasets below this in the hierarchy will inherit encryption - if let Some(dataset) = root { - let Some(key_requester) = key_requester else { - return Err(DiskError::MissingStorageKeyRequester); - }; - let mountpoint = zpool_name.dataset_mountpoint(dataset); - let keypath: Keypath = disk_identity.into(); - - let epoch = - if let Ok(epoch_str) = Zfs::get_oxide_value(dataset, "epoch") { - if let Ok(epoch) = epoch_str.parse::() { - epoch - } else { - return Err(DiskError::CannotParseEpochProperty( - dataset.to_string(), - )); - } - } else { - // We got an error trying to call `Zfs::get_oxide_value` - // which indicates that the dataset doesn't exist or there - // was a problem running the command. - // - // Note that `Zfs::get_oxide_value` will succeed even if - // the epoch is missing. `epoch_str` will show up as a dash - // (`-`) and will not parse into a `u64`. So we don't have - // to worry about that case here as it is handled above. - // - // If the error indicated that the command failed for some - // other reason, but the dataset actually existed, we will - // try to create the dataset below and that will fail. So - // there is no harm in just loading the latest secret here. - key_requester.load_latest_secret().await? - }; - - let key = - key_requester.get_key(epoch, disk_identity.clone()).await?; - - let mut keyfile = - KeyFile::create(keypath.clone(), key.expose_secret(), log) - .await - .map_err(|error| DiskError::IoError { - path: keypath.0.clone(), - error, - })?; - - let encryption_details = EncryptionDetails { keypath, epoch }; - - info!( - log, - "Ensuring encrypted filesystem: {} for epoch {}", - dataset, - epoch - ); - let result = Zfs::ensure_filesystem( - &format!("{}/{}", zpool_name, dataset), - Mountpoint::Path(mountpoint), - zoned, - do_format, - Some(encryption_details), - None, - ); - - keyfile.zero_and_unlink().await.map_err(|error| { - DiskError::IoError { path: keyfile.path().0.clone(), error } - })?; - - result?; - }; - - for dataset in datasets.into_iter() { - let mountpoint = zpool_name.dataset_mountpoint(dataset.name); - let name = &format!("{}/{}", zpool_name, dataset.name); - - // Use a value that's alive for the duration of this sled agent - // to answer the question: should we wipe this disk, or have - // we seen it before? - // - // If this value comes from a prior iteration of the sled agent, - // we opt to remove the corresponding dataset. - static AGENT_LOCAL_VALUE: OnceLock = OnceLock::new(); - let agent_local_value = AGENT_LOCAL_VALUE.get_or_init(|| { - Alphanumeric.sample_string(&mut rand::thread_rng(), 20) - }); - - if dataset.wipe { - match Zfs::get_oxide_value(name, "agent") { - Ok(v) if &v == agent_local_value => { - info!( - log, - "Skipping automatic wipe for dataset: {}", name - ); - } - Ok(_) | Err(_) => { - info!( - log, - "Automatically destroying dataset: {}", name - ); - Zfs::destroy_dataset(name).or_else(|err| { - // If we can't find the dataset, that's fine -- it might - // not have been formatted yet. 
- if let DestroyDatasetErrorVariant::NotFound = - err.err - { - Ok(()) - } else { - Err(err) - } - })?; - } - } - } - - let encryption_details = None; - let size_details = Some(SizeDetails { - quota: dataset.quota, - compression: dataset.compression, - }); - Zfs::ensure_filesystem( - name, - Mountpoint::Path(mountpoint), - zoned, - do_format, - encryption_details, - size_details, - )?; - - if dataset.wipe { - Zfs::set_oxide_value(name, "agent", agent_local_value) - .map_err(|err| DiskError::CannotSetAgentProperty { - dataset: name.clone(), - err: Box::new(err), - })?; - } - } - Ok(()) - } - - pub fn is_boot_disk(&self) -> bool { - self.is_boot_disk - } - - pub fn identity(&self) -> &DiskIdentity { - &self.identity - } - - pub fn variant(&self) -> DiskVariant { - self.variant - } - - pub fn devfs_path(&self) -> &Utf8PathBuf { - &self.paths.devfs_path - } - - pub fn zpool_name(&self) -> &ZpoolName { - &self.zpool_name - } - - pub fn boot_image_devfs_path( - &self, - raw: bool, - ) -> Result { - self.paths.partition_device_path( - &self.partitions, - Partition::BootImage, - raw, - ) - } - - pub fn dump_device_devfs_path( - &self, - raw: bool, - ) -> Result { - self.paths.partition_device_path( - &self.partitions, - Partition::DumpDevice, - raw, - ) - } - - pub fn slot(&self) -> i64 { - self.slot - } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -664,56 +314,6 @@ impl From for DiskVariant { } } -/// A file that wraps a zfs encryption key. -/// -/// We put this in a RAM backed filesystem and zero and delete it when we are -/// done with it. Unfortunately we cannot do this inside `Drop` because there is no -/// equivalent async drop. -pub struct KeyFile { - path: Keypath, - file: File, - log: Logger, -} - -impl KeyFile { - pub async fn create( - path: Keypath, - key: &[u8; 32], - log: &Logger, - ) -> std::io::Result { - // TODO: fix this to not truncate - // We want to overwrite any existing contents. - // If we truncate we may leave dirty pages around - // containing secrets. - let mut file = tokio::fs::OpenOptions::new() - .create(true) - .write(true) - .open(&path.0) - .await?; - file.write_all(key).await?; - info!(log, "Created keyfile {}", path); - Ok(KeyFile { path, file, log: log.clone() }) - } - - /// These keyfiles live on a tmpfs and we zero the file so the data doesn't - /// linger on the page in memory. - /// - /// It'd be nice to `impl Drop for `KeyFile` and then call `zero` - /// from within the drop handler, but async `Drop` isn't supported. - pub async fn zero_and_unlink(&mut self) -> std::io::Result<()> { - let zeroes = [0u8; 32]; - let _ = self.file.seek(SeekFrom::Start(0)).await?; - self.file.write_all(&zeroes).await?; - info!(self.log, "Zeroed and unlinked keyfile {}", self.path); - remove_file(&self.path().0).await?; - Ok(()) - } - - pub fn path(&self) -> &Keypath { - &self.path - } -} - #[cfg(test)] mod test { use super::*; @@ -825,7 +425,7 @@ mod test { paths .partition_device_path(&[], Partition::ZfsPool, false) .expect_err("Should not have found partition"), - DiskError::NotFound { .. }, + PooledDiskError::NotFound { .. }, )); } } diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 950074bd3a..ee745fc78b 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -5,7 +5,7 @@ //! illumos-specific mechanisms for parsing disk info. 
use crate::illumos::gpt; -use crate::{DiskError, DiskPaths, DiskVariant, Partition}; +use crate::{DiskPaths, DiskVariant, Partition, PooledDiskError}; use camino::Utf8Path; use illumos_utils::zpool::ZpoolName; use slog::info; @@ -41,9 +41,9 @@ fn parse_partition_types( path: &Utf8Path, partitions: &Vec, expected_partitions: &[Partition; N], -) -> Result, DiskError> { +) -> Result, PooledDiskError> { if partitions.len() != N { - return Err(DiskError::BadPartitionLayout { + return Err(PooledDiskError::BadPartitionLayout { path: path.to_path_buf(), why: format!( "Expected {} partitions, only saw {}", @@ -54,7 +54,7 @@ fn parse_partition_types( } for i in 0..N { if partitions[i].index() != i { - return Err(DiskError::BadPartitionLayout { + return Err(PooledDiskError::BadPartitionLayout { path: path.to_path_buf(), why: format!( "The {i}-th partition has index {}", @@ -80,7 +80,7 @@ pub fn ensure_partition_layout( log: &Logger, paths: &DiskPaths, variant: DiskVariant, -) -> Result, DiskError> { +) -> Result, PooledDiskError> { internal_ensure_partition_layout::(log, paths, variant) } @@ -90,7 +90,7 @@ fn internal_ensure_partition_layout( log: &Logger, paths: &DiskPaths, variant: DiskVariant, -) -> Result, DiskError> { +) -> Result, PooledDiskError> { // Open the "Whole Disk" as a raw device to be parsed by the // libefi-illumos library. This lets us peek at the GPT before // making too many assumptions about it. @@ -114,7 +114,9 @@ fn internal_ensure_partition_layout( let dev_path = if let Some(dev_path) = &paths.dev_path { dev_path } else { - return Err(DiskError::CannotFormatMissingDevPath { path }); + return Err(PooledDiskError::CannotFormatMissingDevPath { + path, + }); }; match variant { DiskVariant::U2 => { @@ -129,12 +131,12 @@ fn internal_ensure_partition_layout( // the expected partitions? Or would it be wiser to infer // that this indicates an unexpected error conditions that // needs mitigation? - return Err(DiskError::CannotFormatM2NotImplemented); + return Err(PooledDiskError::CannotFormatM2NotImplemented); } } } Err(err) => { - return Err(DiskError::Gpt { + return Err(PooledDiskError::Gpt { path, error: anyhow::Error::new(err), }); @@ -197,7 +199,7 @@ mod test { DiskVariant::U2, ); match result { - Err(DiskError::CannotFormatMissingDevPath { .. }) => {} + Err(PooledDiskError::CannotFormatMissingDevPath { .. }) => {} _ => panic!("Should have failed with a missing dev path error"), } @@ -373,7 +375,7 @@ mod test { DiskVariant::M2, ) .expect_err("Should have failed parsing empty GPT"), - DiskError::BadPartitionLayout { .. } + PooledDiskError::BadPartitionLayout { .. } )); logctx.cleanup_successful(); @@ -398,7 +400,7 @@ mod test { DiskVariant::U2, ) .expect_err("Should have failed parsing empty GPT"), - DiskError::BadPartitionLayout { .. } + PooledDiskError::BadPartitionLayout { .. 
} )); logctx.cleanup_successful(); diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index 03f0f608de..ae9718382d 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -14,6 +14,7 @@ key-manager.workspace = true # We could put this in the nexus-client instead nexus-client.workspace = true omicron-common.workspace = true +rand.workspace = true schemars = { workspace = true, features = [ "chrono", "uuid1" ] } serde.workspace = true serde_json.workspace = true @@ -25,3 +26,7 @@ slog.workspace = true thiserror.workspace = true tokio.workspace = true uuid.workspace = true + +[dev-dependencies] +illumos-utils = { workspace = true, features = ["testing"] } +omicron-test-utils.workspace = true diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index e521dd963a..3c40dc10f0 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -2,10 +2,112 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +//! ZFS dataset related functionality + +use crate::keyfile::KeyFile; +use camino::Utf8PathBuf; +use illumos_utils::zfs::{ + self, DestroyDatasetErrorVariant, EncryptionDetails, Keypath, Mountpoint, + SizeDetails, Zfs, +}; use illumos_utils::zpool::ZpoolName; +use key_manager::StorageKeyRequester; +use omicron_common::disk::DiskIdentity; +use rand::distributions::{Alphanumeric, DistString}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_hardware::DiskVariant; +use slog::{info, Logger}; use std::str::FromStr; +use std::sync::OnceLock; + +pub const INSTALL_DATASET: &'static str = "install"; +pub const CRASH_DATASET: &'static str = "crash"; +pub const CLUSTER_DATASET: &'static str = "cluster"; +pub const CONFIG_DATASET: &'static str = "config"; +pub const M2_DEBUG_DATASET: &'static str = "debug"; +// TODO-correctness: This value of 100GiB is a pretty wild guess, and should be +// tuned as needed. +pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30); +// ditto. +pub const DUMP_DATASET_QUOTA: usize = 100 * (1 << 30); +// passed to zfs create -o compression= +pub const DUMP_DATASET_COMPRESSION: &'static str = "gzip-9"; + +// U.2 datasets live under the encrypted dataset and inherit encryption +pub const ZONE_DATASET: &'static str = "crypt/zone"; +pub const DUMP_DATASET: &'static str = "crypt/debug"; +pub const U2_DEBUG_DATASET: &'static str = "crypt/debug"; + +// This is the root dataset for all U.2 drives. Encryption is inherited. +pub const CRYPT_DATASET: &'static str = "crypt"; + +const U2_EXPECTED_DATASET_COUNT: usize = 2; +static U2_EXPECTED_DATASETS: [ExpectedDataset; U2_EXPECTED_DATASET_COUNT] = [ + // Stores filesystems for zones + ExpectedDataset::new(ZONE_DATASET).wipe(), + // For storing full kernel RAM dumps + ExpectedDataset::new(DUMP_DATASET) + .quota(DUMP_DATASET_QUOTA) + .compression(DUMP_DATASET_COMPRESSION), +]; + +const M2_EXPECTED_DATASET_COUNT: usize = 5; +static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ + // Stores software images. + // + // Should be duplicated to both M.2s. + ExpectedDataset::new(INSTALL_DATASET), + // Stores crash dumps. + ExpectedDataset::new(CRASH_DATASET), + // Stores cluter configuration information. + // + // Should be duplicated to both M.2s. 
+ ExpectedDataset::new(CLUSTER_DATASET), + // Stores configuration data, including: + // - What services should be launched on this sled + // - Information about how to initialize the Sled Agent + // - (For scrimlets) RSS setup information + // + // Should be duplicated to both M.2s. + ExpectedDataset::new(CONFIG_DATASET), + // Store debugging data, such as service bundles. + ExpectedDataset::new(M2_DEBUG_DATASET).quota(DEBUG_DATASET_QUOTA), +]; + +// Helper type for describing expected datasets and their optional quota. +#[derive(Clone, Copy, Debug)] +struct ExpectedDataset { + // Name for the dataset + name: &'static str, + // Optional quota, in _bytes_ + quota: Option, + // Identifies if the dataset should be deleted on boot + wipe: bool, + // Optional compression mode + compression: Option<&'static str>, +} + +impl ExpectedDataset { + const fn new(name: &'static str) -> Self { + ExpectedDataset { name, quota: None, wipe: false, compression: None } + } + + const fn quota(mut self, quota: usize) -> Self { + self.quota = Some(quota); + self + } + + const fn wipe(mut self) -> Self { + self.wipe = true; + self + } + + const fn compression(mut self, compression: &'static str) -> Self { + self.compression = Some(compression); + self + } +} /// The type of a dataset, and an auxiliary information necessary /// to successfully launch a zone managing the associated data. @@ -105,6 +207,178 @@ impl From for sled_agent_client::types::DatasetName { } } +#[derive(Debug, thiserror::Error)] +pub enum DatasetError { + #[error("Cannot open {path} due to {error}")] + IoError { path: Utf8PathBuf, error: std::io::Error }, + #[error(transparent)] + DestroyFilesystem(#[from] illumos_utils::zfs::DestroyDatasetError), + #[error(transparent)] + EnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), + #[error("KeyManager error: {0}")] + KeyManager(#[from] key_manager::Error), + #[error("Missing StorageKeyRequester when creating U.2 disk")] + MissingStorageKeyRequester, + #[error("Encrypted filesystem '{0}' missing 'oxide:epoch' property")] + CannotParseEpochProperty(String), + #[error("Encrypted dataset '{dataset}' cannot set 'oxide:agent' property: {err}")] + CannotSetAgentProperty { + dataset: String, + #[source] + err: Box, + }, +} + +/// Ensure that the zpool contains all the datasets we would like it to +/// contain. +/// +/// WARNING: In all cases where a U.2 is a possible `DiskVariant`, a +/// `StorageKeyRequester` must be passed so that disk encryption can +/// be used. The `StorageManager` for the sled-agent always has a +/// `StorageKeyRequester` available, and so the only place we should pass +/// `None` is for the M.2s touched by the Installinator. 
+pub async fn ensure_zpool_has_datasets( + log: &Logger, + zpool_name: &ZpoolName, + disk_identity: &DiskIdentity, + key_requester: Option<&StorageKeyRequester>, +) -> Result<(), DatasetError> { + let (root, datasets) = match zpool_name.kind().into() { + DiskVariant::M2 => (None, M2_EXPECTED_DATASETS.iter()), + DiskVariant::U2 => (Some(CRYPT_DATASET), U2_EXPECTED_DATASETS.iter()), + }; + + let zoned = false; + let do_format = true; + + // Ensure the root encrypted filesystem exists + // Datasets below this in the hierarchy will inherit encryption + if let Some(dataset) = root { + let Some(key_requester) = key_requester else { + return Err(DatasetError::MissingStorageKeyRequester); + }; + let mountpoint = zpool_name.dataset_mountpoint(dataset); + let keypath: Keypath = disk_identity.into(); + + let epoch = + if let Ok(epoch_str) = Zfs::get_oxide_value(dataset, "epoch") { + if let Ok(epoch) = epoch_str.parse::() { + epoch + } else { + return Err(DatasetError::CannotParseEpochProperty( + dataset.to_string(), + )); + } + } else { + // We got an error trying to call `Zfs::get_oxide_value` + // which indicates that the dataset doesn't exist or there + // was a problem running the command. + // + // Note that `Zfs::get_oxide_value` will succeed even if + // the epoch is missing. `epoch_str` will show up as a dash + // (`-`) and will not parse into a `u64`. So we don't have + // to worry about that case here as it is handled above. + // + // If the error indicated that the command failed for some + // other reason, but the dataset actually existed, we will + // try to create the dataset below and that will fail. So + // there is no harm in just loading the latest secret here. + key_requester.load_latest_secret().await? + }; + + let key = key_requester.get_key(epoch, disk_identity.clone()).await?; + + let mut keyfile = + KeyFile::create(keypath.clone(), key.expose_secret(), log) + .await + .map_err(|error| DatasetError::IoError { + path: keypath.0.clone(), + error, + })?; + + let encryption_details = EncryptionDetails { keypath, epoch }; + + info!( + log, + "Ensuring encrypted filesystem: {} for epoch {}", dataset, epoch + ); + let result = Zfs::ensure_filesystem( + &format!("{}/{}", zpool_name, dataset), + Mountpoint::Path(mountpoint), + zoned, + do_format, + Some(encryption_details), + None, + ); + + keyfile.zero_and_unlink().await.map_err(|error| { + DatasetError::IoError { path: keyfile.path().0.clone(), error } + })?; + + result?; + }; + + for dataset in datasets.into_iter() { + let mountpoint = zpool_name.dataset_mountpoint(dataset.name); + let name = &format!("{}/{}", zpool_name, dataset.name); + + // Use a value that's alive for the duration of this sled agent + // to answer the question: should we wipe this disk, or have + // we seen it before? + // + // If this value comes from a prior iteration of the sled agent, + // we opt to remove the corresponding dataset. + static AGENT_LOCAL_VALUE: OnceLock = OnceLock::new(); + let agent_local_value = AGENT_LOCAL_VALUE.get_or_init(|| { + Alphanumeric.sample_string(&mut rand::thread_rng(), 20) + }); + + if dataset.wipe { + match Zfs::get_oxide_value(name, "agent") { + Ok(v) if &v == agent_local_value => { + info!(log, "Skipping automatic wipe for dataset: {}", name); + } + Ok(_) | Err(_) => { + info!(log, "Automatically destroying dataset: {}", name); + Zfs::destroy_dataset(name).or_else(|err| { + // If we can't find the dataset, that's fine -- it might + // not have been formatted yet. 
+ if let DestroyDatasetErrorVariant::NotFound = err.err { + Ok(()) + } else { + Err(err) + } + })?; + } + } + } + + let encryption_details = None; + let size_details = Some(SizeDetails { + quota: dataset.quota, + compression: dataset.compression, + }); + Zfs::ensure_filesystem( + name, + Mountpoint::Path(mountpoint), + zoned, + do_format, + encryption_details, + size_details, + )?; + + if dataset.wipe { + Zfs::set_oxide_value(name, "agent", agent_local_value).map_err( + |err| DatasetError::CannotSetAgentProperty { + dataset: name.clone(), + err: Box::new(err), + }, + )?; + } + } + Ok(()) +} + #[cfg(test)] mod test { use super::*; diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index aef68528bf..d7e02d8c97 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -6,8 +6,15 @@ use camino::Utf8PathBuf; use illumos_utils::zpool::{ZpoolKind, ZpoolName}; +use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; -use sled_hardware::{Disk, DiskVariant}; +use sled_hardware::{ + DiskPaths, DiskVariant, Partition, PooledDisk, PooledDiskError, + UnparsedDisk, +}; +use slog::Logger; + +use crate::dataset; /// A wrapper around real disks or synthetic disks backed by a file #[derive(Debug, PartialEq, Eq, Clone)] @@ -55,3 +62,105 @@ impl DiskWrapper { } } } + +#[derive(Debug, thiserror::Error)] +pub enum DiskError { + #[error(transparent)] + Dataset(#[from] crate::dataset::DatasetError), + #[error(transparent)] + PooledDisk(#[from] sled_hardware::PooledDiskError), +} + +/// A physical disk conforming to the expected partition layout +/// and which contains provisioned zpools and datasets. This disk +/// is ready for usage by higher level software. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Disk { + paths: DiskPaths, + slot: i64, + variant: DiskVariant, + identity: DiskIdentity, + is_boot_disk: bool, + partitions: Vec, + + // This embeds the assumtion that there is exactly one parsed zpool per + // disk. 
+ zpool_name: ZpoolName, +} + +impl Disk { + pub async fn new( + log: &Logger, + unparsed_disk: UnparsedDisk, + key_requester: Option<&StorageKeyRequester>, + ) -> Result { + let disk = PooledDisk::new(log, unparsed_disk)?; + dataset::ensure_zpool_has_datasets( + log, + &disk.zpool_name, + &disk.identity, + key_requester, + ) + .await?; + Ok(disk.into()) + } + pub fn is_boot_disk(&self) -> bool { + self.is_boot_disk + } + + pub fn identity(&self) -> &DiskIdentity { + &self.identity + } + + pub fn variant(&self) -> DiskVariant { + self.variant + } + + pub fn devfs_path(&self) -> &Utf8PathBuf { + &self.paths.devfs_path + } + + pub fn zpool_name(&self) -> &ZpoolName { + &self.zpool_name + } + + pub fn boot_image_devfs_path( + &self, + raw: bool, + ) -> Result { + self.paths.partition_device_path( + &self.partitions, + Partition::BootImage, + raw, + ) + } + + pub fn dump_device_devfs_path( + &self, + raw: bool, + ) -> Result { + self.paths.partition_device_path( + &self.partitions, + Partition::DumpDevice, + raw, + ) + } + + pub fn slot(&self) -> i64 { + self.slot + } +} + +impl From for Disk { + fn from(pd: PooledDisk) -> Self { + Self { + paths: pd.paths, + slot: pd.slot, + variant: pd.variant, + identity: pd.identity, + is_boot_disk: pd.is_boot_disk, + partitions: pd.partitions, + zpool_name: pd.zpool_name, + } + } +} diff --git a/sled-storage/src/dump_setup.rs b/sled-storage/src/dump_setup.rs index ea51251f84..5befa8e8c8 100644 --- a/sled-storage/src/dump_setup.rs +++ b/sled-storage/src/dump_setup.rs @@ -1,3 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Dump dataset setup + +use crate::dataset::{CRASH_DATASET, DUMP_DATASET}; use crate::disk::DiskWrapper; use camino::Utf8PathBuf; use derive_more::{AsRef, Deref, From}; @@ -70,11 +77,11 @@ trait GetMountpoint: std::ops::Deref { } impl GetMountpoint for DebugZpool { type NewType = DebugDataset; - const MOUNTPOINT: &'static str = sled_hardware::disk::DUMP_DATASET; + const MOUNTPOINT: &'static str = DUMP_DATASET; } impl GetMountpoint for CoreZpool { type NewType = CoreDataset; - const MOUNTPOINT: &'static str = sled_hardware::disk::CRASH_DATASET; + const MOUNTPOINT: &'static str = CRASH_DATASET; } struct DumpSetupWorker { diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs index d2a2a473b1..04c4f7ec07 100644 --- a/sled-storage/src/error.rs +++ b/sled-storage/src/error.rs @@ -12,7 +12,7 @@ use uuid::Uuid; #[derive(thiserror::Error, Debug)] pub enum Error { #[error(transparent)] - DiskError(#[from] sled_hardware::DiskError), + DiskError(#[from] sled_hardware::PooledDiskError), // TODO: We could add the context of "why are we doint this op", maybe? #[error(transparent)] diff --git a/sled-storage/src/keyfile.rs b/sled-storage/src/keyfile.rs new file mode 100644 index 0000000000..396c860fc5 --- /dev/null +++ b/sled-storage/src/keyfile.rs @@ -0,0 +1,68 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Key file support for ZFS dataset encryption + +use illumos_utils::zfs::Keypath; +use slog::{info, Logger}; +use tokio::fs::{remove_file, File}; +use tokio::io::{AsyncSeekExt, AsyncWriteExt, SeekFrom}; + +/// This path is intentionally on a `tmpfs` to prevent copy-on-write behavior +/// and to ensure it goes away on power off. +/// +/// We want minimize the time the key files are in memory, and so we rederive +/// the keys and recreate the files on demand when creating and mounting +/// encrypted filesystems. We then zero them and unlink them. +pub const KEYPATH_ROOT: &str = "/var/run/oxide/"; + +/// A file that wraps a zfs encryption key. +/// +/// We put this in a RAM backed filesystem and zero and delete it when we are +/// done with it. Unfortunately we cannot do this inside `Drop` because there is no +/// equivalent async drop. +pub struct KeyFile { + path: Keypath, + file: File, + log: Logger, +} + +impl KeyFile { + pub async fn create( + path: Keypath, + key: &[u8; 32], + log: &Logger, + ) -> std::io::Result { + // TODO: fix this to not truncate + // We want to overwrite any existing contents. + // If we truncate we may leave dirty pages around + // containing secrets. + let mut file = tokio::fs::OpenOptions::new() + .create(true) + .write(true) + .open(&path.0) + .await?; + file.write_all(key).await?; + info!(log, "Created keyfile {}", path); + Ok(KeyFile { path, file, log: log.clone() }) + } + + /// These keyfiles live on a tmpfs and we zero the file so the data doesn't + /// linger on the page in memory. + /// + /// It'd be nice to `impl Drop for `KeyFile` and then call `zero` + /// from within the drop handler, but async `Drop` isn't supported. + pub async fn zero_and_unlink(&mut self) -> std::io::Result<()> { + let zeroes = [0u8; 32]; + let _ = self.file.seek(SeekFrom::Start(0)).await?; + self.file.write_all(&zeroes).await?; + info!(self.log, "Zeroed and unlinked keyfile {}", self.path); + remove_file(&self.path().0).await?; + Ok(()) + } + + pub fn path(&self) -> &Keypath { + &self.path + } +} diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs index a1bd4eecfb..783eaf6642 100644 --- a/sled-storage/src/lib.rs +++ b/sled-storage/src/lib.rs @@ -3,10 +3,15 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! Local storage abstraction for use by sled-agent +//! +//! This abstraction operates at the ZFS level and relies on zpool setup on +//! hardware partitions from the `sled-hardware` crate. It utilizes the +//! `illumos-utils` crate to actually perform ZFS related OS calls. pub(crate) mod dataset; pub(crate) mod disk; pub(crate) mod dump_setup; pub mod error; +pub(crate) mod keyfile; pub(crate) mod pool; pub mod state; diff --git a/sled-storage/src/pool.rs b/sled-storage/src/pool.rs index 4a9960da4c..1abf43c1de 100644 --- a/sled-storage/src/pool.rs +++ b/sled-storage/src/pool.rs @@ -9,9 +9,9 @@ use illumos_utils::zpool::{ZpoolInfo, ZpoolName}; use omicron_common::disk::DiskIdentity; #[cfg(test)] -use illumos_utils::{zfs::MockZfs as Zfs, zpool::MockZpool as Zpool}; +use illumos_utils::zpool::MockZpool as Zpool; #[cfg(not(test))] -use illumos_utils::{zfs::Zfs, zpool::Zpool}; +use illumos_utils::zpool::Zpool; /// A ZFS storage pool #[derive(Debug, Clone)] @@ -25,12 +25,12 @@ impl Pool { /// Queries for an existing Zpool by name. /// /// Returns Ok if the pool exists. 
- fn new(name: ZpoolName, parent: DiskIdentity) -> Result { + pub fn new(name: ZpoolName, parent: DiskIdentity) -> Result { let info = Zpool::get_info(&name.to_string())?; Ok(Pool { name, info, parent }) } - fn parent(&self) -> &DiskIdentity { + pub fn parent(&self) -> &DiskIdentity { &self.parent } } diff --git a/sled-storage/src/state.rs b/sled-storage/src/state.rs index a7de70999e..8a0be34f63 100644 --- a/sled-storage/src/state.rs +++ b/sled-storage/src/state.rs @@ -4,6 +4,7 @@ //! The internal state of the storage manager task +use crate::dataset::M2_DEBUG_DATASET; use crate::disk::DiskWrapper; use crate::pool::Pool; use camino::Utf8PathBuf; @@ -110,7 +111,7 @@ impl State { /// Return the directories for storing zone service bundles. pub fn all_zone_bundle_directories(&self) -> Vec { - self.all_m2_mountpoints(sled_hardware::disk::M2_DEBUG_DATASET) + self.all_m2_mountpoints(M2_DEBUG_DATASET) .into_iter() .map(|p| p.join(BUNDLE_DIRECTORY).join(ZONE_BUNDLE_DIRECTORY)) .collect() From ab57c4671e5a0ffdf7f050eb29a7ba3afa4c5fcc Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 28 Sep 2023 22:51:06 +0000 Subject: [PATCH 04/66] wip --- common/src/disk.rs | 2 +- sled-storage/src/error.rs | 3 +- sled-storage/src/lib.rs | 3 +- sled-storage/src/manager.rs | 110 ++++++++++++++++++++ sled-storage/src/pool.rs | 10 +- sled-storage/src/{state.rs => resources.rs} | 30 ++++-- 6 files changed, 141 insertions(+), 17 deletions(-) create mode 100644 sled-storage/src/manager.rs rename sled-storage/src/{state.rs => resources.rs} (81%) diff --git a/common/src/disk.rs b/common/src/disk.rs index 3ea8091326..3ae9c31e01 100644 --- a/common/src/disk.rs +++ b/common/src/disk.rs @@ -5,7 +5,7 @@ //! Disk related types shared among crates /// Uniquely identifies a disk. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] pub struct DiskIdentity { pub vendor: String, pub serial: String, diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs index 04c4f7ec07..fbf721fab7 100644 --- a/sled-storage/src/error.rs +++ b/sled-storage/src/error.rs @@ -5,6 +5,7 @@ //! Storage related errors use crate::dataset::DatasetName; +use crate::disk::DiskError; use camino::Utf8PathBuf; use omicron_common::api::external::ByteCountRangeError; use uuid::Uuid; @@ -12,7 +13,7 @@ use uuid::Uuid; #[derive(thiserror::Error, Debug)] pub enum Error { #[error(transparent)] - DiskError(#[from] sled_hardware::PooledDiskError), + DiskError(#[from] DiskError), // TODO: We could add the context of "why are we doint this op", maybe? #[error(transparent)] diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs index 783eaf6642..f923165896 100644 --- a/sled-storage/src/lib.rs +++ b/sled-storage/src/lib.rs @@ -13,5 +13,6 @@ pub(crate) mod disk; pub(crate) mod dump_setup; pub mod error; pub(crate) mod keyfile; +pub mod manager; pub(crate) mod pool; -pub mod state; +pub mod resources; diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs new file mode 100644 index 0000000000..dbbe5fb57a --- /dev/null +++ b/sled-storage/src/manager.rs @@ -0,0 +1,110 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
The storage manager task + +use std::collections::{BTreeSet, HashSet}; + +use crate::dataset::DatasetError; +use crate::disk::{Disk, DiskError, DiskWrapper}; +use crate::error::Error; +use crate::resources::StorageResources; +use derive_more::From; +use illumos_utils::zpool::{ZpoolKind, ZpoolName}; +use key_manager::StorageKeyRequester; +use omicron_common::disk::DiskIdentity; +use sled_hardware::{DiskVariant, UnparsedDisk}; +use slog::{error, info, o, warn, Logger}; +use tokio::sync::{mpsc, oneshot}; + +// The size of the mpsc bounded channel used to communicate +// between the `StorageHandle` and `StorageManager`. +const QUEUE_SIZE: usize = 256; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum StorageManagerStage { + WaitingForBootDisk, + WaitingForKeyManager, + QueuingDisks, + Normal, +} + +enum StorageRequest {} + +/// A mechanism for interacting with the [`StorageManager`] +pub struct StorageHandle { + tx: mpsc::Sender, +} + +/// The storage manager responsible for the state of the storage +/// on a sled. The storage manager runs in its own task and is interacted +/// with via the [`StorageHandle`]. +pub struct StorageManager { + log: Logger, + stage: StorageManagerStage, + rx: mpsc::Receiver, + resources: StorageResources, + queued_u2_drives: HashSet, + queued_synthetic_u2_drives: BTreeSet, + key_requester: StorageKeyRequester, +} + +impl StorageManager { + pub fn new( + log: &Logger, + key_requester: StorageKeyRequester, + ) -> (StorageManager, StorageHandle) { + let (tx, rx) = mpsc::channel(QUEUE_SIZE); + ( + StorageManager { + log: log.new(o!("component" => "StorageManager")), + stage: StorageManagerStage::WaitingForBootDisk, + rx, + resources: StorageResources::default(), + queued_u2_drives: HashSet::new(), + queued_synthetic_u2_drives: BTreeSet::new(), + key_requester, + }, + StorageHandle { tx }, + ) + } + + /// Add a disk to storage resources or queue it to be added later + async fn add_u2_disk( + &mut self, + unparsed_disk: UnparsedDisk, + ) -> Result<(), Error> { + if self.stage != StorageManagerStage::Normal { + self.queued_u2_drives.insert(unparsed_disk); + return Ok(()); + } + + match Disk::new( + &self.log, + unparsed_disk.clone(), + Some(&self.key_requester), + ) + .await + { + Ok(disk) => self.resources.insert_real_disk(disk), + Err(err @ DiskError::Dataset(DatasetError::KeyManager(_))) => { + warn!( + self.log, + "Transient error: {err} - queuing disk {:?}", unparsed_disk + ); + self.queued_u2_drives.insert(unparsed_disk); + self.stage = StorageManagerStage::QueuingDisks; + Err(err.into()) + } + Err(err) => { + error!( + self.log, + "Persistent error: {err} - not queueing disk {:?}", + unparsed_disk + ); + Err(err.into()) + } + } + } +} diff --git a/sled-storage/src/pool.rs b/sled-storage/src/pool.rs index 1abf43c1de..a16722537d 100644 --- a/sled-storage/src/pool.rs +++ b/sled-storage/src/pool.rs @@ -16,9 +16,9 @@ use illumos_utils::zpool::Zpool; /// A ZFS storage pool #[derive(Debug, Clone)] pub struct Pool { - name: ZpoolName, - info: ZpoolInfo, - parent: DiskIdentity, + pub name: ZpoolName, + pub info: ZpoolInfo, + pub parent: DiskIdentity, } impl Pool { @@ -29,8 +29,4 @@ impl Pool { let info = Zpool::get_info(&name.to_string())?; Ok(Pool { name, info, parent }) } - - pub fn parent(&self) -> &DiskIdentity { - &self.parent - } } diff --git a/sled-storage/src/state.rs b/sled-storage/src/resources.rs similarity index 81% rename from sled-storage/src/state.rs rename to sled-storage/src/resources.rs index 8a0be34f63..0e874be522 100644 --- a/sled-storage/src/state.rs 
+++ b/sled-storage/src/resources.rs @@ -2,13 +2,15 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! The internal state of the storage manager task +//! Discovered and usable disks and zpools use crate::dataset::M2_DEBUG_DATASET; -use crate::disk::DiskWrapper; +use crate::disk::{Disk, DiskWrapper}; +use crate::error::Error; use crate::pool::Pool; use camino::Utf8PathBuf; use illumos_utils::zpool::ZpoolName; +use omicron_common::api::external::{ByteCount, ByteCountRangeError}; use omicron_common::disk::DiskIdentity; use sled_hardware::DiskVariant; use std::collections::BTreeMap; @@ -21,7 +23,7 @@ const BUNDLE_DIRECTORY: &str = "bundle"; // The directory for zone bundles. const ZONE_BUNDLE_DIRECTORY: &str = "zone"; -/// Storage related state +/// Storage related resources: disks and zpools /// /// This state is internal to the [`crate::StorageManager`] task. Clones /// of this state, or subsets of it, can be retrieved by requests to the @@ -34,10 +36,10 @@ const ZONE_BUNDLE_DIRECTORY: &str = "zone"; /// inside the `StorageManager` task if there are any outstanding copies. /// Therefore, we only pay the cost to update infrequently, and no locks are /// required by callers when operating on cloned data. The only contention here -/// is for the refrence counters of the internal Arcs when `State` gets cloned +/// is for the refrence counters of the internal Arcs when `StorageResources` gets cloned /// or dropped. -#[derive(Debug, Clone)] -pub struct State { +#[derive(Debug, Clone, Default)] +pub struct StorageResources { // All disks, real and synthetic, being managed by this sled disks: Arc>, @@ -45,7 +47,21 @@ pub struct State { pools: Arc>, } -impl State { +impl StorageResources { + /// Insert a disk and its zpool + pub(crate) fn insert_real_disk(&mut self, disk: Disk) -> Result<(), Error> { + let parent = disk.identity().clone(); + let zpool_name = disk.zpool_name().clone(); + let disk = DiskWrapper::Real { + disk: disk.clone(), + devfs_path: disk.devfs_path().clone(), + }; + Arc::make_mut(&mut self.disks).insert(disk.identity(), disk); + let zpool = Pool::new(zpool_name, parent)?; + Arc::make_mut(&mut self.pools).insert(zpool.name.id(), zpool); + Ok(()) + } + /// Returns the identity of the boot disk. /// /// If this returns `None`, we have not processed the boot disk yet. From 90ec972e636df289d39bab52eff57c09415300fa Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Fri, 29 Sep 2023 05:11:00 +0000 Subject: [PATCH 05/66] wip --- sled-storage/src/manager.rs | 129 ++++++++++++++++++++++++++++++++-- sled-storage/src/resources.rs | 13 ++++ 2 files changed, 138 insertions(+), 4 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index dbbe5fb57a..c792fde243 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -6,7 +6,7 @@ use std::collections::{BTreeSet, HashSet}; -use crate::dataset::DatasetError; +use crate::dataset::{self, DatasetError}; use crate::disk::{Disk, DiskError, DiskWrapper}; use crate::error::Error; use crate::resources::StorageResources; @@ -46,7 +46,7 @@ pub struct StorageManager { rx: mpsc::Receiver, resources: StorageResources, queued_u2_drives: HashSet, - queued_synthetic_u2_drives: BTreeSet, + queued_synthetic_u2_drives: HashSet, key_requester: StorageKeyRequester, } @@ -63,14 +63,14 @@ impl StorageManager { rx, resources: StorageResources::default(), queued_u2_drives: HashSet::new(), - queued_synthetic_u2_drives: BTreeSet::new(), + queued_synthetic_u2_drives: HashSet::new(), key_requester, }, StorageHandle { tx }, ) } - /// Add a disk to storage resources or queue it to be added later + /// Add a real U.2 disk to storage resources or queue it to be added later async fn add_u2_disk( &mut self, unparsed_disk: UnparsedDisk, @@ -107,4 +107,125 @@ impl StorageManager { } } } + + /// Add a synthetic U.2 disk to storage resources or queue it to be added later + async fn add_synthetic_u2_disk( + &mut self, + zpool_name: ZpoolName, + ) -> Result<(), Error> { + if self.stage != StorageManagerStage::Normal { + self.queued_synthetic_u2_drives.insert(zpool_name); + return Ok(()); + } + + let synthetic_id = DiskIdentity { + vendor: "fake_vendor".to_string(), + serial: "fake_serial".to_string(), + model: zpool_name.id().to_string(), + }; + match dataset::ensure_zpool_has_datasets( + &self.log, + &zpool_name, + &synthetic_id, + Some(&self.key_requester), + ) + .await + { + Ok(disk) => self.resources.insert_synthetic_disk(zpool_name), + Err(err @ DatasetError::KeyManager(_)) => { + warn!( + self.log, + "Transient error: {err} - queuing disk {:?}", synthetic_id + ); + self.queued_synthetic_u2_drives.insert(zpool_name); + self.stage = StorageManagerStage::QueuingDisks; + Err(DiskError::Dataset(err).into()) + } + Err(err) => { + error!( + self.log, + "Persistent error: {err} - not queueing disk {:?}", + synthetic_id + ); + Err(DiskError::Dataset(err).into()) + } + } + } +} + +/// All tests only use synthetic disks, but are expected to be run on illumos +/// systems. 
+#[cfg(all(test, target_os = "illumos"))] +mod tests { + use super::*; + use async_trait::async_trait; + use key_manager::{ + KeyManager, SecretRetriever, SecretRetrieverError, SecretState, + VersionedIkm, + }; + use uuid::Uuid; + + pub fn log() -> slog::Logger { + let drain = slog::Discard; + slog::Logger::root(drain, o!()) + } + + /// A [`key-manager::SecretRetriever`] that only returns hardcoded IKM for + /// epoch 0 + #[derive(Debug)] + struct HardcodedSecretRetriever {} + + #[async_trait] + impl SecretRetriever for HardcodedSecretRetriever { + async fn get_latest( + &self, + ) -> Result { + let epoch = 0; + let salt = [0u8; 32]; + let secret = [0x1d; 32]; + + Ok(VersionedIkm::new(epoch, salt, &secret)) + } + + /// We don't plan to do any key rotation before trust quorum is ready + async fn get( + &self, + epoch: u64, + ) -> Result { + if epoch != 0 { + return Err(SecretRetrieverError::NoSuchEpoch(epoch)); + } + Ok(SecretState::Current(self.get_latest().await?)) + } + } + + #[tokio::test] + async fn add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued() { + let (mut _key_manager, key_requester) = + KeyManager::new(&log(), HardcodedSecretRetriever {}); + let (mut manager, _) = StorageManager::new(&log(), key_requester); + let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + assert_eq!(StorageManagerStage::WaitingForBootDisk, manager.stage); + manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); + assert!(manager.resources.all_u2_zpools().is_empty()); + assert_eq!( + manager.queued_synthetic_u2_drives, + HashSet::from([zpool_name.clone()]) + ); + + // Walk through other non-normal stages and enusre disk gets queued + for stage in [ + StorageManagerStage::WaitingForKeyManager, + StorageManagerStage::QueuingDisks, + ] { + manager.queued_synthetic_u2_drives.clear(); + manager.stage = stage; + manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); + assert!(manager.resources.all_u2_zpools().is_empty()); + assert_eq!( + manager.queued_synthetic_u2_drives, + HashSet::from([zpool_name.clone()]) + ); + } + } } diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 0e874be522..7601ac7b86 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -62,6 +62,19 @@ impl StorageResources { Ok(()) } + /// Insert a synthetic disk and its zpool + pub(crate) fn insert_synthetic_disk( + &mut self, + zpool_name: ZpoolName, + ) -> Result<(), Error> { + let disk = DiskWrapper::Synthetic { zpool_name: zpool_name.clone() }; + let parent = disk.identity().clone(); + Arc::make_mut(&mut self.disks).insert(disk.identity(), disk); + let zpool = Pool::new(zpool_name, parent)?; + Arc::make_mut(&mut self.pools).insert(zpool.name.id(), zpool); + Ok(()) + } + /// Returns the identity of the boot disk. /// /// If this returns `None`, we have not processed the boot disk yet. From 5c169ac81ad497f51006b6b3bf9f9fa66999f483 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Fri, 29 Sep 2023 05:18:29 +0000 Subject: [PATCH 06/66] wip --- sled-storage/Cargo.toml | 1 - sled-storage/src/pool.rs | 7 +------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index ae9718382d..8c8ddeeb88 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -28,5 +28,4 @@ tokio.workspace = true uuid.workspace = true [dev-dependencies] -illumos-utils = { workspace = true, features = ["testing"] } omicron-test-utils.workspace = true diff --git a/sled-storage/src/pool.rs b/sled-storage/src/pool.rs index a16722537d..203738b16a 100644 --- a/sled-storage/src/pool.rs +++ b/sled-storage/src/pool.rs @@ -5,14 +5,9 @@ //! ZFS storage pool use crate::error::Error; -use illumos_utils::zpool::{ZpoolInfo, ZpoolName}; +use illumos_utils::zpool::{Zpool, ZpoolInfo, ZpoolName}; use omicron_common::disk::DiskIdentity; -#[cfg(test)] -use illumos_utils::zpool::MockZpool as Zpool; -#[cfg(not(test))] -use illumos_utils::zpool::Zpool; - /// A ZFS storage pool #[derive(Debug, Clone)] pub struct Pool { From 255155ccfbbf165072b996e4080b428e29e844e5 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 29 Sep 2023 21:13:10 +0000 Subject: [PATCH 07/66] wip --- Cargo.lock | 1 + illumos-utils/src/zpool.rs | 24 +++++++++- sled-hardware/src/disk.rs | 6 +-- sled-hardware/src/illumos/partitions.rs | 2 +- sled-storage/Cargo.toml | 1 + sled-storage/src/dataset.rs | 54 +++++++++++++---------- sled-storage/src/manager.rs | 58 +++++++++++++++++++++---- 7 files changed, 107 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a448600863..26358b3459 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5271,6 +5271,7 @@ version = "0.1.0" dependencies = [ "async-trait", "camino", + "camino-tempfile", "derive_more", "glob", "illumos-utils", diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index 81ded2655e..68d5ebd3a2 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -39,6 +39,13 @@ pub struct CreateError { err: Error, } +#[derive(thiserror::Error, Debug)] +#[error("Failed to destroy zpool: {err}")] +pub struct DestroyError { + #[from] + err: Error, +} + #[derive(thiserror::Error, Debug)] #[error("Failed to list zpools: {err}")] pub struct ListError { @@ -167,7 +174,10 @@ pub struct Zpool {} #[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] impl Zpool { - pub fn create(name: ZpoolName, vdev: &Utf8Path) -> Result<(), CreateError> { + pub fn create( + name: &ZpoolName, + vdev: &Utf8Path, + ) -> Result<(), CreateError> { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); @@ -189,7 +199,17 @@ impl Zpool { Ok(()) } - pub fn import(name: ZpoolName) -> Result<(), Error> { + pub fn destroy(name: &ZpoolName) -> Result<(), DestroyError> { + let mut cmd = std::process::Command::new(PFEXEC); + cmd.env_clear(); + cmd.env("LC_ALL", "C.UTF-8"); + cmd.arg(ZPOOL).arg("destroy"); + cmd.arg(&name.to_string()); + execute(&mut cmd).map_err(Error::from)?; + Ok(()) + } + + pub fn import(name: &ZpoolName) -> Result<(), Error> { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index bea7e23c73..541d7bd548 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -253,11 +253,11 @@ impl PooledDisk { DiskVariant::M2 => ZpoolName::new_internal(Uuid::new_v4()), DiskVariant::U2 => 
ZpoolName::new_external(Uuid::new_v4()), }; - Zpool::create(zpool_name.clone(), &zpool_path)?; + Zpool::create(&zpool_name, &zpool_path)?; zpool_name } }; - Zpool::import(zpool_name.clone()).map_err(|e| { + Zpool::import(&zpool_name).map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); PooledDiskError::ZpoolImport(e) })?; @@ -269,7 +269,7 @@ impl PooledDisk { log: &Logger, zpool_name: &ZpoolName, ) -> Result<(), PooledDiskError> { - Zpool::import(zpool_name.clone()).map_err(|e| { + Zpool::import(&zpool_name).map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); PooledDiskError::ZpoolImport(e) })?; diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index ee745fc78b..4b7e69057d 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -123,7 +123,7 @@ fn internal_ensure_partition_layout( info!(log, "Formatting zpool on disk {}", paths.devfs_path); // If a zpool does not already exist, create one. let zpool_name = ZpoolName::new_external(Uuid::new_v4()); - Zpool::create(zpool_name, dev_path)?; + Zpool::create(&zpool_name, dev_path)?; return Ok(vec![Partition::ZfsPool]); } DiskVariant::M2 => { diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index 8c8ddeeb88..e1ba21db93 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -29,3 +29,4 @@ uuid.workspace = true [dev-dependencies] omicron-test-utils.workspace = true +camino-tempfile.workspace = true \ No newline at end of file diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index 3c40dc10f0..99df582371 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -260,33 +260,39 @@ pub async fn ensure_zpool_has_datasets( let mountpoint = zpool_name.dataset_mountpoint(dataset); let keypath: Keypath = disk_identity.into(); - let epoch = - if let Ok(epoch_str) = Zfs::get_oxide_value(dataset, "epoch") { - if let Ok(epoch) = epoch_str.parse::() { - epoch - } else { - return Err(DatasetError::CannotParseEpochProperty( - dataset.to_string(), - )); - } + let epoch = if let Ok(epoch_str) = + Zfs::get_oxide_value(dataset, "epoch") + { + if let Ok(epoch) = epoch_str.parse::() { + epoch } else { - // We got an error trying to call `Zfs::get_oxide_value` - // which indicates that the dataset doesn't exist or there - // was a problem running the command. - // - // Note that `Zfs::get_oxide_value` will succeed even if - // the epoch is missing. `epoch_str` will show up as a dash - // (`-`) and will not parse into a `u64`. So we don't have - // to worry about that case here as it is handled above. - // - // If the error indicated that the command failed for some - // other reason, but the dataset actually existed, we will - // try to create the dataset below and that will fail. So - // there is no harm in just loading the latest secret here. - key_requester.load_latest_secret().await? - }; + return Err(DatasetError::CannotParseEpochProperty( + dataset.to_string(), + )); + } + } else { + // We got an error trying to call `Zfs::get_oxide_value` + // which indicates that the dataset doesn't exist or there + // was a problem running the command. + // + // Note that `Zfs::get_oxide_value` will succeed even if + // the epoch is missing. `epoch_str` will show up as a dash + // (`-`) and will not parse into a `u64`. So we don't have + // to worry about that case here as it is handled above. 
+ // + // If the error indicated that the command failed for some + // other reason, but the dataset actually existed, we will + // try to create the dataset below and that will fail. So + // there is no harm in just loading the latest secret here. + info!(log, "Loading latest secret"; "disk_id"=>#?disk_identity); + let epoch = key_requester.load_latest_secret().await?; + info!(log, "Loaded latest secret"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); + epoch + }; + info!(log, "Retrieving key"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); let key = key_requester.get_key(epoch, disk_identity.clone()).await?; + info!(log, "Got key"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); let mut keyfile = KeyFile::create(keypath.clone(), key.expose_secret(), log) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index c792fde243..543e47a9b7 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -15,7 +15,7 @@ use illumos_utils::zpool::{ZpoolKind, ZpoolName}; use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; use sled_hardware::{DiskVariant, UnparsedDisk}; -use slog::{error, info, o, warn, Logger}; +use slog::{debug, error, info, o, warn, Logger}; use tokio::sync::{mpsc, oneshot}; // The size of the mpsc bounded channel used to communicate @@ -114,6 +114,7 @@ impl StorageManager { zpool_name: ZpoolName, ) -> Result<(), Error> { if self.stage != StorageManagerStage::Normal { + info!(self.log, "Queuing synthetic U.2 drive: {zpool_name}"); self.queued_synthetic_u2_drives.insert(zpool_name); return Ok(()); } @@ -123,6 +124,8 @@ impl StorageManager { serial: "fake_serial".to_string(), model: zpool_name.id().to_string(), }; + + debug!(self.log, "Ensure zpool has datasets: {zpool_name}"); match dataset::ensure_zpool_has_datasets( &self.log, &zpool_name, @@ -131,7 +134,7 @@ impl StorageManager { ) .await { - Ok(disk) => self.resources.insert_synthetic_disk(zpool_name), + Ok(()) => self.resources.insert_synthetic_disk(zpool_name), Err(err @ DatasetError::KeyManager(_)) => { warn!( self.log, @@ -159,17 +162,17 @@ impl StorageManager { mod tests { use super::*; use async_trait::async_trait; + use camino::{Utf8Path, Utf8PathBuf}; + use camino_tempfile::tempdir; + use illumos_utils::zpool::Zpool; use key_manager::{ KeyManager, SecretRetriever, SecretRetrieverError, SecretState, VersionedIkm, }; + use omicron_test_utils::dev::test_setup_log; + use std::fs::File; use uuid::Uuid; - pub fn log() -> slog::Logger { - let drain = slog::Discard; - slog::Logger::root(drain, o!()) - } - /// A [`key-manager::SecretRetriever`] that only returns hardcoded IKM for /// epoch 0 #[derive(Debug)] @@ -199,11 +202,29 @@ mod tests { } } + // 64 MiB (min size of zpool) + const DISK_SIZE: u64 = 64 * 1024 * 1024; + + // Create a synthetic disk with a zpool backed by a file + fn new_disk(dir: &Utf8Path, zpool_name: &ZpoolName) -> Utf8PathBuf { + let path = dir.join(zpool_name.to_string()); + let file = File::create(&path).unwrap(); + file.set_len(DISK_SIZE).unwrap(); + drop(file); + Zpool::create(zpool_name, &path).unwrap(); + Zpool::import(zpool_name).unwrap(); + Zpool::set_failmode_continue(zpool_name).unwrap(); + path + } + #[tokio::test] async fn add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued() { + let logctx = test_setup_log( + "add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued", + ); let (mut _key_manager, key_requester) = - KeyManager::new(&log(), HardcodedSecretRetriever {}); - let (mut manager, _) = StorageManager::new(&log(), 
key_requester); + KeyManager::new(&logctx.log, HardcodedSecretRetriever {}); + let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); let zpool_name = ZpoolName::new_external(Uuid::new_v4()); assert_eq!(StorageManagerStage::WaitingForBootDisk, manager.stage); manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); @@ -227,5 +248,24 @@ mod tests { HashSet::from([zpool_name.clone()]) ); } + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn ensure_u2_gets_added_to_resources() { + let logctx = test_setup_log("ensure_u2_gets_added_to_resources"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever {}); + let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); + let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let _ = new_disk(dir.path(), &zpool_name); + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + manager.stage = StorageManagerStage::Normal; + manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); + assert_eq!(manager.resources.all_u2_zpools().len(), 1); + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); } } From 115510a8d094752c305ddba818c0b3acb919b2a9 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 29 Sep 2023 22:14:59 +0000 Subject: [PATCH 08/66] wip --- illumos-utils/Cargo.toml | 3 +++ illumos-utils/src/zfs.rs | 35 +++++++++++++++++++++++++++-------- sled-storage/Cargo.toml | 1 + sled-storage/src/keyfile.rs | 8 -------- 4 files changed, 31 insertions(+), 16 deletions(-) diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index 3c0c2e7fc9..573d0be14b 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -42,3 +42,6 @@ toml.workspace = true [features] # Enable to generate MockZones testing = ["mockall"] +# Useful for tests that want real functionality and ability to run without +# pfexec +tmp_keypath = [] diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index ba8cd8c84a..382c01f9aa 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -20,7 +20,16 @@ pub const ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT: &str = "/zone"; pub const ZONE_ZFS_RAMDISK_DATASET: &str = "rpool/zone"; pub const ZFS: &str = "/usr/sbin/zfs"; + +/// This path is intentionally on a `tmpfs` to prevent copy-on-write behavior +/// and to ensure it goes away on power off. +/// +/// We want minimize the time the key files are in memory, and so we rederive +/// the keys and recreate the files on demand when creating and mounting +/// encrypted filesystems. We then zero them and unlink them. pub const KEYPATH_ROOT: &str = "/var/run/oxide/"; +// Use /tmp so we don't have to worry about running tests with pfexec +pub const TEST_KEYPATH_ROOT: &str = "/tmp"; /// Error returned by [`Zfs::list_datasets`]. 
#[derive(thiserror::Error, Debug)] @@ -135,19 +144,29 @@ impl fmt::Display for Keypath { } } +#[cfg(not(feature = "tmp_keypath"))] +impl From<&DiskIdentity> for Keypath { + fn from(id: &DiskIdentity) -> Self { + build_keypath(id, KEYPATH_ROOT) + } +} + +#[cfg(feature = "tmp_keypath")] impl From<&DiskIdentity> for Keypath { fn from(id: &DiskIdentity) -> Self { - let filename = format!( - "{}-{}-{}-zfs-aes-256-gcm.key", - id.vendor, id.serial, id.model - ); - let mut path = Utf8PathBuf::new(); - path.push(KEYPATH_ROOT); - path.push(filename); - Keypath(path) + build_keypath(id, TEST_KEYPATH_ROOT) } } +fn build_keypath(id: &DiskIdentity, root: &str) -> Keypath { + let filename = + format!("{}-{}-{}-zfs-aes-256-gcm.key", id.vendor, id.serial, id.model); + let mut path = Utf8PathBuf::new(); + path.push(root); + path.push(filename); + Keypath(path) +} + #[derive(Debug)] pub struct EncryptionDetails { pub keypath: Keypath, diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index e1ba21db93..11bd502183 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -28,5 +28,6 @@ tokio.workspace = true uuid.workspace = true [dev-dependencies] +illumos-utils = { workspace = true, features = ["tmp_keypath"] } omicron-test-utils.workspace = true camino-tempfile.workspace = true \ No newline at end of file diff --git a/sled-storage/src/keyfile.rs b/sled-storage/src/keyfile.rs index 396c860fc5..fcdbf8b3bf 100644 --- a/sled-storage/src/keyfile.rs +++ b/sled-storage/src/keyfile.rs @@ -9,14 +9,6 @@ use slog::{info, Logger}; use tokio::fs::{remove_file, File}; use tokio::io::{AsyncSeekExt, AsyncWriteExt, SeekFrom}; -/// This path is intentionally on a `tmpfs` to prevent copy-on-write behavior -/// and to ensure it goes away on power off. -/// -/// We want minimize the time the key files are in memory, and so we rederive -/// the keys and recreate the files on demand when creating and mounting -/// encrypted filesystems. We then zero them and unlink them. -pub const KEYPATH_ROOT: &str = "/var/run/oxide/"; - /// A file that wraps a zfs encryption key. /// /// We put this in a RAM backed filesystem and zero and delete it when we are From 0bc3aa0106d5cb5068547c2379c52465bd976044 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Mon, 2 Oct 2023 02:21:58 +0000 Subject: [PATCH 09/66] wip --- sled-storage/src/manager.rs | 66 +++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 543e47a9b7..813f552878 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -16,7 +16,7 @@ use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; use sled_hardware::{DiskVariant, UnparsedDisk}; use slog::{debug, error, info, o, warn, Logger}; -use tokio::sync::{mpsc, oneshot}; +use tokio::sync::{mpsc, oneshot, watch}; // The size of the mpsc bounded channel used to communicate // between the `StorageHandle` and `StorageManager`. 
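The hunks in this patch connect the `StorageManager` task to its clients through a bounded `mpsc` request channel plus a `watch` channel that publishes cloned `StorageResources` snapshots. As a point of reference only, the following is a minimal, self-contained sketch of that same pattern; it is not part of the patch, and the `Request`/`Resources` types, the queue depth, and the `main` wiring are illustrative stand-ins for the crate's real definitions (assumes tokio with its full feature set).

use tokio::sync::{mpsc, watch};

#[derive(Debug)]
enum Request {
    Add(u32),
}

#[derive(Clone, Debug, Default)]
struct Resources {
    disks: Vec<u32>,
}

struct Handle {
    tx: mpsc::Sender<Request>,
    updates: watch::Receiver<Resources>,
}

struct Manager {
    rx: mpsc::Receiver<Request>,
    resources: Resources,
    updates: watch::Sender<Resources>,
}

impl Manager {
    fn new() -> (Manager, Handle) {
        // A bounded channel applies backpressure to callers of the handle.
        let (tx, rx) = mpsc::channel(256);
        // The watch channel lets any number of clients observe the latest
        // cloned snapshot without querying the manager task directly.
        let (update_tx, update_rx) = watch::channel(Resources::default());
        (
            Manager { rx, resources: Resources::default(), updates: update_tx },
            Handle { tx, updates: update_rx },
        )
    }

    async fn run(mut self) {
        while let Some(req) = self.rx.recv().await {
            match req {
                Request::Add(disk) => self.resources.disks.push(disk),
            }
            // Publish the new snapshot; receivers pick it up via `changed()`.
            let _ = self.updates.send(self.resources.clone());
        }
    }
}

#[tokio::main]
async fn main() {
    let (manager, handle) = Manager::new();
    tokio::spawn(manager.run());

    let Handle { tx, mut updates } = handle;
    tx.send(Request::Add(1)).await.unwrap();
    updates.changed().await.unwrap();
    println!("disks: {:?}", updates.borrow().disks);
}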
@@ -30,11 +30,19 @@ pub enum StorageManagerStage { Normal, } -enum StorageRequest {} +enum StorageRequest { + AddDisk(UnparsedDisk), + AddSyntheticDisk(ZpoolName), + RemoveDisk(UnparsedDisk), + DisksChanged(Vec), + // NewFilesystem(NewFilesystemRequest), + KeyManagerReady, +} /// A mechanism for interacting with the [`StorageManager`] pub struct StorageHandle { tx: mpsc::Sender, + resource_updates: watch::Receiver, } /// The storage manager responsible for the state of the storage @@ -48,6 +56,7 @@ pub struct StorageManager { queued_u2_drives: HashSet, queued_synthetic_u2_drives: HashSet, key_requester: StorageKeyRequester, + resource_updates: watch::Sender, } impl StorageManager { @@ -56,20 +65,62 @@ impl StorageManager { key_requester: StorageKeyRequester, ) -> (StorageManager, StorageHandle) { let (tx, rx) = mpsc::channel(QUEUE_SIZE); + let resources = StorageResources::default(); + let (update_tx, update_rx) = watch::channel(resources.clone()); ( StorageManager { log: log.new(o!("component" => "StorageManager")), stage: StorageManagerStage::WaitingForBootDisk, rx, - resources: StorageResources::default(), + resources, queued_u2_drives: HashSet::new(), queued_synthetic_u2_drives: HashSet::new(), key_requester, + resource_updates: update_tx, }, - StorageHandle { tx }, + StorageHandle { tx, resource_updates: update_rx }, ) } + /// Run the main receive loop of the `StorageManager` + /// + /// This should be spawned into a tokio task + pub async fn run(&mut self) { + loop { + if let Err(e) = self.step().await { + warn!(self.log, "{e}"); + return; + } + } + } + + /// Process the next event + /// + /// This is useful for testing/debugging + pub async fn step(&mut self) -> Result<(), Error> { + // The sending side should never disappear + match self.rx.recv().await.unwrap() { + StorageRequest::AddDisk(unparsed_disk) => { + match unparsed_disk.variant() { + DiskVariant::U2 => self.add_u2_disk(unparsed_disk).await?, + DiskVariant::M2 => todo!(), + } + } + StorageRequest::AddSyntheticDisk(zpool_name) => { + match zpool_name.kind() { + ZpoolKind::External => { + self.add_synthetic_u2_disk(zpool_name).await? + } + ZpoolKind::Internal => todo!(), + } + } + StorageRequest::RemoveDisk(_unparsed_disk) => todo!(), + StorageRequest::DisksChanged(_unparsed_disks) => todo!(), + StorageRequest::KeyManagerReady => todo!(), + } + Ok(()) + } + /// Add a real U.2 disk to storage resources or queue it to be added later async fn add_u2_disk( &mut self, @@ -142,7 +193,7 @@ impl StorageManager { ); self.queued_synthetic_u2_drives.insert(zpool_name); self.stage = StorageManagerStage::QueuingDisks; - Err(DiskError::Dataset(err).into()) + Ok(()) } Err(err) => { error!( @@ -150,7 +201,7 @@ impl StorageManager { "Persistent error: {err} - not queueing disk {:?}", synthetic_id ); - Err(DiskError::Dataset(err).into()) + Ok(()) } } } @@ -260,8 +311,11 @@ mod tests { let zpool_name = ZpoolName::new_external(Uuid::new_v4()); let dir = tempdir().unwrap(); let _ = new_disk(dir.path(), &zpool_name); + // Spawn the key_manager so that it will respond to requests for encryption keys tokio::spawn(async move { key_manager.run().await }); + + // Set the stage to pretend we've progressed enough to have a key_manager available. manager.stage = StorageManagerStage::Normal; manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); assert_eq!(manager.resources.all_u2_zpools().len(), 1); From e2e7dc8e55d7dd17be941ad9ae091deba67fb1ab Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Mon, 2 Oct 2023 03:04:43 +0000 Subject: [PATCH 10/66] wip --- sled-storage/src/error.rs | 5 +++- sled-storage/src/manager.rs | 57 +++++++++++++++++++++++++++++++++---- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs index fbf721fab7..70d7fe7c1e 100644 --- a/sled-storage/src/error.rs +++ b/sled-storage/src/error.rs @@ -4,7 +4,7 @@ //! Storage related errors -use crate::dataset::DatasetName; +use crate::dataset::{DatasetError, DatasetName}; use crate::disk::DiskError; use camino::Utf8PathBuf; use omicron_common::api::external::ByteCountRangeError; @@ -15,6 +15,9 @@ pub enum Error { #[error(transparent)] DiskError(#[from] DiskError), + #[error(transparent)] + DatasetError(#[from] DatasetError), + // TODO: We could add the context of "why are we doint this op", maybe? #[error(transparent)] ZfsListDataset(#[from] illumos_utils::zfs::ListDatasetsError), diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 813f552878..499d8edee2 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -89,7 +89,6 @@ impl StorageManager { loop { if let Err(e) = self.step().await { warn!(self.log, "{e}"); - return; } } } @@ -103,7 +102,7 @@ impl StorageManager { StorageRequest::AddDisk(unparsed_disk) => { match unparsed_disk.variant() { DiskVariant::U2 => self.add_u2_disk(unparsed_disk).await?, - DiskVariant::M2 => todo!(), + DiskVariant::M2 => self.add_m2_disk(unparsed_disk).await?, } } StorageRequest::AddSyntheticDisk(zpool_name) => { @@ -111,7 +110,9 @@ impl StorageManager { ZpoolKind::External => { self.add_synthetic_u2_disk(zpool_name).await? } - ZpoolKind::Internal => todo!(), + ZpoolKind::Internal => { + self.add_synthetic_m2_disk(zpool_name).await? 
+ } } } StorageRequest::RemoveDisk(_unparsed_disk) => todo!(), @@ -121,7 +122,7 @@ impl StorageManager { Ok(()) } - /// Add a real U.2 disk to storage resources or queue it to be added later + // Add a real U.2 disk to [`StorageResources`] or queue it to be added later async fn add_u2_disk( &mut self, unparsed_disk: UnparsedDisk, @@ -159,7 +160,53 @@ impl StorageManager { } } - /// Add a synthetic U.2 disk to storage resources or queue it to be added later + // Add a real U.2 disk to [`StorageResources`] + // + // + // We never queue M.2 drives, as they don't rely on [`KeyManager`] based + // encryption + async fn add_m2_disk( + &mut self, + unparsed_disk: UnparsedDisk, + ) -> Result<(), Error> { + let disk = Disk::new( + &self.log, + unparsed_disk.clone(), + Some(&self.key_requester), + ) + .await?; + self.resources.insert_real_disk(disk)?; + Ok(()) + } + + // Add a synthetic U.2 disk to [`StorageResources`] + // + // We never queue M.2 drives, as they don't rely on [`KeyManager`] based + // encryption + async fn add_synthetic_m2_disk( + &mut self, + zpool_name: ZpoolName, + ) -> Result<(), Error> { + let synthetic_id = DiskIdentity { + vendor: "fake_vendor".to_string(), + serial: "fake_serial".to_string(), + model: zpool_name.id().to_string(), + }; + + debug!(self.log, "Ensure zpool has datasets: {zpool_name}"); + dataset::ensure_zpool_has_datasets( + &self.log, + &zpool_name, + &synthetic_id, + Some(&self.key_requester), + ) + .await?; + self.resources.insert_synthetic_disk(zpool_name)?; + Ok(()) + } + + // Add a synthetic U.2 disk to [`StorageResources`] or queue it to be added + // later async fn add_synthetic_u2_disk( &mut self, zpool_name: ZpoolName, From 9a1e9164363818c6780ad9529eadb087230ccf84 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Mon, 2 Oct 2023 17:41:03 +0000 Subject: [PATCH 11/66] wip --- sled-storage/src/manager.rs | 140 +++++++++++++++++++++++++++++----- sled-storage/src/resources.rs | 25 +++++- 2 files changed, 144 insertions(+), 21 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 499d8edee2..2855345c3b 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -23,7 +23,7 @@ use tokio::sync::{mpsc, oneshot, watch}; const QUEUE_SIZE: usize = 256; #[derive(Debug, Clone, PartialEq, Eq)] -pub enum StorageManagerStage { +pub enum StorageManagerState { WaitingForBootDisk, WaitingForKeyManager, QueuingDisks, @@ -34,7 +34,7 @@ enum StorageRequest { AddDisk(UnparsedDisk), AddSyntheticDisk(ZpoolName), RemoveDisk(UnparsedDisk), - DisksChanged(Vec), + DisksChanged(HashSet), // NewFilesystem(NewFilesystemRequest), KeyManagerReady, } @@ -45,12 +45,76 @@ pub struct StorageHandle { resource_updates: watch::Receiver, } +impl StorageHandle { + /// Adds a disk and associated zpool to the storage manager. + pub async fn upsert_disk(&self, disk: UnparsedDisk) { + self.tx.send(StorageRequest::AddDisk(disk)).await.unwrap(); + } + + /// Adds a synthetic disk backed by a zpool to the storage manager. + pub async fn upsert_synthetic_disk(&self, pool: ZpoolName) { + self.tx.send(StorageRequest::AddSyntheticDisk(pool)).await.unwrap(); + } + + /// Removes a disk, if it's tracked by the storage manager, as well + /// as any associated zpools. + pub async fn delete_disk(&self, disk: UnparsedDisk) { + self.tx.send(StorageRequest::RemoveDisk(disk)).await.unwrap(); + } + + /// Ensures that the storage manager tracks exactly the provided disks. 
+ /// + /// This acts similar to a batch [Self::upsert_disk] for all new disks, and + /// [Self::delete_disk] for all removed disks. + /// + /// If errors occur, an arbitrary "one" of them will be returned, but a + /// best-effort attempt to add all disks will still be attempted. + pub async fn ensure_using_exactly_these_disks(&self, unparsed_disks: I) + where + I: IntoIterator, + { + self.tx + .send(StorageRequest::DisksChanged( + unparsed_disks.into_iter().collect(), + )) + .await + .unwrap(); + } + + /// Notify the [`StorageManager`] that the [`key_manager::KeyManager`] + /// has determined what [`key_manager::SecretRetriever`] to use and + /// it is now possible to retrieve secrets and construct keys. Note + /// that in cases of using the trust quorum, it is possible that the + /// [`key_manager::SecretRetriever`] is ready, but enough key shares cannot + /// be retrieved from other sleds. In this case, we still will be unable + /// to add the disks successfully. In the common case this is a transient + /// error. In other cases it may be fatal. However, that is outside the + /// scope of the cares of this module. + pub async fn key_manager_ready(&self) { + self.tx.send(StorageRequest::KeyManagerReady).await.unwrap(); + } + + /// Wait for a boot disk to be initialized + pub async fn wait_for_boot_disk(&mut self) -> (DiskIdentity, ZpoolName) { + loop { + // We panic if the sender is dropped, as this means + // the StorageManager has gone away, which it should not do. + self.resource_updates.changed().await.unwrap(); + // Limit any RWLock related cancellation issues by immediately cloning + let resources = self.resource_updates.borrow().clone(); + if let Some((disk_id, zpool_name)) = resources.boot_disk() { + return (disk_id, zpool_name); + } + } + } +} + /// The storage manager responsible for the state of the storage /// on a sled. The storage manager runs in its own task and is interacted /// with via the [`StorageHandle`]. pub struct StorageManager { log: Logger, - stage: StorageManagerStage, + state: StorageManagerState, rx: mpsc::Receiver, resources: StorageResources, queued_u2_drives: HashSet, @@ -70,7 +134,7 @@ impl StorageManager { ( StorageManager { log: log.new(o!("component" => "StorageManager")), - stage: StorageManagerStage::WaitingForBootDisk, + state: StorageManagerState::WaitingForBootDisk, rx, resources, queued_u2_drives: HashSet::new(), @@ -127,7 +191,7 @@ impl StorageManager { &mut self, unparsed_disk: UnparsedDisk, ) -> Result<(), Error> { - if self.stage != StorageManagerStage::Normal { + if self.state != StorageManagerState::Normal { self.queued_u2_drives.insert(unparsed_disk); return Ok(()); } @@ -139,14 +203,21 @@ impl StorageManager { ) .await { - Ok(disk) => self.resources.insert_real_disk(disk), + Ok(disk) => { + if self.resources.insert_real_disk(disk)? { + let _ = self + .resource_updates + .send_replace(self.resources.clone()); + } + Ok(()) + } Err(err @ DiskError::Dataset(DatasetError::KeyManager(_))) => { warn!( self.log, "Transient error: {err} - queuing disk {:?}", unparsed_disk ); self.queued_u2_drives.insert(unparsed_disk); - self.stage = StorageManagerStage::QueuingDisks; + self.state = StorageManagerState::QueuingDisks; Err(err.into()) } Err(err) => { @@ -175,7 +246,9 @@ impl StorageManager { Some(&self.key_requester), ) .await?; - self.resources.insert_real_disk(disk)?; + if self.resources.insert_real_disk(disk)? 
{ + let _ = self.resource_updates.send_replace(self.resources.clone()); + } Ok(()) } @@ -201,7 +274,9 @@ impl StorageManager { Some(&self.key_requester), ) .await?; - self.resources.insert_synthetic_disk(zpool_name)?; + if self.resources.insert_synthetic_disk(zpool_name)? { + let _ = self.resource_updates.send_replace(self.resources.clone()); + } Ok(()) } @@ -211,7 +286,7 @@ impl StorageManager { &mut self, zpool_name: ZpoolName, ) -> Result<(), Error> { - if self.stage != StorageManagerStage::Normal { + if self.state != StorageManagerState::Normal { info!(self.log, "Queuing synthetic U.2 drive: {zpool_name}"); self.queued_synthetic_u2_drives.insert(zpool_name); return Ok(()); @@ -232,14 +307,21 @@ impl StorageManager { ) .await { - Ok(()) => self.resources.insert_synthetic_disk(zpool_name), + Ok(()) => { + if self.resources.insert_synthetic_disk(zpool_name)? { + let _ = self + .resource_updates + .send_replace(self.resources.clone()); + } + Ok(()) + } Err(err @ DatasetError::KeyManager(_)) => { warn!( self.log, "Transient error: {err} - queuing disk {:?}", synthetic_id ); self.queued_synthetic_u2_drives.insert(zpool_name); - self.stage = StorageManagerStage::QueuingDisks; + self.state = StorageManagerState::QueuingDisks; Ok(()) } Err(err) => { @@ -324,7 +406,7 @@ mod tests { KeyManager::new(&logctx.log, HardcodedSecretRetriever {}); let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); let zpool_name = ZpoolName::new_external(Uuid::new_v4()); - assert_eq!(StorageManagerStage::WaitingForBootDisk, manager.stage); + assert_eq!(StorageManagerState::WaitingForBootDisk, manager.state); manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); assert!(manager.resources.all_u2_zpools().is_empty()); assert_eq!( @@ -334,11 +416,11 @@ mod tests { // Walk through other non-normal stages and enusre disk gets queued for stage in [ - StorageManagerStage::WaitingForKeyManager, - StorageManagerStage::QueuingDisks, + StorageManagerState::WaitingForKeyManager, + StorageManagerState::QueuingDisks, ] { manager.queued_synthetic_u2_drives.clear(); - manager.stage = stage; + manager.state = stage; manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); assert!(manager.resources.all_u2_zpools().is_empty()); assert_eq!( @@ -363,10 +445,34 @@ mod tests { tokio::spawn(async move { key_manager.run().await }); // Set the stage to pretend we've progressed enough to have a key_manager available. 
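// Outside of tests, the sled-agent is expected to drive this transition
// through the handle rather than by writing the field directly; a hedged
// sketch of that path, using the methods added in this patch:
//
//     handle.key_manager_ready().await;
//     let (disk_id, zpool_name) = handle.wait_for_boot_disk().await;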
- manager.stage = StorageManagerStage::Normal; + manager.state = StorageManagerState::Normal; manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); assert_eq!(manager.resources.all_u2_zpools().len(), 1); Zpool::destroy(&zpool_name).unwrap(); logctx.cleanup_successful(); } + + #[tokio::test] + async fn wait_for_bootdisk() { + let logctx = test_setup_log("ensure_u2_gets_added_to_resources"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever {}); + let (mut manager, mut handle) = + StorageManager::new(&logctx.log, key_requester); + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + // Create a synthetic internal disk + let zpool_name = ZpoolName::new_internal(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let _ = new_disk(dir.path(), &zpool_name); + + handle.upsert_synthetic_disk(zpool_name.clone()).await; + handle.wait_for_boot_disk().await; + } } diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 7601ac7b86..fb57d742e3 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -49,30 +49,47 @@ pub struct StorageResources { impl StorageResources { /// Insert a disk and its zpool - pub(crate) fn insert_real_disk(&mut self, disk: Disk) -> Result<(), Error> { + /// + /// Return true, if data was changed, false otherwise + pub(crate) fn insert_real_disk( + &mut self, + disk: Disk, + ) -> Result { let parent = disk.identity().clone(); let zpool_name = disk.zpool_name().clone(); let disk = DiskWrapper::Real { disk: disk.clone(), devfs_path: disk.devfs_path().clone(), }; + if let Some(stored) = self.disks.get(&parent) { + if stored == &disk { + return Ok(false); + } + } Arc::make_mut(&mut self.disks).insert(disk.identity(), disk); let zpool = Pool::new(zpool_name, parent)?; Arc::make_mut(&mut self.pools).insert(zpool.name.id(), zpool); - Ok(()) + Ok(true) } /// Insert a synthetic disk and its zpool + /// + /// Return true, if data was changed, false otherwise pub(crate) fn insert_synthetic_disk( &mut self, zpool_name: ZpoolName, - ) -> Result<(), Error> { + ) -> Result { let disk = DiskWrapper::Synthetic { zpool_name: zpool_name.clone() }; let parent = disk.identity().clone(); + if let Some(stored) = self.disks.get(&parent) { + if stored == &disk { + return Ok(false); + } + } Arc::make_mut(&mut self.disks).insert(disk.identity(), disk); let zpool = Pool::new(zpool_name, parent)?; Arc::make_mut(&mut self.pools).insert(zpool.name.id(), zpool); - Ok(()) + Ok(true) } /// Returns the identity of the boot disk. From 30e16c802c645b964e3cf838c38f4ea0596eb6f5 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Mon, 2 Oct 2023 23:39:05 +0000 Subject: [PATCH 12/66] wip --- sled-storage/src/manager.rs | 163 +++++++++++++++++++++++++++++++++--- 1 file changed, 150 insertions(+), 13 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 2855345c3b..96119bd74e 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -17,6 +17,7 @@ use omicron_common::disk::DiskIdentity; use sled_hardware::{DiskVariant, UnparsedDisk}; use slog::{debug, error, info, o, warn, Logger}; use tokio::sync::{mpsc, oneshot, watch}; +use tokio::time::{interval, Duration, MissedTickBehavior}; // The size of the mpsc bounded channel used to communicate // between the `StorageHandle` and `StorageManager`. @@ -24,7 +25,6 @@ const QUEUE_SIZE: usize = 256; #[derive(Debug, Clone, PartialEq, Eq)] pub enum StorageManagerState { - WaitingForBootDisk, WaitingForKeyManager, QueuingDisks, Normal, @@ -37,6 +37,10 @@ enum StorageRequest { DisksChanged(HashSet), // NewFilesystem(NewFilesystemRequest), KeyManagerReady, + /// This will always grab the latest state after any new updates, as it + /// serializes through the `StorageManager` task. + /// This serialization is particularly useful for tests. + GetLatestResources(oneshot::Sender), } /// A mechanism for interacting with the [`StorageManager`] @@ -107,6 +111,20 @@ impl StorageHandle { } } } + + /// Wait for any storage resource changes + pub async fn wait_for_changes(&mut self) -> StorageResources { + self.resource_updates.changed().await.unwrap(); + self.resource_updates.borrow().clone() + } + + /// Retrieve the latest value of `StorageResources` from the + /// `StorageManager` task. + pub async fn get_latest_resources(&mut self) -> StorageResources { + let (tx, rx) = oneshot::channel(); + self.tx.send(StorageRequest::GetLatestResources(tx)).await.unwrap(); + rx.await.unwrap() + } } /// The storage manager responsible for the state of the storage @@ -134,7 +152,7 @@ impl StorageManager { ( StorageManager { log: log.new(o!("component" => "StorageManager")), - state: StorageManagerState::WaitingForBootDisk, + state: StorageManagerState::WaitingForKeyManager, rx, resources, queued_u2_drives: HashSet::new(), @@ -151,8 +169,22 @@ impl StorageManager { /// This should be spawned into a tokio task pub async fn run(&mut self) { loop { - if let Err(e) = self.step().await { - warn!(self.log, "{e}"); + const QUEUED_DISK_RETRY_TIMEOUT: Duration = Duration::from_secs(10); + let mut interval = interval(QUEUED_DISK_RETRY_TIMEOUT); + interval.set_missed_tick_behavior(MissedTickBehavior::Delay); + tokio::select! { + res = self.step() => { + if let Err(e) = res { + warn!(self.log, "{e}"); + } + } + _ = interval.tick(), + if self.state == StorageManagerState::QueuingDisks => + { + // We are going to try to configure these disks again + self.state = StorageManagerState::Normal; + self.add_queued_disks().await; + } } } } @@ -181,11 +213,83 @@ impl StorageManager { } StorageRequest::RemoveDisk(_unparsed_disk) => todo!(), StorageRequest::DisksChanged(_unparsed_disks) => todo!(), - StorageRequest::KeyManagerReady => todo!(), + StorageRequest::KeyManagerReady => { + self.state = StorageManagerState::Normal; + self.add_queued_disks().await; + } + StorageRequest::GetLatestResources(tx) => { + let _ = tx.send(self.resources.clone()); + } } Ok(()) } + // Loop through all queued disks inserting them into [`StorageResources`] + // unless we hit a transient error. 
If we hit a transient error, we return
+    // and wait for the next retry window to re-call this method. If we hit a
+    // permanent error we log it, but we continue inserting queued disks.
+    async fn add_queued_disks(&mut self) {
+        // Operate on queued real disks
+
+        // Disks that should be requeued.
+        let mut saved = HashSet::new();
+        let queued = std::mem::take(&mut self.queued_u2_drives);
+        let mut iter = queued.into_iter();
+        while let Some(disk) = iter.next() {
+            if self.state == StorageManagerState::QueuingDisks {
+                // We hit a transient error in a prior iteration.
+                saved.insert(disk);
+            } else {
+                // Try to add the disk. If there was a transient error the disk will
+                // have been requeued. If there was a permanent error, it will have been
+                // dropped. If there is another unexpected error, we will handle it and
+                // requeue ourselves.
+                if let Err(err) = self.add_u2_disk(disk.clone()).await {
+                    warn!(
+                        self.log,
+                        "Potentially transient error: {err} - requeuing disk {:?}",
+                        disk
+                    );
+                    saved.insert(disk);
+                }
+            }
+        }
+        // Merge any requeued disks from transient errors with saved disks here
+        self.queued_u2_drives.extend(saved);
+
+        // Operate on queued synthetic disks
+        if self.state == StorageManagerState::QueuingDisks {
+            return;
+        }
+
+        let mut saved = HashSet::new();
+        let queued = std::mem::take(&mut self.queued_synthetic_u2_drives);
+        let mut iter = queued.into_iter();
+        while let Some(zpool_name) = iter.next() {
+            if self.state == StorageManagerState::QueuingDisks {
+                // We hit a transient error in a prior iteration.
+                saved.insert(zpool_name);
+            } else {
+                // Try to add the disk. If there was a transient error the disk will
+                // have been requeued. If there was a permanent error, it will have been
+                // dropped. If there is another unexpected error, we will handle it and
+                // requeue ourselves.
+ if let Err(err) = + self.add_synthetic_u2_disk(zpool_name.clone()).await + { + warn!( + self.log, + "Potentially transient error: {err}: - requeing synthetic disk {:?}", + zpool_name + ); + saved.insert(zpool_name); + } + } + } + // Merge any requeued disks from transient errors with saved disks here + self.queued_synthetic_u2_drives.extend(saved); + } + // Add a real U.2 disk to [`StorageResources`] or queue it to be added later async fn add_u2_disk( &mut self, @@ -218,7 +322,7 @@ impl StorageManager { ); self.queued_u2_drives.insert(unparsed_disk); self.state = StorageManagerState::QueuingDisks; - Err(err.into()) + Ok(()) } Err(err) => { error!( @@ -226,7 +330,7 @@ impl StorageManager { "Persistent error: {err} - not queueing disk {:?}", unparsed_disk ); - Err(err.into()) + Ok(()) } } } @@ -406,7 +510,7 @@ mod tests { KeyManager::new(&logctx.log, HardcodedSecretRetriever {}); let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); let zpool_name = ZpoolName::new_external(Uuid::new_v4()); - assert_eq!(StorageManagerState::WaitingForBootDisk, manager.state); + assert_eq!(StorageManagerState::WaitingForKeyManager, manager.state); manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); assert!(manager.resources.all_u2_zpools().is_empty()); assert_eq!( @@ -415,10 +519,7 @@ mod tests { ); // Walk through other non-normal stages and enusre disk gets queued - for stage in [ - StorageManagerState::WaitingForKeyManager, - StorageManagerState::QueuingDisks, - ] { + for stage in [StorageManagerState::QueuingDisks] { manager.queued_synthetic_u2_drives.clear(); manager.state = stage; manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); @@ -454,7 +555,7 @@ mod tests { #[tokio::test] async fn wait_for_bootdisk() { - let logctx = test_setup_log("ensure_u2_gets_added_to_resources"); + let logctx = test_setup_log("wait_for_bootdisk"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever {}); let (mut manager, mut handle) = @@ -474,5 +575,41 @@ mod tests { handle.upsert_synthetic_disk(zpool_name.clone()).await; handle.wait_for_boot_disk().await; + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn queued_disks_get_added_as_resources() { + let logctx = test_setup_log("queued_disks_get_added_as_resources"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever {}); + let (mut manager, mut handle) = + StorageManager::new(&logctx.log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + // Queue up a disks, as we haven't told the `StorageManager` that + // the `KeyManager` is ready yet. 
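// The read path used below, `get_latest_resources`, round-trips a request
// through the manager's queue and therefore reflects every request sent
// before it; `wait_for_changes`, by contrast, just blocks on the watch
// channel for the next published snapshot. A hedged sketch of that other
// path, as used by the notification tests later in this series:
//
//     let resources = handle.wait_for_changes().await;
//     assert_eq!(resources.all_u2_zpools().len(), 1);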
+ let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let _ = new_disk(dir.path(), &zpool_name); + handle.upsert_synthetic_disk(zpool_name.clone()).await; + let resources = handle.get_latest_resources().await; + assert!(resources.all_u2_zpools().is_empty()); + + // Now inform the storage manager that the key manager is ready + // The queued disk should be successfully added + handle.key_manager_ready().await; + let resources = handle.get_latest_resources().await; + assert_eq!(resources.all_u2_zpools().len(), 1); + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); } } From 51dcbcdd9c6bf0bd4ac976be159504b0bd4e628d Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 3 Oct 2023 16:45:24 +0000 Subject: [PATCH 13/66] wip --- sled-storage/src/manager.rs | 85 +++++++++++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 8 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 96119bd74e..2a7dcbda9b 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -181,8 +181,6 @@ impl StorageManager { _ = interval.tick(), if self.state == StorageManagerState::QueuingDisks => { - // We are going to try to configure these disks again - self.state = StorageManagerState::Normal; self.add_queued_disks().await; } } @@ -229,6 +227,7 @@ impl StorageManager { // and wait for the next retry window to re-call this method. If we hit a // permanent error we log it, but we continue inserting queued disks. async fn add_queued_disks(&mut self) { + self.state = StorageManagerState::Normal; // Operate on queued real disks // Disks that should be requeued. @@ -455,18 +454,30 @@ mod tests { }; use omicron_test_utils::dev::test_setup_log; use std::fs::File; + use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }; use uuid::Uuid; /// A [`key-manager::SecretRetriever`] that only returns hardcoded IKM for /// epoch 0 - #[derive(Debug)] - struct HardcodedSecretRetriever {} + #[derive(Debug, Default)] + struct HardcodedSecretRetriever { + inject_error: Arc, + } #[async_trait] impl SecretRetriever for HardcodedSecretRetriever { async fn get_latest( &self, ) -> Result { + if self.inject_error.load(Ordering::SeqCst) { + return Err(SecretRetrieverError::Bootstore( + "Timeout".to_string(), + )); + } + let epoch = 0; let salt = [0u8; 32]; let secret = [0x1d; 32]; @@ -479,6 +490,11 @@ mod tests { &self, epoch: u64, ) -> Result { + if self.inject_error.load(Ordering::SeqCst) { + return Err(SecretRetrieverError::Bootstore( + "Timeout".to_string(), + )); + } if epoch != 0 { return Err(SecretRetrieverError::NoSuchEpoch(epoch)); } @@ -507,7 +523,7 @@ mod tests { "add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued", ); let (mut _key_manager, key_requester) = - KeyManager::new(&logctx.log, HardcodedSecretRetriever {}); + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); let zpool_name = ZpoolName::new_external(Uuid::new_v4()); assert_eq!(StorageManagerState::WaitingForKeyManager, manager.state); @@ -536,7 +552,7 @@ mod tests { async fn ensure_u2_gets_added_to_resources() { let logctx = test_setup_log("ensure_u2_gets_added_to_resources"); let (mut key_manager, key_requester) = - KeyManager::new(&logctx.log, HardcodedSecretRetriever {}); + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); let zpool_name = 
ZpoolName::new_external(Uuid::new_v4()); let dir = tempdir().unwrap(); @@ -557,7 +573,7 @@ mod tests { async fn wait_for_bootdisk() { let logctx = test_setup_log("wait_for_bootdisk"); let (mut key_manager, key_requester) = - KeyManager::new(&logctx.log, HardcodedSecretRetriever {}); + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); let (mut manager, mut handle) = StorageManager::new(&logctx.log, key_requester); // Spawn the key_manager so that it will respond to requests for encryption keys @@ -583,7 +599,7 @@ mod tests { async fn queued_disks_get_added_as_resources() { let logctx = test_setup_log("queued_disks_get_added_as_resources"); let (mut key_manager, key_requester) = - KeyManager::new(&logctx.log, HardcodedSecretRetriever {}); + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); let (mut manager, mut handle) = StorageManager::new(&logctx.log, key_requester); @@ -612,4 +628,57 @@ mod tests { Zpool::destroy(&zpool_name).unwrap(); logctx.cleanup_successful(); } + + /// For this test, we are going to step through the msg recv loop directly + /// without running the `StorageManager` in a tokio task. + /// This allows us to control timing precisely. + #[tokio::test] + async fn queued_disks_get_requeued_on_secret_retriever_error() { + let logctx = test_setup_log("queued_disks_get_added_as_resources"); + let inject_error = Arc::new(AtomicBool::new(false)); + let (mut key_manager, key_requester) = KeyManager::new( + &logctx.log, + HardcodedSecretRetriever { inject_error: inject_error.clone() }, + ); + let (mut manager, handle) = + StorageManager::new(&logctx.log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Queue up a disks, as we haven't told the `StorageManager` that + // the `KeyManager` is ready yet. + let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let _ = new_disk(dir.path(), &zpool_name); + handle.upsert_synthetic_disk(zpool_name.clone()).await; + manager.step().await.unwrap(); + + // We can't wait for a reply through the handle as the storage manager task + // isn't actually running. We just check the resources directly. + assert!(manager.resources.all_u2_zpools().is_empty()); + + // Let's inject an error to the `SecretRetriever` to simulate a trust + // quorum timeout + inject_error.store(true, Ordering::SeqCst); + + // Now inform the storage manager that the key manager is ready + // The queued disk should not be added due to the error + handle.key_manager_ready().await; + manager.step().await.unwrap(); + assert!(manager.resources.all_u2_zpools().is_empty()); + + // Manually simulating a timer tick to add queued disks should also + // still hit the error + manager.add_queued_disks().await; + assert!(manager.resources.all_u2_zpools().is_empty()); + + // Clearing the injected error will cause the disk to get added + inject_error.store(false, Ordering::SeqCst); + manager.add_queued_disks().await; + assert_eq!(1, manager.resources.all_u2_zpools().len()); + + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); + } } From 96512174027ee73b0d77c2409dc23f3f488752db Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Tue, 3 Oct 2023 19:48:04 +0000 Subject: [PATCH 14/66] wip --- sled-agent/src/storage_manager.rs | 270 ------------------------------ sled-storage/src/manager.rs | 75 ++++++++- sled-storage/src/resources.rs | 34 +++- 3 files changed, 106 insertions(+), 273 deletions(-) diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 3d3e544573..68fb7df7df 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -47,97 +47,6 @@ use illumos_utils::{zfs::Zfs, zpool::Zpool}; // boot when the bootstore has detected it has a key share. static KEY_MANAGER_READY: OnceLock<()> = OnceLock::new(); -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error(transparent)] - DiskError(#[from] sled_hardware::PooledDiskError), - - // TODO: We could add the context of "why are we doint this op", maybe? - #[error(transparent)] - ZfsListDataset(#[from] illumos_utils::zfs::ListDatasetsError), - - #[error(transparent)] - ZfsEnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), - - #[error(transparent)] - ZfsSetValue(#[from] illumos_utils::zfs::SetValueError), - - #[error(transparent)] - ZfsGetValue(#[from] illumos_utils::zfs::GetValueError), - - #[error(transparent)] - GetZpoolInfo(#[from] illumos_utils::zpool::GetInfoError), - - #[error(transparent)] - Fstyp(#[from] illumos_utils::fstyp::Error), - - #[error(transparent)] - ZoneCommand(#[from] illumos_utils::running_zone::RunCommandError), - - #[error(transparent)] - ZoneBoot(#[from] illumos_utils::running_zone::BootError), - - #[error(transparent)] - ZoneEnsureAddress(#[from] illumos_utils::running_zone::EnsureAddressError), - - #[error(transparent)] - ZoneInstall(#[from] illumos_utils::running_zone::InstallZoneError), - - #[error("No U.2 Zpools found")] - NoU2Zpool, - - #[error("Failed to parse UUID from {path}: {err}")] - ParseUuid { - path: Utf8PathBuf, - #[source] - err: uuid::Error, - }, - - #[error("Dataset {name:?} exists with a different uuid (has {old}, requested {new})")] - UuidMismatch { name: Box, old: Uuid, new: Uuid }, - - #[error("Error parsing pool {name}'s size: {err}")] - BadPoolSize { - name: String, - #[source] - err: ByteCountRangeError, - }, - - #[error("Failed to parse the dataset {name}'s UUID: {err}")] - ParseDatasetUuid { - name: String, - #[source] - err: uuid::Error, - }, - - #[error("Zpool Not Found: {0}")] - ZpoolNotFound(String), - - #[error("Underlay not yet initialized")] - UnderlayNotInitialized, -} - -/// A ZFS storage pool. -struct Pool { - name: ZpoolName, - info: ZpoolInfo, - parent: DiskIdentity, -} - -impl Pool { - /// Queries for an existing Zpool by name. - /// - /// Returns Ok if the pool exists. - fn new(name: ZpoolName, parent: DiskIdentity) -> Result { - let info = Zpool::get_info(&name.to_string())?; - Ok(Pool { name, info, parent }) - } - - fn parent(&self) -> &DiskIdentity { - &self.parent - } -} - // The type of a future which is used to send a notification to Nexus. type NotifyFut = Pin> + Send>>; @@ -154,179 +63,12 @@ struct UnderlayRequest { responder: oneshot::Sender>, } -#[derive(PartialEq, Eq, Clone)] -pub(crate) enum DiskWrapper { - Real { disk: Disk, devfs_path: Utf8PathBuf }, - Synthetic { zpool_name: ZpoolName }, -} - -impl From for DiskWrapper { - fn from(disk: Disk) -> Self { - let devfs_path = disk.devfs_path().clone(); - Self::Real { disk, devfs_path } - } -} - -impl DiskWrapper { - fn identity(&self) -> DiskIdentity { - match self { - DiskWrapper::Real { disk, .. 
} => disk.identity().clone(), - DiskWrapper::Synthetic { zpool_name } => { - let id = zpool_name.id(); - DiskIdentity { - vendor: "synthetic-vendor".to_string(), - serial: format!("synthetic-serial-{id}"), - model: "synthetic-model".to_string(), - } - } - } - } - - fn variant(&self) -> DiskVariant { - match self { - DiskWrapper::Real { disk, .. } => disk.variant(), - DiskWrapper::Synthetic { zpool_name } => match zpool_name.kind() { - ZpoolKind::External => DiskVariant::U2, - ZpoolKind::Internal => DiskVariant::M2, - }, - } - } - - fn zpool_name(&self) -> &ZpoolName { - match self { - DiskWrapper::Real { disk, .. } => disk.zpool_name(), - DiskWrapper::Synthetic { zpool_name } => zpool_name, - } - } -} - -#[derive(Clone)] -pub struct StorageResources { - // All disks, real and synthetic, being managed by this sled - disks: Arc>>, - - // A map of "Uuid" to "pool". - pools: Arc>>, -} - // The directory within the debug dataset in which bundles are created. const BUNDLE_DIRECTORY: &str = "bundle"; // The directory for zone bundles. const ZONE_BUNDLE_DIRECTORY: &str = "zone"; -impl StorageResources { - /// Creates a fabricated view of storage resources. - /// - /// Use this only when you want to reference the disks, but not actually - /// access them. Creates one internal and one external disk. - #[cfg(test)] - pub fn new_for_test() -> Self { - let new_disk_identity = || DiskIdentity { - vendor: "vendor".to_string(), - serial: Uuid::new_v4().to_string(), - model: "model".to_string(), - }; - - Self { - disks: Arc::new(Mutex::new(HashMap::from([ - ( - new_disk_identity(), - DiskWrapper::Synthetic { - zpool_name: ZpoolName::new_internal(Uuid::new_v4()), - }, - ), - ( - new_disk_identity(), - DiskWrapper::Synthetic { - zpool_name: ZpoolName::new_external(Uuid::new_v4()), - }, - ), - ]))), - pools: Arc::new(Mutex::new(HashMap::new())), - } - } - - /// Returns the identity of the boot disk. - /// - /// If this returns `None`, we have not processed the boot disk yet. - pub async fn boot_disk(&self) -> Option<(DiskIdentity, ZpoolName)> { - let disks = self.disks.lock().await; - disks.iter().find_map(|(id, disk)| { - match disk { - // This is the "real" use-case: if we have real disks, query - // their properties to identify if they truly are the boot disk. - DiskWrapper::Real { disk, .. } => { - if disk.is_boot_disk() { - return Some((id.clone(), disk.zpool_name().clone())); - } - } - // This is the "less real" use-case: if we have synthetic disks, - // just label the first M.2-looking one as a "boot disk". - DiskWrapper::Synthetic { .. } => { - if matches!(disk.variant(), DiskVariant::M2) { - return Some((id.clone(), disk.zpool_name().clone())); - } - } - }; - None - }) - } - - // TODO: Could be generic over DiskVariant - - /// Returns all M.2 zpools - pub async fn all_m2_zpools(&self) -> Vec { - self.all_zpools(DiskVariant::M2).await - } - - /// Returns all U.2 zpools - pub async fn all_u2_zpools(&self) -> Vec { - self.all_zpools(DiskVariant::U2).await - } - - /// Returns all mountpoints within all M.2s for a particular dataset. - pub async fn all_m2_mountpoints(&self, dataset: &str) -> Vec { - let m2_zpools = self.all_m2_zpools().await; - m2_zpools - .iter() - .map(|zpool| zpool.dataset_mountpoint(dataset)) - .collect() - } - - /// Returns all mountpoints within all U.2s for a particular dataset. 
- pub async fn all_u2_mountpoints(&self, dataset: &str) -> Vec { - let u2_zpools = self.all_u2_zpools().await; - u2_zpools - .iter() - .map(|zpool| zpool.dataset_mountpoint(dataset)) - .collect() - } - - /// Returns all zpools of a particular variant - pub async fn all_zpools(&self, variant: DiskVariant) -> Vec { - let disks = self.disks.lock().await; - disks - .values() - .filter_map(|disk| { - if disk.variant() == variant { - return Some(disk.zpool_name().clone()); - } - None - }) - .collect() - } - - /// Return the directories for storing zone service bundles. - pub async fn all_zone_bundle_directories(&self) -> Vec { - self.all_m2_mountpoints(sled_hardware::disk::M2_DEBUG_DATASET) - .await - .into_iter() - .map(|p| p.join(BUNDLE_DIRECTORY).join(ZONE_BUNDLE_DIRECTORY)) - .collect() - } -} - /// Describes the access to the underlay used by the StorageManager. pub struct UnderlayAccess { pub nexus_client: NexusClientWithResolver, @@ -1392,15 +1134,3 @@ impl StorageManager { &self.inner.resources } } - -impl Drop for StorageManagerInner { - fn drop(&mut self) { - // NOTE: Ideally, with async drop, we'd await completion of the worker - // somehow. - // - // Without that option, we instead opt to simply cancel the worker - // task to ensure it does not remain alive beyond the StorageManager - // itself. - self.task.abort(); - } -} diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 2a7dcbda9b..f5304262e4 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -34,6 +34,7 @@ enum StorageRequest { AddDisk(UnparsedDisk), AddSyntheticDisk(ZpoolName), RemoveDisk(UnparsedDisk), + RemoveSyntheticDisk(ZpoolName), DisksChanged(HashSet), // NewFilesystem(NewFilesystemRequest), KeyManagerReady, @@ -66,6 +67,12 @@ impl StorageHandle { self.tx.send(StorageRequest::RemoveDisk(disk)).await.unwrap(); } + /// Removes a synthetic disk, if it's tracked by the storage manager, as + /// well as any associated zpools. + pub async fn delete_synthetic_disk(&self, pool: ZpoolName) { + self.tx.send(StorageRequest::RemoveSyntheticDisk(pool)).await.unwrap(); + } + /// Ensures that the storage manager tracks exactly the provided disks. 
/// /// This acts similar to a batch [Self::upsert_disk] for all new disks, and @@ -209,7 +216,12 @@ impl StorageManager { } } } - StorageRequest::RemoveDisk(_unparsed_disk) => todo!(), + StorageRequest::RemoveDisk(unparsed_disk) => { + self.remove_disk(unparsed_disk).await; + } + StorageRequest::RemoveSyntheticDisk(pool) => { + self.remove_synthetic_disk(pool).await; + } StorageRequest::DisksChanged(_unparsed_disks) => todo!(), StorageRequest::KeyManagerReady => { self.state = StorageManagerState::Normal; @@ -437,6 +449,24 @@ impl StorageManager { } } } + + // Delete a real disk + async fn remove_disk(&mut self, unparsed_disk: UnparsedDisk) { + // If the disk is a U.2, we want to first delete it from any queued disks + let _ = self.queued_u2_drives.remove(&unparsed_disk); + if self.resources.remove_real_disk(unparsed_disk) { + let _ = self.resource_updates.send_replace(self.resources.clone()); + } + } + + // Delete a synthetic disk + async fn remove_synthetic_disk(&mut self, pool: ZpoolName) { + // If the disk is a U.2, we want to first delete it from any queued disks + let _ = self.queued_synthetic_u2_drives.remove(&pool); + if self.resources.remove_synthetic_disk(pool) { + let _ = self.resource_updates.send_replace(self.resources.clone()); + } + } } /// All tests only use synthetic disks, but are expected to be run on illumos @@ -634,7 +664,9 @@ mod tests { /// This allows us to control timing precisely. #[tokio::test] async fn queued_disks_get_requeued_on_secret_retriever_error() { - let logctx = test_setup_log("queued_disks_get_added_as_resources"); + let logctx = test_setup_log( + "queued_disks_get_requeued_on_secret_retriever_error", + ); let inject_error = Arc::new(AtomicBool::new(false)); let (mut key_manager, key_requester) = KeyManager::new( &logctx.log, @@ -681,4 +713,43 @@ mod tests { Zpool::destroy(&zpool_name).unwrap(); logctx.cleanup_successful(); } + + #[tokio::test] + async fn delete_disk_triggers_notification() { + let logctx = test_setup_log("delete_disk_triggers_notification"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); + let (mut manager, mut handle) = + StorageManager::new(&logctx.log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + // Inform the storage manager that the key manager is ready, so disks + // don't get queued + handle.key_manager_ready().await; + + // Create and add a disk + let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let _ = new_disk(dir.path(), &zpool_name); + handle.upsert_synthetic_disk(zpool_name.clone()).await; + + // Wait for the add disk notification + let resources = handle.wait_for_changes().await; + assert_eq!(resources.all_u2_zpools().len(), 1); + + // Delete the disk and wait for a notification + handle.delete_synthetic_disk(zpool_name.clone()).await; + let resources = handle.wait_for_changes().await; + assert!(resources.all_u2_zpools().is_empty()); + + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); + } } diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index fb57d742e3..82c588bd27 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -12,7 +12,7 @@ use camino::Utf8PathBuf; use illumos_utils::zpool::ZpoolName; use 
omicron_common::api::external::{ByteCount, ByteCountRangeError}; use omicron_common::disk::DiskIdentity; -use sled_hardware::DiskVariant; +use sled_hardware::{DiskVariant, UnparsedDisk}; use std::collections::BTreeMap; use std::sync::Arc; use uuid::Uuid; @@ -92,6 +92,38 @@ impl StorageResources { Ok(true) } + /// Delete a real disk and its zpool + /// + /// Return true, if data was changed, false otherwise + pub(crate) fn remove_real_disk(&mut self, disk: UnparsedDisk) -> bool { + if !self.disks.contains_key(disk.identity()) { + return false; + } + // Safe to unwrap as we just checked the key existed above + let parsed_disk = + Arc::make_mut(&mut self.disks).remove(disk.identity()).unwrap(); + Arc::make_mut(&mut self.pools).remove(&parsed_disk.zpool_name().id()); + true + } + + /// Delete a synthetic disk and its zpool + /// + /// Return true, if data was changed, false otherwise + pub(crate) fn remove_synthetic_disk( + &mut self, + zpool_name: ZpoolName, + ) -> bool { + let disk = DiskWrapper::Synthetic { zpool_name: zpool_name.clone() }; + if !self.disks.contains_key(&disk.identity()) { + return false; + } + // Safe to unwrap as we just checked the key existed above + let parsed_disk = + Arc::make_mut(&mut self.disks).remove(&disk.identity()).unwrap(); + Arc::make_mut(&mut self.pools).remove(&parsed_disk.zpool_name().id()); + true + } + /// Returns the identity of the boot disk. /// /// If this returns `None`, we have not processed the boot disk yet. From 4a38f9019d12ee6f5c5bd62455d42db7fa4ebfae Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 3 Oct 2023 22:07:44 +0000 Subject: [PATCH 15/66] wip --- sled-hardware/src/disk.rs | 1 + sled-storage/src/disk.rs | 229 ++++++++++++++---------- sled-storage/src/dump_setup.rs | 71 ++++---- sled-storage/src/manager.rs | 311 +++++++-------------------------- sled-storage/src/resources.rs | 81 ++------- 5 files changed, 248 insertions(+), 445 deletions(-) diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index 541d7bd548..aeaca9dc31 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -175,6 +175,7 @@ impl UnparsedDisk { /// from the ZFS related logic which can also operate on file backed zpools. /// Doing things this way allows us to not put higher level concepts like /// storage keys into this hardware related package. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct PooledDisk { pub paths: DiskPaths, pub slot: i64, diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index d7e02d8c97..640d4e77f4 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -4,163 +4,214 @@ //! 
Disk related types -use camino::Utf8PathBuf; -use illumos_utils::zpool::{ZpoolKind, ZpoolName}; +use camino::{Utf8Path, Utf8PathBuf}; +use derive_more::From; +use illumos_utils::zpool::{Zpool, ZpoolKind, ZpoolName}; use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; use sled_hardware::{ - DiskPaths, DiskVariant, Partition, PooledDisk, PooledDiskError, - UnparsedDisk, + DiskVariant, Partition, PooledDisk, PooledDiskError, UnparsedDisk, }; use slog::Logger; +use std::fs::File; use crate::dataset; -/// A wrapper around real disks or synthetic disks backed by a file -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum DiskWrapper { - Real { disk: Disk, devfs_path: Utf8PathBuf }, - Synthetic { zpool_name: ZpoolName }, +#[derive(Debug, thiserror::Error)] +pub enum DiskError { + #[error(transparent)] + Dataset(#[from] crate::dataset::DatasetError), + #[error(transparent)] + PooledDisk(#[from] sled_hardware::PooledDiskError), +} + +// A synthetic disk that acts as one "found" by the hardware and that is backed +// by a zpool +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SyntheticDisk { + identity: DiskIdentity, + zpool_name: ZpoolName, } -impl From for DiskWrapper { - fn from(disk: Disk) -> Self { - let devfs_path = disk.devfs_path().clone(); - Self::Real { disk, devfs_path } +impl SyntheticDisk { + // Create a zpool and import it for the synthetic disk + // Zpools willl be set to the min size of 64Mib + pub fn create_zpool( + dir: &Utf8Path, + zpool_name: &ZpoolName, + ) -> SyntheticDisk { + // 64 MiB (min size of zpool) + const DISK_SIZE: u64 = 64 * 1024 * 1024; + let path = dir.join(zpool_name.to_string()); + let file = File::create(&path).unwrap(); + file.set_len(DISK_SIZE).unwrap(); + drop(file); + Zpool::create(zpool_name, &path).unwrap(); + Zpool::import(zpool_name).unwrap(); + Zpool::set_failmode_continue(zpool_name).unwrap(); + Self::new(zpool_name.clone()) + } + + pub fn new(zpool_name: ZpoolName) -> SyntheticDisk { + let id = zpool_name.id(); + let identity = DiskIdentity { + vendor: "synthetic-vendor".to_string(), + serial: format!("synthetic-serial-{id}"), + model: "synthetic-model".to_string(), + }; + SyntheticDisk { identity, zpool_name } } } -impl DiskWrapper { - pub fn identity(&self) -> DiskIdentity { +// An [`UnparsedDisk`] disk learned about from the hardware or a wrapped zpool +#[derive(Debug, Clone, PartialEq, Eq, Hash, From)] +pub enum RawDisk { + Real(UnparsedDisk), + Synthetic(SyntheticDisk), +} + +impl RawDisk { + pub fn is_boot_disk(&self) -> bool { match self { - DiskWrapper::Real { disk, .. } => disk.identity().clone(), - DiskWrapper::Synthetic { zpool_name } => { - let id = zpool_name.id(); - DiskIdentity { - vendor: "synthetic-vendor".to_string(), - serial: format!("synthetic-serial-{id}"), - model: "synthetic-model".to_string(), - } + Self::Real(disk) => disk.is_boot_disk(), + Self::Synthetic(disk) => { + // Just label any M.2 the boot disk. + disk.zpool_name.kind() == ZpoolKind::Internal } } } - pub fn variant(&self) -> DiskVariant { + pub fn identity(&self) -> &DiskIdentity { match self { - DiskWrapper::Real { disk, .. } => disk.variant(), - DiskWrapper::Synthetic { zpool_name } => match zpool_name.kind() { - ZpoolKind::External => DiskVariant::U2, - ZpoolKind::Internal => DiskVariant::M2, - }, + Self::Real(disk) => &disk.identity(), + Self::Synthetic(disk) => &disk.identity, } } - pub fn zpool_name(&self) -> &ZpoolName { + pub fn variant(&self) -> DiskVariant { match self { - DiskWrapper::Real { disk, .. 
} => disk.zpool_name(), - DiskWrapper::Synthetic { zpool_name } => zpool_name, + Self::Real(disk) => disk.variant(), + Self::Synthetic(disk) => match disk.zpool_name.kind() { + ZpoolKind::External => DiskVariant::U2, + ZpoolKind::Internal => DiskVariant::M2, + }, } } } -#[derive(Debug, thiserror::Error)] -pub enum DiskError { - #[error(transparent)] - Dataset(#[from] crate::dataset::DatasetError), - #[error(transparent)] - PooledDisk(#[from] sled_hardware::PooledDiskError), -} - -/// A physical disk conforming to the expected partition layout -/// and which contains provisioned zpools and datasets. This disk -/// is ready for usage by higher level software. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Disk { - paths: DiskPaths, - slot: i64, - variant: DiskVariant, - identity: DiskIdentity, - is_boot_disk: bool, - partitions: Vec, - - // This embeds the assumtion that there is exactly one parsed zpool per - // disk. - zpool_name: ZpoolName, +/// A physical [`PooledDisk`] or a [`SyntheticDisk`] that contains or is backed +/// by a single zpool and that has provisioned datasets. This disk is ready for +/// usage by higher level software. +#[derive(Debug, Clone, PartialEq, Eq, Hash, From)] +pub enum Disk { + Real(PooledDisk), + Synthetic(SyntheticDisk), } impl Disk { pub async fn new( log: &Logger, - unparsed_disk: UnparsedDisk, + raw_disk: RawDisk, key_requester: Option<&StorageKeyRequester>, ) -> Result { - let disk = PooledDisk::new(log, unparsed_disk)?; + let disk = match raw_disk { + RawDisk::Real(disk) => PooledDisk::new(log, disk)?.into(), + RawDisk::Synthetic(disk) => Disk::Synthetic(disk), + }; dataset::ensure_zpool_has_datasets( log, - &disk.zpool_name, - &disk.identity, + disk.zpool_name(), + disk.identity(), key_requester, ) .await?; - Ok(disk.into()) + Ok(disk) + } + + pub fn is_synthetic(&self) -> bool { + match self { + Self::Real(_) => false, + Self::Synthetic(_) => true, + } + } + + pub fn is_real(&self) -> bool { + !self.is_synthetic() } + pub fn is_boot_disk(&self) -> bool { - self.is_boot_disk + match self { + Self::Real(disk) => disk.is_boot_disk, + Self::Synthetic(disk) => { + // Just label any M.2 the boot disk. 
+ disk.zpool_name.kind() == ZpoolKind::Internal + } + } } pub fn identity(&self) -> &DiskIdentity { - &self.identity + match self { + Self::Real(disk) => &disk.identity, + Self::Synthetic(disk) => &disk.identity, + } } pub fn variant(&self) -> DiskVariant { - self.variant + match self { + Self::Real(disk) => disk.variant, + Self::Synthetic(disk) => match disk.zpool_name.kind() { + ZpoolKind::External => DiskVariant::U2, + ZpoolKind::Internal => DiskVariant::M2, + }, + } } pub fn devfs_path(&self) -> &Utf8PathBuf { - &self.paths.devfs_path + match self { + Self::Real(disk) => &disk.paths.devfs_path, + Self::Synthetic(_) => unreachable!(), + } } pub fn zpool_name(&self) -> &ZpoolName { - &self.zpool_name + match self { + Self::Real(disk) => &disk.zpool_name, + Self::Synthetic(disk) => &disk.zpool_name, + } } pub fn boot_image_devfs_path( &self, raw: bool, ) -> Result { - self.paths.partition_device_path( - &self.partitions, - Partition::BootImage, - raw, - ) + match self { + Self::Real(disk) => disk.paths.partition_device_path( + &disk.partitions, + Partition::BootImage, + raw, + ), + Self::Synthetic(_) => unreachable!(), + } } pub fn dump_device_devfs_path( &self, raw: bool, ) -> Result { - self.paths.partition_device_path( - &self.partitions, - Partition::DumpDevice, - raw, - ) + match self { + Self::Real(disk) => disk.paths.partition_device_path( + &disk.partitions, + Partition::DumpDevice, + raw, + ), + Self::Synthetic(_) => unreachable!(), + } } pub fn slot(&self) -> i64 { - self.slot - } -} - -impl From for Disk { - fn from(pd: PooledDisk) -> Self { - Self { - paths: pd.paths, - slot: pd.slot, - variant: pd.variant, - identity: pd.identity, - is_boot_disk: pd.is_boot_disk, - partitions: pd.partitions, - zpool_name: pd.zpool_name, + match self { + Self::Real(disk) => disk.slot, + Self::Synthetic(_) => unreachable!(), } } } diff --git a/sled-storage/src/dump_setup.rs b/sled-storage/src/dump_setup.rs index 5befa8e8c8..39c6aa2995 100644 --- a/sled-storage/src/dump_setup.rs +++ b/sled-storage/src/dump_setup.rs @@ -5,7 +5,7 @@ //! Dump dataset setup use crate::dataset::{CRASH_DATASET, DUMP_DATASET}; -use crate::disk::DiskWrapper; +use crate::disk::Disk; use camino::Utf8PathBuf; use derive_more::{AsRef, Deref, From}; use illumos_utils::dumpadm::DumpAdmError; @@ -106,50 +106,51 @@ const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300); impl DumpSetup { pub(crate) async fn update_dumpdev_setup( &self, - disks: &mut MutexGuard<'_, HashMap>, + disks: &mut MutexGuard<'_, HashMap>, ) { let log = &self.log; let mut m2_dump_slices = Vec::new(); let mut u2_debug_datasets = Vec::new(); let mut m2_core_datasets = Vec::new(); - for (_id, disk_wrapper) in disks.iter() { - match disk_wrapper { - DiskWrapper::Real { disk, .. 
} => match disk.variant() { - DiskVariant::M2 => { - match disk.dump_device_devfs_path(false) { - Ok(path) => { - m2_dump_slices.push(DumpSlicePath(path)) - } - Err(err) => { - warn!(log, "Error getting dump device devfs path: {err:?}"); - } + for (_id, disk) in disks.iter() { + if disk.is_synthetic() { + // We only setup dump devices on real disks + continue; + } + match disk.variant() { + DiskVariant::M2 => { + match disk.dump_device_devfs_path(false) { + Ok(path) => m2_dump_slices.push(DumpSlicePath(path)), + Err(err) => { + warn!( + log, + "Error getting dump device devfs path: {err:?}" + ); } - let name = disk.zpool_name(); - if let Ok(info) = illumos_utils::zpool::Zpool::get_info( - &name.to_string(), - ) { - if info.health() == ZpoolHealth::Online { - m2_core_datasets.push(CoreZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); - } + } + let name = disk.zpool_name(); + if let Ok(info) = + illumos_utils::zpool::Zpool::get_info(&name.to_string()) + { + if info.health() == ZpoolHealth::Online { + m2_core_datasets.push(CoreZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); } } - DiskVariant::U2 => { - let name = disk.zpool_name(); - if let Ok(info) = illumos_utils::zpool::Zpool::get_info( - &name.to_string(), - ) { - if info.health() == ZpoolHealth::Online { - u2_debug_datasets - .push(DebugZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); - } + } + DiskVariant::U2 => { + let name = disk.zpool_name(); + if let Ok(info) = + illumos_utils::zpool::Zpool::get_info(&name.to_string()) + { + if info.health() == ZpoolHealth::Online { + u2_debug_datasets.push(DebugZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); } } - }, - DiskWrapper::Synthetic { .. } => {} + } } } diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index f5304262e4..efd4630c71 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -4,17 +4,16 @@ //! The storage manager task -use std::collections::{BTreeSet, HashSet}; +use std::collections::HashSet; -use crate::dataset::{self, DatasetError}; -use crate::disk::{Disk, DiskError, DiskWrapper}; +use crate::dataset::DatasetError; +use crate::disk::{Disk, DiskError, RawDisk}; use crate::error::Error; use crate::resources::StorageResources; -use derive_more::From; -use illumos_utils::zpool::{ZpoolKind, ZpoolName}; +use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; -use sled_hardware::{DiskVariant, UnparsedDisk}; +use sled_hardware::DiskVariant; use slog::{debug, error, info, o, warn, Logger}; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{interval, Duration, MissedTickBehavior}; @@ -31,11 +30,9 @@ pub enum StorageManagerState { } enum StorageRequest { - AddDisk(UnparsedDisk), - AddSyntheticDisk(ZpoolName), - RemoveDisk(UnparsedDisk), - RemoveSyntheticDisk(ZpoolName), - DisksChanged(HashSet), + AddDisk(RawDisk), + RemoveDisk(RawDisk), + DisksChanged(HashSet), // NewFilesystem(NewFilesystemRequest), KeyManagerReady, /// This will always grab the latest state after any new updates, as it @@ -52,27 +49,16 @@ pub struct StorageHandle { impl StorageHandle { /// Adds a disk and associated zpool to the storage manager. 
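/// Both hardware-backed and synthetic disks now arrive as a [`RawDisk`]; a
/// sketch of inserting a synthetic disk, assuming `handle` and a `zpool_name`
/// are in scope (the derived `From` impl on `RawDisk` supplies the `.into()`):
///
///     handle.upsert_disk(SyntheticDisk::new(zpool_name).into()).await;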
- pub async fn upsert_disk(&self, disk: UnparsedDisk) { + pub async fn upsert_disk(&self, disk: RawDisk) { self.tx.send(StorageRequest::AddDisk(disk)).await.unwrap(); } - /// Adds a synthetic disk backed by a zpool to the storage manager. - pub async fn upsert_synthetic_disk(&self, pool: ZpoolName) { - self.tx.send(StorageRequest::AddSyntheticDisk(pool)).await.unwrap(); - } - /// Removes a disk, if it's tracked by the storage manager, as well /// as any associated zpools. - pub async fn delete_disk(&self, disk: UnparsedDisk) { + pub async fn delete_disk(&self, disk: RawDisk) { self.tx.send(StorageRequest::RemoveDisk(disk)).await.unwrap(); } - /// Removes a synthetic disk, if it's tracked by the storage manager, as - /// well as any associated zpools. - pub async fn delete_synthetic_disk(&self, pool: ZpoolName) { - self.tx.send(StorageRequest::RemoveSyntheticDisk(pool)).await.unwrap(); - } - /// Ensures that the storage manager tracks exactly the provided disks. /// /// This acts similar to a batch [Self::upsert_disk] for all new disks, and @@ -80,14 +66,12 @@ impl StorageHandle { /// /// If errors occur, an arbitrary "one" of them will be returned, but a /// best-effort attempt to add all disks will still be attempted. - pub async fn ensure_using_exactly_these_disks(&self, unparsed_disks: I) + pub async fn ensure_using_exactly_these_disks(&self, raw_disks: I) where - I: IntoIterator, + I: IntoIterator, { self.tx - .send(StorageRequest::DisksChanged( - unparsed_disks.into_iter().collect(), - )) + .send(StorageRequest::DisksChanged(raw_disks.into_iter().collect())) .await .unwrap(); } @@ -142,8 +126,7 @@ pub struct StorageManager { state: StorageManagerState, rx: mpsc::Receiver, resources: StorageResources, - queued_u2_drives: HashSet, - queued_synthetic_u2_drives: HashSet, + queued_u2_drives: HashSet, key_requester: StorageKeyRequester, resource_updates: watch::Sender, } @@ -163,7 +146,6 @@ impl StorageManager { rx, resources, queued_u2_drives: HashSet::new(), - queued_synthetic_u2_drives: HashSet::new(), key_requester, resource_updates: update_tx, }, @@ -200,29 +182,14 @@ impl StorageManager { pub async fn step(&mut self) -> Result<(), Error> { // The sending side should never disappear match self.rx.recv().await.unwrap() { - StorageRequest::AddDisk(unparsed_disk) => { - match unparsed_disk.variant() { - DiskVariant::U2 => self.add_u2_disk(unparsed_disk).await?, - DiskVariant::M2 => self.add_m2_disk(unparsed_disk).await?, - } - } - StorageRequest::AddSyntheticDisk(zpool_name) => { - match zpool_name.kind() { - ZpoolKind::External => { - self.add_synthetic_u2_disk(zpool_name).await? - } - ZpoolKind::Internal => { - self.add_synthetic_m2_disk(zpool_name).await? 
- } - } - } - StorageRequest::RemoveDisk(unparsed_disk) => { - self.remove_disk(unparsed_disk).await; - } - StorageRequest::RemoveSyntheticDisk(pool) => { - self.remove_synthetic_disk(pool).await; + StorageRequest::AddDisk(raw_disk) => match raw_disk.variant() { + DiskVariant::U2 => self.add_u2_disk(raw_disk).await?, + DiskVariant::M2 => self.add_m2_disk(raw_disk).await?, + }, + StorageRequest::RemoveDisk(raw_disk) => { + self.remove_disk(raw_disk).await; } - StorageRequest::DisksChanged(_unparsed_disks) => todo!(), + StorageRequest::DisksChanged(_raw_disks) => todo!(), StorageRequest::KeyManagerReady => { self.state = StorageManagerState::Normal; self.add_queued_disks().await; @@ -267,59 +234,20 @@ impl StorageManager { } // Merge any requeued disks from transient errors with saved disks here self.queued_u2_drives.extend(saved); - - // Operate on queued synthetic disks - if self.state == StorageManagerState::QueuingDisks { - return; - } - - let mut saved = HashSet::new(); - let queued = std::mem::take(&mut self.queued_synthetic_u2_drives); - let mut iter = queued.into_iter(); - while let Some(zpool_name) = iter.next() { - if self.state == StorageManagerState::QueuingDisks { - // We hit a transient error in a prior iteration. - saved.insert(zpool_name); - } else { - // Try ot add the disk. If there was a transient error the disk will - // have been requeued. If there was a permanent error, it will have been - // dropped. If there is an another unexpected error, we will handle it and - // requeue ourselves. - if let Err(err) = - self.add_synthetic_u2_disk(zpool_name.clone()).await - { - warn!( - self.log, - "Potentially transient error: {err}: - requeing synthetic disk {:?}", - zpool_name - ); - saved.insert(zpool_name); - } - } - } - // Merge any requeued disks from transient errors with saved disks here - self.queued_synthetic_u2_drives.extend(saved); } // Add a real U.2 disk to [`StorageResources`] or queue it to be added later - async fn add_u2_disk( - &mut self, - unparsed_disk: UnparsedDisk, - ) -> Result<(), Error> { + async fn add_u2_disk(&mut self, raw_disk: RawDisk) -> Result<(), Error> { if self.state != StorageManagerState::Normal { - self.queued_u2_drives.insert(unparsed_disk); + self.queued_u2_drives.insert(raw_disk); return Ok(()); } - match Disk::new( - &self.log, - unparsed_disk.clone(), - Some(&self.key_requester), - ) - .await + match Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) + .await { Ok(disk) => { - if self.resources.insert_real_disk(disk)? { + if self.resources.insert_disk(disk)? 
{ let _ = self .resource_updates .send_replace(self.resources.clone()); @@ -329,9 +257,9 @@ impl StorageManager { Err(err @ DiskError::Dataset(DatasetError::KeyManager(_))) => { warn!( self.log, - "Transient error: {err} - queuing disk {:?}", unparsed_disk + "Transient error: {err} - queuing disk {:?}", raw_disk ); - self.queued_u2_drives.insert(unparsed_disk); + self.queued_u2_drives.insert(raw_disk); self.state = StorageManagerState::QueuingDisks; Ok(()) } @@ -339,7 +267,7 @@ impl StorageManager { error!( self.log, "Persistent error: {err} - not queueing disk {:?}", - unparsed_disk + raw_disk ); Ok(()) } @@ -351,119 +279,21 @@ impl StorageManager { // // We never queue M.2 drives, as they don't rely on [`KeyManager`] based // encryption - async fn add_m2_disk( - &mut self, - unparsed_disk: UnparsedDisk, - ) -> Result<(), Error> { - let disk = Disk::new( - &self.log, - unparsed_disk.clone(), - Some(&self.key_requester), - ) - .await?; - if self.resources.insert_real_disk(disk)? { + async fn add_m2_disk(&mut self, raw_disk: RawDisk) -> Result<(), Error> { + let disk = + Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) + .await?; + if self.resources.insert_disk(disk)? { let _ = self.resource_updates.send_replace(self.resources.clone()); } Ok(()) } - // Add a synthetic U.2 disk to [`StorageResources`] - // - // We never queue M.2 drives, as they don't rely on [`KeyManager`] based - // encryption - async fn add_synthetic_m2_disk( - &mut self, - zpool_name: ZpoolName, - ) -> Result<(), Error> { - let synthetic_id = DiskIdentity { - vendor: "fake_vendor".to_string(), - serial: "fake_serial".to_string(), - model: zpool_name.id().to_string(), - }; - - debug!(self.log, "Ensure zpool has datasets: {zpool_name}"); - dataset::ensure_zpool_has_datasets( - &self.log, - &zpool_name, - &synthetic_id, - Some(&self.key_requester), - ) - .await?; - if self.resources.insert_synthetic_disk(zpool_name)? { - let _ = self.resource_updates.send_replace(self.resources.clone()); - } - Ok(()) - } - - // Add a synthetic U.2 disk to [`StorageResources`] or queue it to be added - // later - async fn add_synthetic_u2_disk( - &mut self, - zpool_name: ZpoolName, - ) -> Result<(), Error> { - if self.state != StorageManagerState::Normal { - info!(self.log, "Queuing synthetic U.2 drive: {zpool_name}"); - self.queued_synthetic_u2_drives.insert(zpool_name); - return Ok(()); - } - - let synthetic_id = DiskIdentity { - vendor: "fake_vendor".to_string(), - serial: "fake_serial".to_string(), - model: zpool_name.id().to_string(), - }; - - debug!(self.log, "Ensure zpool has datasets: {zpool_name}"); - match dataset::ensure_zpool_has_datasets( - &self.log, - &zpool_name, - &synthetic_id, - Some(&self.key_requester), - ) - .await - { - Ok(()) => { - if self.resources.insert_synthetic_disk(zpool_name)? 
{ - let _ = self - .resource_updates - .send_replace(self.resources.clone()); - } - Ok(()) - } - Err(err @ DatasetError::KeyManager(_)) => { - warn!( - self.log, - "Transient error: {err} - queuing disk {:?}", synthetic_id - ); - self.queued_synthetic_u2_drives.insert(zpool_name); - self.state = StorageManagerState::QueuingDisks; - Ok(()) - } - Err(err) => { - error!( - self.log, - "Persistent error: {err} - not queueing disk {:?}", - synthetic_id - ); - Ok(()) - } - } - } - // Delete a real disk - async fn remove_disk(&mut self, unparsed_disk: UnparsedDisk) { - // If the disk is a U.2, we want to first delete it from any queued disks - let _ = self.queued_u2_drives.remove(&unparsed_disk); - if self.resources.remove_real_disk(unparsed_disk) { - let _ = self.resource_updates.send_replace(self.resources.clone()); - } - } - - // Delete a synthetic disk - async fn remove_synthetic_disk(&mut self, pool: ZpoolName) { + async fn remove_disk(&mut self, raw_disk: RawDisk) { // If the disk is a U.2, we want to first delete it from any queued disks - let _ = self.queued_synthetic_u2_drives.remove(&pool); - if self.resources.remove_synthetic_disk(pool) { + let _ = self.queued_u2_drives.remove(&raw_disk); + if self.resources.remove_disk(raw_disk) { let _ = self.resource_updates.send_replace(self.resources.clone()); } } @@ -473,9 +303,10 @@ impl StorageManager { /// systems. #[cfg(all(test, target_os = "illumos"))] mod tests { + use crate::disk::SyntheticDisk; + use super::*; use async_trait::async_trait; - use camino::{Utf8Path, Utf8PathBuf}; use camino_tempfile::tempdir; use illumos_utils::zpool::Zpool; use key_manager::{ @@ -483,7 +314,6 @@ mod tests { VersionedIkm, }; use omicron_test_utils::dev::test_setup_log; - use std::fs::File; use std::sync::{ atomic::{AtomicBool, Ordering}, Arc, @@ -532,21 +362,6 @@ mod tests { } } - // 64 MiB (min size of zpool) - const DISK_SIZE: u64 = 64 * 1024 * 1024; - - // Create a synthetic disk with a zpool backed by a file - fn new_disk(dir: &Utf8Path, zpool_name: &ZpoolName) -> Utf8PathBuf { - let path = dir.join(zpool_name.to_string()); - let file = File::create(&path).unwrap(); - file.set_len(DISK_SIZE).unwrap(); - drop(file); - Zpool::create(zpool_name, &path).unwrap(); - Zpool::import(zpool_name).unwrap(); - Zpool::set_failmode_continue(zpool_name).unwrap(); - path - } - #[tokio::test] async fn add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued() { let logctx = test_setup_log( @@ -556,25 +371,18 @@ mod tests { KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let raw_disk: RawDisk = SyntheticDisk::new(zpool_name).into(); assert_eq!(StorageManagerState::WaitingForKeyManager, manager.state); - manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); + manager.add_u2_disk(raw_disk.clone()).await.unwrap(); assert!(manager.resources.all_u2_zpools().is_empty()); - assert_eq!( - manager.queued_synthetic_u2_drives, - HashSet::from([zpool_name.clone()]) - ); + assert_eq!(manager.queued_u2_drives, HashSet::from([raw_disk.clone()])); - // Walk through other non-normal stages and enusre disk gets queued - for stage in [StorageManagerState::QueuingDisks] { - manager.queued_synthetic_u2_drives.clear(); - manager.state = stage; - manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); - assert!(manager.resources.all_u2_zpools().is_empty()); - assert_eq!( - manager.queued_synthetic_u2_drives, - 
HashSet::from([zpool_name.clone()]) - ); - } + // Check other non-normal stages and enusre disk gets queued + manager.queued_u2_drives.clear(); + manager.state = StorageManagerState::QueuingDisks; + manager.add_u2_disk(raw_disk.clone()).await.unwrap(); + assert!(manager.resources.all_u2_zpools().is_empty()); + assert_eq!(manager.queued_u2_drives, HashSet::from([raw_disk])); logctx.cleanup_successful(); } @@ -586,14 +394,14 @@ mod tests { let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); let zpool_name = ZpoolName::new_external(Uuid::new_v4()); let dir = tempdir().unwrap(); - let _ = new_disk(dir.path(), &zpool_name); + let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); // Spawn the key_manager so that it will respond to requests for encryption keys tokio::spawn(async move { key_manager.run().await }); // Set the stage to pretend we've progressed enough to have a key_manager available. manager.state = StorageManagerState::Normal; - manager.add_synthetic_u2_disk(zpool_name.clone()).await.unwrap(); + manager.add_u2_disk(disk).await.unwrap(); assert_eq!(manager.resources.all_u2_zpools().len(), 1); Zpool::destroy(&zpool_name).unwrap(); logctx.cleanup_successful(); @@ -617,9 +425,9 @@ mod tests { // Create a synthetic internal disk let zpool_name = ZpoolName::new_internal(Uuid::new_v4()); let dir = tempdir().unwrap(); - let _ = new_disk(dir.path(), &zpool_name); + let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); - handle.upsert_synthetic_disk(zpool_name.clone()).await; + handle.upsert_disk(disk).await; handle.wait_for_boot_disk().await; Zpool::destroy(&zpool_name).unwrap(); logctx.cleanup_successful(); @@ -645,8 +453,8 @@ mod tests { // the `KeyManager` is ready yet. let zpool_name = ZpoolName::new_external(Uuid::new_v4()); let dir = tempdir().unwrap(); - let _ = new_disk(dir.path(), &zpool_name); - handle.upsert_synthetic_disk(zpool_name.clone()).await; + let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); + handle.upsert_disk(disk).await; let resources = handle.get_latest_resources().await; assert!(resources.all_u2_zpools().is_empty()); @@ -682,8 +490,8 @@ mod tests { // the `KeyManager` is ready yet. 
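        // `create_zpool` replaces the old `new_disk` helper: it backs the
        // zpool with a small file in the temp dir and imports it, so no
        // real device is needed for this test.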
let zpool_name = ZpoolName::new_external(Uuid::new_v4()); let dir = tempdir().unwrap(); - let _ = new_disk(dir.path(), &zpool_name); - handle.upsert_synthetic_disk(zpool_name.clone()).await; + let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); + handle.upsert_disk(disk).await; manager.step().await.unwrap(); // We can't wait for a reply through the handle as the storage manager task @@ -737,15 +545,16 @@ mod tests { // Create and add a disk let zpool_name = ZpoolName::new_external(Uuid::new_v4()); let dir = tempdir().unwrap(); - let _ = new_disk(dir.path(), &zpool_name); - handle.upsert_synthetic_disk(zpool_name.clone()).await; + let disk: RawDisk = + SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); + handle.upsert_disk(disk.clone()).await; // Wait for the add disk notification let resources = handle.wait_for_changes().await; assert_eq!(resources.all_u2_zpools().len(), 1); // Delete the disk and wait for a notification - handle.delete_synthetic_disk(zpool_name.clone()).await; + handle.delete_disk(disk).await; let resources = handle.wait_for_changes().await; assert!(resources.all_u2_zpools().is_empty()); diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 82c588bd27..07ee0a1ea5 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -5,12 +5,11 @@ //! Discovered and usable disks and zpools use crate::dataset::M2_DEBUG_DATASET; -use crate::disk::{Disk, DiskWrapper}; +use crate::disk::{Disk, RawDisk}; use crate::error::Error; use crate::pool::Pool; use camino::Utf8PathBuf; use illumos_utils::zpool::ZpoolName; -use omicron_common::api::external::{ByteCount, ByteCountRangeError}; use omicron_common::disk::DiskIdentity; use sled_hardware::{DiskVariant, UnparsedDisk}; use std::collections::BTreeMap; @@ -41,7 +40,7 @@ const ZONE_BUNDLE_DIRECTORY: &str = "zone"; #[derive(Debug, Clone, Default)] pub struct StorageResources { // All disks, real and synthetic, being managed by this sled - disks: Arc>, + disks: Arc>, // A map of "Uuid" to "pool". 
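    // Keyed by the pool's UUID (`ZpoolName::id()`); the value is the `Pool`
    // wrapper that caches the pool's `ZpoolInfo` and its parent disk's
    // `DiskIdentity`.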
pools: Arc>, @@ -51,42 +50,15 @@ impl StorageResources { /// Insert a disk and its zpool /// /// Return true, if data was changed, false otherwise - pub(crate) fn insert_real_disk( - &mut self, - disk: Disk, - ) -> Result { + pub(crate) fn insert_disk(&mut self, disk: Disk) -> Result { let parent = disk.identity().clone(); let zpool_name = disk.zpool_name().clone(); - let disk = DiskWrapper::Real { - disk: disk.clone(), - devfs_path: disk.devfs_path().clone(), - }; if let Some(stored) = self.disks.get(&parent) { if stored == &disk { return Ok(false); } } - Arc::make_mut(&mut self.disks).insert(disk.identity(), disk); - let zpool = Pool::new(zpool_name, parent)?; - Arc::make_mut(&mut self.pools).insert(zpool.name.id(), zpool); - Ok(true) - } - - /// Insert a synthetic disk and its zpool - /// - /// Return true, if data was changed, false otherwise - pub(crate) fn insert_synthetic_disk( - &mut self, - zpool_name: ZpoolName, - ) -> Result { - let disk = DiskWrapper::Synthetic { zpool_name: zpool_name.clone() }; - let parent = disk.identity().clone(); - if let Some(stored) = self.disks.get(&parent) { - if stored == &disk { - return Ok(false); - } - } - Arc::make_mut(&mut self.disks).insert(disk.identity(), disk); + Arc::make_mut(&mut self.disks).insert(disk.identity().clone(), disk); let zpool = Pool::new(zpool_name, parent)?; Arc::make_mut(&mut self.pools).insert(zpool.name.id(), zpool); Ok(true) @@ -95,7 +67,7 @@ impl StorageResources { /// Delete a real disk and its zpool /// /// Return true, if data was changed, false otherwise - pub(crate) fn remove_real_disk(&mut self, disk: UnparsedDisk) -> bool { + pub(crate) fn remove_disk(&mut self, disk: RawDisk) -> bool { if !self.disks.contains_key(disk.identity()) { return false; } @@ -106,47 +78,16 @@ impl StorageResources { true } - /// Delete a synthetic disk and its zpool - /// - /// Return true, if data was changed, false otherwise - pub(crate) fn remove_synthetic_disk( - &mut self, - zpool_name: ZpoolName, - ) -> bool { - let disk = DiskWrapper::Synthetic { zpool_name: zpool_name.clone() }; - if !self.disks.contains_key(&disk.identity()) { - return false; - } - // Safe to unwrap as we just checked the key existed above - let parsed_disk = - Arc::make_mut(&mut self.disks).remove(&disk.identity()).unwrap(); - Arc::make_mut(&mut self.pools).remove(&parsed_disk.zpool_name().id()); - true - } - /// Returns the identity of the boot disk. /// /// If this returns `None`, we have not processed the boot disk yet. pub fn boot_disk(&self) -> Option<(DiskIdentity, ZpoolName)> { - self.disks.iter().find_map(|(id, disk)| { - match disk { - // This is the "real" use-case: if we have real disks, query - // their properties to identify if they truly are the boot disk. - DiskWrapper::Real { disk, .. } => { - if disk.is_boot_disk() { - return Some((id.clone(), disk.zpool_name().clone())); - } - } - // This is the "less real" use-case: if we have synthetic disks, - // just label the first M.2-looking one as a "boot disk". - DiskWrapper::Synthetic { .. } => { - if matches!(disk.variant(), DiskVariant::M2) { - return Some((id.clone(), disk.zpool_name().clone())); - } - } - }; - None - }) + for (id, disk) in self.disks.iter() { + if disk.is_boot_disk() { + return Some((id.clone(), disk.zpool_name().clone())); + } + } + None } /// Returns all M.2 zpools pub fn all_m2_zpools(&self) -> Vec { From 98cc812ebf7f437f17aea17e65055be12fce269d Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Wed, 4 Oct 2023 19:42:37 +0000 Subject: [PATCH 16/66] wip --- illumos-utils/src/zpool.rs | 2 +- sled-storage/src/disk.rs | 11 +++ sled-storage/src/manager.rs | 170 +++++++++++++++++++++++++--------- sled-storage/src/pool.rs | 2 +- sled-storage/src/resources.rs | 44 ++++----- 5 files changed, 160 insertions(+), 69 deletions(-) diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index 68d5ebd3a2..f0916b236a 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -96,7 +96,7 @@ impl FromStr for ZpoolHealth { } /// Describes a Zpool. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct ZpoolInfo { name: String, size: u64, diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index 640d4e77f4..6121b267d0 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -97,6 +97,17 @@ impl RawDisk { }, } } + + pub fn is_synthetic(&self) -> bool { + match self { + Self::Real(_) => false, + Self::Synthetic(_) => true, + } + } + + pub fn is_real(&self) -> bool { + !self.is_synthetic() + } } /// A physical [`PooledDisk`] or a [`SyntheticDisk`] that contains or is backed diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index efd4630c71..7355530f2e 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -6,7 +6,7 @@ use std::collections::HashSet; -use crate::dataset::DatasetError; +use crate::dataset::{DatasetError, DatasetName}; use crate::disk::{Disk, DiskError, RawDisk}; use crate::error::Error; use crate::resources::StorageResources; @@ -17,6 +17,7 @@ use sled_hardware::DiskVariant; use slog::{debug, error, info, o, warn, Logger}; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{interval, Duration, MissedTickBehavior}; +use uuid::Uuid; // The size of the mpsc bounded channel used to communicate // between the `StorageHandle` and `StorageManager`. @@ -29,11 +30,18 @@ pub enum StorageManagerState { Normal, } +#[derive(Debug)] +struct NewFilesystemRequest { + dataset_id: Uuid, + dataset_name: DatasetName, + responder: oneshot::Sender>, +} + enum StorageRequest { AddDisk(RawDisk), RemoveDisk(RawDisk), DisksChanged(HashSet), - // NewFilesystem(NewFilesystemRequest), + NewFilesystem(NewFilesystemRequest), KeyManagerReady, /// This will always grab the latest state after any new updates, as it /// serializes through the `StorageManager` task. @@ -170,7 +178,9 @@ impl StorageManager { _ = interval.tick(), if self.state == StorageManagerState::QueuingDisks => { - self.add_queued_disks().await; + if self.add_queued_disks().await { + let _ = self.resource_updates.send_replace(self.resources.clone()); + } } } } @@ -181,23 +191,31 @@ impl StorageManager { /// This is useful for testing/debugging pub async fn step(&mut self) -> Result<(), Error> { // The sending side should never disappear - match self.rx.recv().await.unwrap() { - StorageRequest::AddDisk(raw_disk) => match raw_disk.variant() { - DiskVariant::U2 => self.add_u2_disk(raw_disk).await?, - DiskVariant::M2 => self.add_m2_disk(raw_disk).await?, - }, + let should_send_updates = match self.rx.recv().await.unwrap() { + StorageRequest::AddDisk(raw_disk) => { + self.add_disk(raw_disk).await? 
+ } StorageRequest::RemoveDisk(raw_disk) => { - self.remove_disk(raw_disk).await; + self.remove_disk(raw_disk).await } - StorageRequest::DisksChanged(_raw_disks) => todo!(), + StorageRequest::DisksChanged(raw_disks) => { + self.ensure_using_exactly_these_disks(raw_disks).await + } + StorageRequest::NewFilesystem(_req) => todo!(), StorageRequest::KeyManagerReady => { self.state = StorageManagerState::Normal; - self.add_queued_disks().await; + self.add_queued_disks().await } StorageRequest::GetLatestResources(tx) => { let _ = tx.send(self.resources.clone()); + false } + }; + + if should_send_updates { + let _ = self.resource_updates.send_replace(self.resources.clone()); } + Ok(()) } @@ -205,9 +223,12 @@ impl StorageManager { // unless we hit a transient error. If we hit a transient error, we return // and wait for the next retry window to re-call this method. If we hit a // permanent error we log it, but we continue inserting queued disks. - async fn add_queued_disks(&mut self) { + // + // Return true if updates should be sent to watchers, false otherwise + async fn add_queued_disks(&mut self) -> bool { self.state = StorageManagerState::Normal; - // Operate on queued real disks + + let mut send_updates = false; // Disks that should be requeued. let mut saved = HashSet::new(); @@ -222,80 +243,137 @@ impl StorageManager { // have been requeued. If there was a permanent error, it will have been // dropped. If there is an another unexpected error, we will handle it and // requeue ourselves. - if let Err(err) = self.add_u2_disk(disk.clone()).await { - warn!( - self.log, - "Potentially transient error: {err}: - requeing disk {:?}", - disk - ); - saved.insert(disk); + match self.add_u2_disk(disk.clone()).await { + Err(err) => { + warn!( + self.log, + "Potentially transient error: {err}: requeuing disk"; + "disk_id" => ?disk.identity() + ); + saved.insert(disk); + } + Ok(true) => send_updates = true, + Ok(false) => (), } } } // Merge any requeued disks from transient errors with saved disks here self.queued_u2_drives.extend(saved); + send_updates + } + + // Add a disk to `StorageResources` if it is new and return Ok(true) if so + async fn add_disk(&mut self, raw_disk: RawDisk) -> Result { + match raw_disk.variant() { + DiskVariant::U2 => self.add_u2_disk(raw_disk).await, + DiskVariant::M2 => self.add_m2_disk(raw_disk).await, + } } - // Add a real U.2 disk to [`StorageResources`] or queue it to be added later - async fn add_u2_disk(&mut self, raw_disk: RawDisk) -> Result<(), Error> { + // Add a U.2 disk to [`StorageResources`] or queue it to be added later + async fn add_u2_disk(&mut self, raw_disk: RawDisk) -> Result { if self.state != StorageManagerState::Normal { self.queued_u2_drives.insert(raw_disk); - return Ok(()); + return Ok(false); } match Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) .await { - Ok(disk) => { - if self.resources.insert_disk(disk)? 
{ - let _ = self - .resource_updates - .send_replace(self.resources.clone()); - } - Ok(()) - } + Ok(disk) => self.resources.insert_disk(disk), Err(err @ DiskError::Dataset(DatasetError::KeyManager(_))) => { warn!( self.log, - "Transient error: {err} - queuing disk {:?}", raw_disk + "Transient error: {err}: queuing disk"; + "disk_id" => ?raw_disk.identity() ); self.queued_u2_drives.insert(raw_disk); self.state = StorageManagerState::QueuingDisks; - Ok(()) + Ok(false) } Err(err) => { error!( self.log, - "Persistent error: {err} - not queueing disk {:?}", - raw_disk + "Persistent error: {err}: not queueing disk"; + "disk_id" => ?raw_disk.identity() ); - Ok(()) + Ok(false) } } } - // Add a real U.2 disk to [`StorageResources`] + // Add a U.2 disk to [`StorageResources`] if new and return `Ok(true)` if so // // // We never queue M.2 drives, as they don't rely on [`KeyManager`] based // encryption - async fn add_m2_disk(&mut self, raw_disk: RawDisk) -> Result<(), Error> { + async fn add_m2_disk(&mut self, raw_disk: RawDisk) -> Result { let disk = Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) .await?; - if self.resources.insert_disk(disk)? { - let _ = self.resource_updates.send_replace(self.resources.clone()); - } - Ok(()) + self.resources.insert_disk(disk) } - // Delete a real disk - async fn remove_disk(&mut self, raw_disk: RawDisk) { + // Delete a real disk and return `true` if the disk was actually removed + async fn remove_disk(&mut self, raw_disk: RawDisk) -> bool { // If the disk is a U.2, we want to first delete it from any queued disks let _ = self.queued_u2_drives.remove(&raw_disk); - if self.resources.remove_disk(raw_disk) { - let _ = self.resource_updates.send_replace(self.resources.clone()); + self.resources.remove_disk(raw_disk.identity()) + } + + // Find all disks to remove that are not in raw_disks and remove them Then + // take the remaining disks and try to add them all. `StorageResources` will + // inform us if anything changed, and if so we return true, otherwise we + // return false. + async fn ensure_using_exactly_these_disks( + &mut self, + raw_disks: HashSet, + ) -> bool { + let mut should_update = false; + + // Clear out any queued U.2 disks that are real. + // We keep synthetic disks, as they are only added once. 
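+        // After this `retain`, anything still queued is synthetic; real
+        // disks are reconciled against `raw_disks` by the remove/add passes
+        // below (and re-queued there if the key manager is not ready yet).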
+ self.queued_u2_drives.retain(|d| d.is_synthetic()); + + let all_ids: HashSet<_> = + raw_disks.iter().map(|d| d.identity()).collect(); + + // Find all existing disks not in the current set + let to_remove: Vec = self + .resources + .disks + .keys() + .filter_map(|id| { + if !all_ids.contains(id) { + Some(id.clone()) + } else { + None + } + }) + .collect(); + + for id in to_remove { + if self.resources.remove_disk(&id) { + should_update = true; + } + } + + for raw_disk in raw_disks { + let disk_id = raw_disk.identity().clone(); + match self.add_disk(raw_disk).await { + Ok(true) => should_update = true, + Ok(false) => (), + Err(err) => { + warn!( + self.log, + "Failed to add disk to storage resources: {err}"; + "disk_id" => ?disk_id + ); + } + } } + + should_update } } diff --git a/sled-storage/src/pool.rs b/sled-storage/src/pool.rs index 203738b16a..76eabedd12 100644 --- a/sled-storage/src/pool.rs +++ b/sled-storage/src/pool.rs @@ -9,7 +9,7 @@ use illumos_utils::zpool::{Zpool, ZpoolInfo, ZpoolName}; use omicron_common::disk::DiskIdentity; /// A ZFS storage pool -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct Pool { pub name: ZpoolName, pub info: ZpoolInfo, diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 07ee0a1ea5..8d739fae3f 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -25,25 +25,25 @@ const ZONE_BUNDLE_DIRECTORY: &str = "zone"; /// Storage related resources: disks and zpools /// /// This state is internal to the [`crate::StorageManager`] task. Clones -/// of this state, or subsets of it, can be retrieved by requests to the -/// `StorageManager` task from the [`crate::StorageManagerHandle`]. This state -/// is not `Sync`, and as such does not require any mutexes. However, we do -/// expect to share it relatively frequently, and we want copies of it to be -/// as cheaply made as possible. So any large state is stored inside `Arc`s. On -/// the other hand, we expect infrequent updates to this state, and as such, we -/// use [`std::sync::Arc::make_mut`] to implement clone on write functionality +/// of this state can be retrieved by requests to the `StorageManager` task +/// from the [`crate::StorageManagerHandle`]. This state is not `Sync`, and +/// as such does not require any mutexes. However, we do expect to share it +/// relatively frequently, and we want copies of it to be as cheaply made +/// as possible. So any large state is stored inside `Arc`s. On the other +/// hand, we expect infrequent updates to this state, and as such, we use +/// [`std::sync::Arc::make_mut`] to implement clone on write functionality /// inside the `StorageManager` task if there are any outstanding copies. /// Therefore, we only pay the cost to update infrequently, and no locks are /// required by callers when operating on cloned data. The only contention here -/// is for the refrence counters of the internal Arcs when `StorageResources` gets cloned -/// or dropped. +/// is for the refrence counters of the internal Arcs when `StorageResources` +/// gets cloned or dropped. #[derive(Debug, Clone, Default)] pub struct StorageResources { // All disks, real and synthetic, being managed by this sled - disks: Arc>, + pub disks: Arc>, // A map of "Uuid" to "pool". 
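    // Both maps are `pub` so the manager task and the tests can inspect
    // them directly, e.g. `resources.disks.keys()` in
    // `ensure_using_exactly_these_disks`.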
- pools: Arc>, + pub pools: Arc>, } impl StorageResources { @@ -51,15 +51,18 @@ impl StorageResources { /// /// Return true, if data was changed, false otherwise pub(crate) fn insert_disk(&mut self, disk: Disk) -> Result { - let parent = disk.identity().clone(); + let disk_id = disk.identity().clone(); let zpool_name = disk.zpool_name().clone(); - if let Some(stored) = self.disks.get(&parent) { - if stored == &disk { - return Ok(false); + let zpool = Pool::new(zpool_name, disk_id.clone())?; + if let Some(stored_disk) = self.disks.get(&disk_id) { + if let Some(stored_pool) = self.pools.get(&zpool.name.id()) { + if stored_disk == &disk && stored_pool == &zpool { + return Ok(false); + } } } - Arc::make_mut(&mut self.disks).insert(disk.identity().clone(), disk); - let zpool = Pool::new(zpool_name, parent)?; + // Either the disk or zpool changed + Arc::make_mut(&mut self.disks).insert(disk_id, disk); Arc::make_mut(&mut self.pools).insert(zpool.name.id(), zpool); Ok(true) } @@ -67,13 +70,12 @@ impl StorageResources { /// Delete a real disk and its zpool /// /// Return true, if data was changed, false otherwise - pub(crate) fn remove_disk(&mut self, disk: RawDisk) -> bool { - if !self.disks.contains_key(disk.identity()) { + pub(crate) fn remove_disk(&mut self, id: &DiskIdentity) -> bool { + if !self.disks.contains_key(id) { return false; } // Safe to unwrap as we just checked the key existed above - let parsed_disk = - Arc::make_mut(&mut self.disks).remove(disk.identity()).unwrap(); + let parsed_disk = Arc::make_mut(&mut self.disks).remove(id).unwrap(); Arc::make_mut(&mut self.pools).remove(&parsed_disk.zpool_name().id()); true } From 8c38e8d75ece2b8b0d13a7d5a9610e19f4d7a292 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 4 Oct 2023 23:38:51 +0000 Subject: [PATCH 17/66] wip --- sled-storage/src/disk.rs | 12 ++- sled-storage/src/manager.rs | 144 +++++++++++++++++++++++++++++++++- sled-storage/src/resources.rs | 9 ++- 3 files changed, 160 insertions(+), 5 deletions(-) diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index 6121b267d0..f5a0e60c8f 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -29,8 +29,8 @@ pub enum DiskError { // by a zpool #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct SyntheticDisk { - identity: DiskIdentity, - zpool_name: ZpoolName, + pub identity: DiskIdentity, + pub zpool_name: ZpoolName, } impl SyntheticDisk { @@ -98,6 +98,14 @@ impl RawDisk { } } + #[cfg(test)] + pub fn zpool_name(&self) -> &ZpoolName { + match self { + Self::Real(_) => unreachable!(), + Self::Synthetic(disk) => &disk.zpool_name, + } + } + pub fn is_synthetic(&self) -> bool { match self { Self::Real(_) => false, diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 7355530f2e..51706c5021 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -23,7 +23,7 @@ use uuid::Uuid; // between the `StorageHandle` and `StorageManager`. const QUEUE_SIZE: usize = 256; -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum StorageManagerState { WaitingForKeyManager, QueuingDisks, @@ -47,6 +47,17 @@ enum StorageRequest { /// serializes through the `StorageManager` task. /// This serialization is particularly useful for tests. 
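    ///
    /// A rough sketch of the two ways callers read state (illustrative
    /// only; it assumes a `StorageHandle` named `handle`):
    ///
    /// ```ignore
    /// // Serialized request/response: reflects every request handled so far.
    /// let snapshot = handle.get_latest_resources().await;
    ///
    /// // Watch-based: resolves only once the next update is published.
    /// let next = handle.wait_for_changes().await;
    /// ```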
GetLatestResources(oneshot::Sender), + + /// Get the internal task state of the manager + GetManagerState(oneshot::Sender), +} + +/// Data managed internally to the StorageManagerTask that can be useful +/// to clients for debugging purposes, and that isn't exposed in other ways. +#[derive(Debug, Clone)] +pub struct StorageManagerData { + state: StorageManagerState, + queued_u2_drives: HashSet, } /// A mechanism for interacting with the [`StorageManager`] @@ -124,6 +135,13 @@ impl StorageHandle { self.tx.send(StorageRequest::GetLatestResources(tx)).await.unwrap(); rx.await.unwrap() } + + /// Return internal data useful for debugging and testing + pub async fn get_manager_state(&mut self) -> StorageManagerData { + let (tx, rx) = oneshot::channel(); + self.tx.send(StorageRequest::GetManagerState(tx)).await.unwrap(); + rx.await.unwrap() + } } /// The storage manager responsible for the state of the storage @@ -210,6 +228,13 @@ impl StorageManager { let _ = tx.send(self.resources.clone()); false } + StorageRequest::GetManagerState(tx) => { + let _ = tx.send(StorageManagerData { + state: self.state, + queued_u2_drives: self.queued_u2_drives.clone(), + }); + false + } }; if should_send_updates { @@ -639,4 +664,121 @@ mod tests { Zpool::destroy(&zpool_name).unwrap(); logctx.cleanup_successful(); } + + #[tokio::test] + async fn ensure_using_exactly_these_disks() { + let logctx = test_setup_log("ensure_using_exactly_these_disks"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); + let (mut manager, mut handle) = + StorageManager::new(&logctx.log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + // Create a bunch of file backed external disks with zpools + let dir = tempdir().unwrap(); + let zpools: Vec = + (0..10).map(|_| ZpoolName::new_external(Uuid::new_v4())).collect(); + let disks: Vec = zpools + .iter() + .map(|zpool_name| { + SyntheticDisk::create_zpool(dir.path(), zpool_name).into() + }) + .collect(); + + // Add the first 3 disks, and ensure they get queued, as we haven't + // marked our key manager ready yet + handle + .ensure_using_exactly_these_disks(disks.iter().take(3).cloned()) + .await; + let state = handle.get_manager_state().await; + assert_eq!(state.queued_u2_drives.len(), 3); + assert_eq!(state.state, StorageManagerState::WaitingForKeyManager); + assert!(handle.get_latest_resources().await.all_u2_zpools().is_empty()); + + // Mark the key manager ready and wait for the storage update + handle.key_manager_ready().await; + let resources = handle.wait_for_changes().await; + let expected: HashSet<_> = + disks.iter().take(3).map(|d| d.identity()).collect(); + let actual: HashSet<_> = resources.disks.keys().collect(); + assert_eq!(expected, actual); + + // Add first three disks after the initial one. The returned resources + // should not contain the first disk. 
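+        // In other words, pass disks[1..=3]: disks[0] drops out because it
+        // is no longer requested, while disks[3] is newly added.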
+ handle + .ensure_using_exactly_these_disks( + disks.iter().skip(1).take(3).cloned(), + ) + .await; + let resources = handle.wait_for_changes().await; + let expected: HashSet<_> = + disks.iter().skip(1).take(3).map(|d| d.identity()).collect(); + let actual: HashSet<_> = resources.disks.keys().collect(); + assert_eq!(expected, actual); + + // Ensure the same set of disks and make sure no change occurs + // Note that we directly request the resources this time so we aren't + // waiting forever for a change notification. + handle + .ensure_using_exactly_these_disks( + disks.iter().skip(1).take(3).cloned(), + ) + .await; + let resources2 = handle.get_latest_resources().await; + assert_eq!(resources, resources2); + + // Add a disjoint set of disks and see that only they come through + handle + .ensure_using_exactly_these_disks( + disks.iter().skip(4).take(5).cloned(), + ) + .await; + let resources = handle.wait_for_changes().await; + let expected: HashSet<_> = + disks.iter().skip(4).take(5).map(|d| d.identity()).collect(); + let actual: HashSet<_> = resources.disks.keys().collect(); + assert_eq!(expected, actual); + + // Finally, change the zpool backing of the 5th disk to be that of the 10th + // and ensure that disk changes. Note that we don't change the identity + // of the 5th disk. + let mut modified_disk = disks[4].clone(); + if let RawDisk::Synthetic(disk) = &mut modified_disk { + disk.zpool_name = disks[9].zpool_name().clone(); + } else { + panic!(); + } + let mut expected: HashSet<_> = + disks.iter().skip(5).take(4).cloned().collect(); + expected.insert(modified_disk); + + handle + .ensure_using_exactly_these_disks(expected.clone().into_iter()) + .await; + let resources = handle.wait_for_changes().await; + + // Ensure the one modified disk changed as we expected + assert_eq!(5, resources.disks.len()); + //assert_eq!(5, resources.pools.len()); + for raw_disk in expected { + let disk = resources.disks.get(raw_disk.identity()).unwrap(); + assert_eq!(disk.zpool_name(), raw_disk.zpool_name()); + let pool = resources.pools.get(&disk.zpool_name().id()).unwrap(); + assert_eq!(&pool.name, disk.zpool_name()); + assert_eq!(raw_disk.identity(), &pool.parent); + } + + // Cleanup + for zpool in zpools { + Zpool::destroy(&zpool).unwrap(); + } + logctx.cleanup_successful(); + } } diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 8d739fae3f..212e9ad397 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -37,7 +37,7 @@ const ZONE_BUNDLE_DIRECTORY: &str = "zone"; /// required by callers when operating on cloned data. The only contention here /// is for the refrence counters of the internal Arcs when `StorageResources` /// gets cloned or dropped. -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct StorageResources { // All disks, real and synthetic, being managed by this sled pub disks: Arc>, @@ -56,9 +56,14 @@ impl StorageResources { let zpool = Pool::new(zpool_name, disk_id.clone())?; if let Some(stored_disk) = self.disks.get(&disk_id) { if let Some(stored_pool) = self.pools.get(&zpool.name.id()) { - if stored_disk == &disk && stored_pool == &zpool { + if stored_disk == &disk + && stored_pool.info.size() == zpool.info.size() + && stored_pool.name == zpool.name + { return Ok(false); } + } else { + // We must delete the stored pool which no longer matches our disk } } // Either the disk or zpool changed From b2f3146f8d26d89c31dfc667600e36b4a0c2190c Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Wed, 4 Oct 2023 23:46:29 +0000 Subject: [PATCH 18/66] wip --- sled-storage/src/manager.rs | 5 ++--- sled-storage/src/pool.rs | 3 ++- sled-storage/src/resources.rs | 31 +++++++++++-------------------- 3 files changed, 15 insertions(+), 24 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 51706c5021..c214566ecb 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -766,11 +766,10 @@ mod tests { // Ensure the one modified disk changed as we expected assert_eq!(5, resources.disks.len()); - //assert_eq!(5, resources.pools.len()); for raw_disk in expected { - let disk = resources.disks.get(raw_disk.identity()).unwrap(); + let (disk, pool) = + resources.disks.get(raw_disk.identity()).unwrap(); assert_eq!(disk.zpool_name(), raw_disk.zpool_name()); - let pool = resources.pools.get(&disk.zpool_name().id()).unwrap(); assert_eq!(&pool.name, disk.zpool_name()); assert_eq!(raw_disk.identity(), &pool.parent); } diff --git a/sled-storage/src/pool.rs b/sled-storage/src/pool.rs index 76eabedd12..bac851df46 100644 --- a/sled-storage/src/pool.rs +++ b/sled-storage/src/pool.rs @@ -8,7 +8,8 @@ use crate::error::Error; use illumos_utils::zpool::{Zpool, ZpoolInfo, ZpoolName}; use omicron_common::disk::DiskIdentity; -/// A ZFS storage pool +/// A ZFS storage pool wrapper that tracks information returned from +/// `zpool` commands #[derive(Debug, Clone, PartialEq, Eq)] pub struct Pool { pub name: ZpoolName, diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 212e9ad397..2b9e7cffae 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -40,10 +40,7 @@ const ZONE_BUNDLE_DIRECTORY: &str = "zone"; #[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct StorageResources { // All disks, real and synthetic, being managed by this sled - pub disks: Arc>, - - // A map of "Uuid" to "pool". - pub pools: Arc>, + pub disks: Arc>, } impl StorageResources { @@ -54,21 +51,16 @@ impl StorageResources { let disk_id = disk.identity().clone(); let zpool_name = disk.zpool_name().clone(); let zpool = Pool::new(zpool_name, disk_id.clone())?; - if let Some(stored_disk) = self.disks.get(&disk_id) { - if let Some(stored_pool) = self.pools.get(&zpool.name.id()) { - if stored_disk == &disk - && stored_pool.info.size() == zpool.info.size() - && stored_pool.name == zpool.name - { - return Ok(false); - } - } else { - // We must delete the stored pool which no longer matches our disk + if let Some((stored_disk, stored_pool)) = self.disks.get(&disk_id) { + if stored_disk == &disk + && stored_pool.info.size() == zpool.info.size() + && stored_pool.name == zpool.name + { + return Ok(false); } } // Either the disk or zpool changed - Arc::make_mut(&mut self.disks).insert(disk_id, disk); - Arc::make_mut(&mut self.pools).insert(zpool.name.id(), zpool); + Arc::make_mut(&mut self.disks).insert(disk_id, (disk, zpool)); Ok(true) } @@ -80,8 +72,7 @@ impl StorageResources { return false; } // Safe to unwrap as we just checked the key existed above - let parsed_disk = Arc::make_mut(&mut self.disks).remove(id).unwrap(); - Arc::make_mut(&mut self.pools).remove(&parsed_disk.zpool_name().id()); + Arc::make_mut(&mut self.disks).remove(id).unwrap(); true } @@ -89,7 +80,7 @@ impl StorageResources { /// /// If this returns `None`, we have not processed the boot disk yet. 
pub fn boot_disk(&self) -> Option<(DiskIdentity, ZpoolName)> { - for (id, disk) in self.disks.iter() { + for (id, (disk, _)) in self.disks.iter() { if disk.is_boot_disk() { return Some((id.clone(), disk.zpool_name().clone())); } @@ -126,7 +117,7 @@ impl StorageResources { pub fn all_zpools(&self, variant: DiskVariant) -> Vec { self.disks .values() - .filter_map(|disk| { + .filter_map(|(disk, _)| { if disk.variant() == variant { return Some(disk.zpool_name().clone()); } From 1352bcf41b43990e3eb086fdf383923eb135352b Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 5 Oct 2023 00:28:57 +0000 Subject: [PATCH 19/66] wip --- sled-storage/src/manager.rs | 66 +++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index c214566ecb..d74a7ada0a 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -10,6 +10,8 @@ use crate::dataset::{DatasetError, DatasetName}; use crate::disk::{Disk, DiskError, RawDisk}; use crate::error::Error; use crate::resources::StorageResources; +use camino::Utf8PathBuf; +use illumos_utils::zfs::{Mountpoint, Zfs}; use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; @@ -34,7 +36,7 @@ pub enum StorageManagerState { struct NewFilesystemRequest { dataset_id: Uuid, dataset_name: DatasetName, - responder: oneshot::Sender>, + responder: oneshot::Sender>, } enum StorageRequest { @@ -219,7 +221,14 @@ impl StorageManager { StorageRequest::DisksChanged(raw_disks) => { self.ensure_using_exactly_these_disks(raw_disks).await } - StorageRequest::NewFilesystem(_req) => todo!(), + StorageRequest::NewFilesystem(request) => { + let result = self.add_dataset(&request).await; + if result.is_err() { + warn!(self.log, "{result:?}"); + } + let _ = request.responder.send(result); + false + } StorageRequest::KeyManagerReady => { self.state = StorageManagerState::Normal; self.add_queued_disks().await @@ -400,6 +409,59 @@ impl StorageManager { should_update } + + // Attempts to add a dataset within a zpool, according to `request`. + async fn add_dataset( + &mut self, + request: &NewFilesystemRequest, + ) -> Result<(), Error> { + info!(self.log, "add_dataset: {:?}", request); + if !self + .resources + .disks + .values() + .any(|(_, pool)| &pool.name == request.dataset_name.pool()) + { + return Err(Error::ZpoolNotFound(format!( + "{}, looked up while trying to add dataset", + request.dataset_name.pool(), + ))); + } + + let zoned = true; + let fs_name = &request.dataset_name.full(); + let do_format = true; + let encryption_details = None; + let size_details = None; + Zfs::ensure_filesystem( + fs_name, + Mountpoint::Path(Utf8PathBuf::from("/data")), + zoned, + do_format, + encryption_details, + size_details, + )?; + // Ensure the dataset has a usable UUID. + if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { + if let Ok(id) = id_str.parse::() { + if id != request.dataset_id { + return Err(Error::UuidMismatch { + name: Box::new(request.dataset_name.clone()), + old: id, + new: request.dataset_id, + }); + } + return Ok(()); + } + } + Zfs::set_oxide_value( + &fs_name, + "uuid", + &request.dataset_id.to_string(), + )?; + + Ok(()) + } } /// All tests only use synthetic disks, but are expected to be run on illumos From 12245d546dbabbd89a670441c8a92d9f1fa2c7a8 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 5 Oct 2023 03:39:40 +0000 Subject: [PATCH 20/66] wip --- sled-agent/src/storage_manager.rs | 208 ------------------------------ sled-storage/src/manager.rs | 49 ++++++- 2 files changed, 48 insertions(+), 209 deletions(-) diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 68fb7df7df..bbf89e41fb 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -113,45 +113,6 @@ impl QueuedDiskCreate { } impl StorageWorker { - // Ensures the named dataset exists as a filesystem with a UUID, optionally - // creating it if `do_format` is true. - // - // Returns the UUID attached to the ZFS filesystem. - fn ensure_dataset( - &mut self, - dataset_id: Uuid, - dataset_name: &DatasetName, - ) -> Result<(), Error> { - let zoned = true; - let fs_name = &dataset_name.full(); - let do_format = true; - let encryption_details = None; - let size_details = None; - Zfs::ensure_filesystem( - &dataset_name.full(), - Mountpoint::Path(Utf8PathBuf::from("/data")), - zoned, - do_format, - encryption_details, - size_details, - )?; - // Ensure the dataset has a usable UUID. - if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { - if let Ok(id) = id_str.parse::() { - if id != dataset_id { - return Err(Error::UuidMismatch { - name: Box::new(dataset_name.clone()), - old: id, - new: dataset_id, - }); - } - return Ok(()); - } - } - Zfs::set_oxide_value(&fs_name, "uuid", &dataset_id.to_string())?; - Ok(()) - } - // Adds a "notification to nexus" to `nexus_notifications`, // informing it about the addition of `pool_id` to this sled. async fn add_zpool_notify(&mut self, pool: &Pool, size: ByteCount) { @@ -712,30 +673,6 @@ impl StorageWorker { Ok(()) } - // Attempts to add a dataset within a zpool, according to `request`. - async fn add_dataset( - &mut self, - resources: &StorageResources, - request: &NewFilesystemRequest, - ) -> Result { - info!(self.log, "add_dataset: {:?}", request); - let mut pools = resources.pools.lock().await; - let pool = pools - .get_mut(&request.dataset_name.pool().id()) - .ok_or_else(|| { - Error::ZpoolNotFound(format!( - "{}, looked up while trying to add dataset", - request.dataset_name.pool(), - )) - })?; - let dataset_name = DatasetName::new( - pool.name.clone(), - request.dataset_name.dataset().clone(), - ); - self.ensure_dataset(request.dataset_id, &dataset_name)?; - Ok(dataset_name) - } - // Small wrapper around `Self::do_work_internal` that ensures we always // emit info to the log when we exit. async fn do_work( @@ -851,77 +788,6 @@ impl StorageWorker { } Ok(()) } - - async fn upsert_queued_disks( - &mut self, - resources: &StorageResources, - queued_u2_drives: &mut Option>, - ) { - let queued = queued_u2_drives.take(); - if let Some(queued) = queued { - for disk in queued { - if let Some(saved) = queued_u2_drives { - // We already hit a transient error and recreated our queue. - // Add any remaining queued disks back on the queue so we - // can try again later. 
- saved.insert(disk); - } else { - match self.upsert_queued_disk(disk, resources).await { - Ok(()) => {} - Err((_, None)) => { - // We already logged this as a persistent error in - // `add_new_disk` or `add_new_synthetic_disk` - } - Err((_, Some(disk))) => { - // We already logged this as a transient error in - // `add_new_disk` or `add_new_synthetic_disk` - *queued_u2_drives = Some(HashSet::from([disk])); - } - } - } - } - } - if queued_u2_drives.is_none() { - info!(self.log, "upserted all queued disks"); - } else { - warn!( - self.log, - "failed to upsert all queued disks - will try again" - ); - } - } - - // Attempt to upsert a queued disk. Return the disk and error if the upsert - // fails due to a transient error. Examples of transient errors are key - // manager errors which indicate that there are not enough sleds available - // to unlock the rack. - async fn upsert_queued_disk( - &mut self, - disk: QueuedDiskCreate, - resources: &StorageResources, - ) -> Result<(), (Error, Option)> { - let mut temp: Option> = None; - let res = match disk { - QueuedDiskCreate::Real(disk) => { - self.upsert_disk(&resources, disk, &mut temp).await - } - QueuedDiskCreate::Synthetic(zpool_name) => { - self.upsert_synthetic_disk(&resources, zpool_name, &mut temp) - .await - } - }; - if let Some(mut disks) = temp.take() { - assert!(res.is_err()); - assert_eq!(disks.len(), 1); - return Err(( - res.unwrap_err(), - disks.drain().next().unwrap().into(), - )); - } - // Any error at this point is not transient. - // We don't requeue the disk. - res.map_err(|e| (e, None)) - } } enum StorageWorkerRequest { @@ -997,64 +863,6 @@ impl StorageManager { &self.zone_bundler } - /// Ensures that the storage manager tracks exactly the provided disks. - /// - /// This acts similar to a batch [Self::upsert_disk] for all new disks, and - /// [Self::delete_disk] for all removed disks. - /// - /// If errors occur, an arbitrary "one" of them will be returned, but a - /// best-effort attempt to add all disks will still be attempted. - // Receiver implemented by [StorageWorker::ensure_using_exactly_these_disks] - pub async fn ensure_using_exactly_these_disks(&self, unparsed_disks: I) - where - I: IntoIterator, - { - self.inner - .tx - .send(StorageWorkerRequest::DisksChanged( - unparsed_disks.into_iter().collect::>(), - )) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send DisksChanged request"); - } - - /// Adds a disk and associated zpool to the storage manager. - // Receiver implemented by [StorageWorker::upsert_disk]. - pub async fn upsert_disk(&self, disk: UnparsedDisk) { - info!(self.inner.log, "Upserting disk: {disk:?}"); - self.inner - .tx - .send(StorageWorkerRequest::AddDisk(disk)) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send AddDisk request"); - } - - /// Removes a disk, if it's tracked by the storage manager, as well - /// as any associated zpools. - // Receiver implemented by [StorageWorker::delete_disk]. - pub async fn delete_disk(&self, disk: UnparsedDisk) { - info!(self.inner.log, "Deleting disk: {disk:?}"); - self.inner - .tx - .send(StorageWorkerRequest::RemoveDisk(disk)) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send RemoveDisk request"); - } - - /// Adds a synthetic zpool to the storage manager. - // Receiver implemented by [StorageWorker::upsert_synthetic_disk]. 
- pub async fn upsert_synthetic_disk(&self, name: ZpoolName) { - self.inner - .tx - .send(StorageWorkerRequest::AddSyntheticDisk(name)) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send AddSyntheticDisk request"); - } - /// Adds underlay access to the storage manager. pub async fn setup_underlay_access( &self, @@ -1117,20 +925,4 @@ impl StorageManager { Ok(dataset_name) } - - /// Inform the storage worker that the KeyManager is capable of retrieving - /// secrets now and that any queued disks can be upserted. - pub async fn key_manager_ready(&self) { - info!(self.inner.log, "KeyManger ready"); - self.inner - .tx - .send(StorageWorkerRequest::KeyManagerReady) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send KeyManagerReady request"); - } - - pub fn resources(&self) -> &StorageResources { - &self.inner.resources - } } diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index d74a7ada0a..e6395def6b 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -6,7 +6,7 @@ use std::collections::HashSet; -use crate::dataset::{DatasetError, DatasetName}; +use crate::dataset::{DatasetError, DatasetKind, DatasetName}; use crate::disk::{Disk, DiskError, RawDisk}; use crate::error::Error; use crate::resources::StorageResources; @@ -144,6 +144,18 @@ impl StorageHandle { self.tx.send(StorageRequest::GetManagerState(tx)).await.unwrap(); rx.await.unwrap() } + + pub async fn upsert_filesystem( + &self, + dataset_id: Uuid, + dataset_name: DatasetName, + ) -> Result<(), Error> { + let (tx, rx) = oneshot::channel(); + let request = + NewFilesystemRequest { dataset_id, dataset_name, responder: tx }; + self.tx.send(StorageRequest::NewFilesystem(request)).await.unwrap(); + rx.await.unwrap() + } } /// The storage manager responsible for the state of the storage @@ -842,4 +854,39 @@ mod tests { } logctx.cleanup_successful(); } + + #[tokio::test] + async fn upsert_filesystem() { + let logctx = test_setup_log("upsert_filesystem"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); + let (mut manager, handle) = + StorageManager::new(&logctx.log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + handle.key_manager_ready().await; + + // Create and add a disk + let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let disk: RawDisk = + SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); + handle.upsert_disk(disk.clone()).await; + + // Create a filesystem + let dataset_id = Uuid::new_v4(); + let dataset_name = + DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); + handle.upsert_filesystem(dataset_id, dataset_name).await.unwrap(); + + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); + } } From e8afd42d95c2219aec822b6edaade13c7c502721 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Fri, 6 Oct 2023 04:14:20 +0000 Subject: [PATCH 21/66] wip --- Cargo.lock | 53 +++++----- Cargo.toml | 1 + sled-agent/Cargo.toml | 1 + sled-agent/src/bootstrap/bootstore.rs | 104 +++++-------------- sled-agent/src/bootstrap/mod.rs | 4 +- sled-agent/src/lib.rs | 1 + sled-agent/src/long_running_tasks.rs | 142 ++++++++++++++++++++++++++ sled-hardware/src/illumos/mod.rs | 6 +- sled-storage/Cargo.toml | 4 +- sled-storage/src/dataset.rs | 2 +- sled-storage/src/lib.rs | 2 +- sled-storage/src/manager.rs | 10 +- 12 files changed, 215 insertions(+), 115 deletions(-) create mode 100644 sled-agent/src/long_running_tasks.rs diff --git a/Cargo.lock b/Cargo.lock index 26358b3459..6165b6963c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5246,6 +5246,7 @@ dependencies = [ "sha3", "sled-agent-client", "sled-hardware", + "sled-storage", "slog", "slog-async", "slog-dtrace", @@ -5265,32 +5266,6 @@ dependencies = [ "zone", ] -[[package]] -name = "omicron-sled-storage" -version = "0.1.0" -dependencies = [ - "async-trait", - "camino", - "camino-tempfile", - "derive_more", - "glob", - "illumos-utils", - "key-manager", - "nexus-client 0.1.0", - "omicron-common 0.1.0", - "omicron-test-utils", - "rand 0.8.5", - "schemars", - "serde", - "serde_json", - "sled-agent-client", - "sled-hardware", - "slog", - "thiserror", - "tokio", - "uuid", -] - [[package]] name = "omicron-test-utils" version = "0.1.0" @@ -7982,6 +7957,32 @@ dependencies = [ "uuid", ] +[[package]] +name = "sled-storage" +version = "0.1.0" +dependencies = [ + "async-trait", + "camino", + "camino-tempfile", + "derive_more", + "glob", + "illumos-utils", + "key-manager", + "nexus-client 0.1.0", + "omicron-common 0.1.0", + "omicron-test-utils", + "rand 0.8.5", + "schemars", + "serde", + "serde_json", + "sled-agent-client", + "sled-hardware", + "slog", + "thiserror", + "tokio", + "uuid", +] + [[package]] name = "slog" version = "2.7.0" diff --git a/Cargo.toml b/Cargo.toml index 0b63984ea7..2f1d0b012e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -319,6 +319,7 @@ similar-asserts = "1.5.0" sled = "0.34" sled-agent-client = { path = "sled-agent-client" } sled-hardware = { path = "sled-hardware" } +sled-storage = { path = "sled-storage" } slog = { version = "2.7", features = [ "dynamic-keys", "max_level_trace", "release_max_level_debug" ] } slog-async = "2.8" slog-dtrace = "0.2" diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index f172136726..e219461b9b 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -60,6 +60,7 @@ serde_json.workspace = true sha3.workspace = true sled-agent-client.workspace = true sled-hardware.workspace = true +sled-storage.workspace = true slog.workspace = true slog-async.workspace = true slog-dtrace.workspace = true diff --git a/sled-agent/src/bootstrap/bootstore.rs b/sled-agent/src/bootstrap/bootstore.rs index 17267bef55..3c7e860b4a 100644 --- a/sled-agent/src/bootstrap/bootstore.rs +++ b/sled-agent/src/bootstrap/bootstore.rs @@ -7,122 +7,74 @@ use super::config::BOOTSTORE_PORT; use super::server::StartError; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use ddm_admin_client::Client as DdmAdminClient; use sled_hardware::underlay::BootstrapInterface; use sled_hardware::Baseboard; +use sled_storage::dataset::CLUSTER_DATASET; +use sled_storage::resources::StorageResources; use slog::Logger; use std::collections::BTreeSet; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::time::Duration; -use tokio::task::JoinHandle; const 
BOOTSTORE_FSM_STATE_FILE: &str = "bootstore-fsm-state.json"; const BOOTSTORE_NETWORK_CONFIG_FILE: &str = "bootstore-network-config.json"; -pub(super) struct BootstoreHandles { - pub(super) node_handle: bootstore::NodeHandle, - - // These two are never used; we keep them to show ownership of the spawned - // tasks. - _node_task_handle: JoinHandle<()>, - _peer_update_task_handle: JoinHandle<()>, +pub fn new_bootstore_config( + storage_resources: &StorageResources, + baseboard: Baseboard, + global_zone_bootstrap_ip: Ipv6Addr, +) -> Result { + Ok(bootstore::Config { + id: baseboard, + addr: SocketAddrV6::new(global_zone_bootstrap_ip, BOOTSTORE_PORT, 0, 0), + time_per_tick: Duration::from_millis(250), + learn_timeout: Duration::from_secs(5), + rack_init_timeout: Duration::from_secs(300), + rack_secret_request_timeout: Duration::from_secs(5), + fsm_state_ledger_paths: bootstore_fsm_state_paths(&storage_resources)?, + network_config_ledger_paths: bootstore_network_config_paths( + &storage_resources, + )?, + }) } -impl BootstoreHandles { - pub(super) async fn spawn( - storage_resources: &StorageResources, - ddm_admin_client: DdmAdminClient, - baseboard: Baseboard, - global_zone_bootstrap_ip: Ipv6Addr, - base_log: &Logger, - ) -> Result { - let config = bootstore::Config { - id: baseboard, - addr: SocketAddrV6::new( - global_zone_bootstrap_ip, - BOOTSTORE_PORT, - 0, - 0, - ), - time_per_tick: Duration::from_millis(250), - learn_timeout: Duration::from_secs(5), - rack_init_timeout: Duration::from_secs(300), - rack_secret_request_timeout: Duration::from_secs(5), - fsm_state_ledger_paths: bootstore_fsm_state_paths( - &storage_resources, - ) - .await?, - network_config_ledger_paths: bootstore_network_config_paths( - &storage_resources, - ) - .await?, - }; - - let (mut node, node_handle) = - bootstore::Node::new(config, base_log).await; - - let join_handle = tokio::spawn(async move { node.run().await }); - - // Spawn a task for polling DDMD and updating bootstore - let peer_update_handle = - tokio::spawn(poll_ddmd_for_bootstore_peer_update( - base_log.new(o!("component" => "bootstore_ddmd_poller")), - node_handle.clone(), - ddm_admin_client, - )); - - Ok(Self { - node_handle, - _node_task_handle: join_handle, - _peer_update_task_handle: peer_update_handle, - }) - } -} - -async fn bootstore_fsm_state_paths( +fn bootstore_fsm_state_paths( storage: &StorageResources, ) -> Result, StartError> { let paths: Vec<_> = storage - .all_m2_mountpoints(sled_hardware::disk::CLUSTER_DATASET) - .await + .all_m2_mountpoints(CLUSTER_DATASET) .into_iter() .map(|p| p.join(BOOTSTORE_FSM_STATE_FILE)) .collect(); if paths.is_empty() { - return Err(StartError::MissingM2Paths( - sled_hardware::disk::CLUSTER_DATASET, - )); + return Err(StartError::MissingM2Paths(CLUSTER_DATASET)); } Ok(paths) } -async fn bootstore_network_config_paths( +fn bootstore_network_config_paths( storage: &StorageResources, ) -> Result, StartError> { let paths: Vec<_> = storage - .all_m2_mountpoints(sled_hardware::disk::CLUSTER_DATASET) - .await + .all_m2_mountpoints(CLUSTER_DATASET) .into_iter() .map(|p| p.join(BOOTSTORE_NETWORK_CONFIG_FILE)) .collect(); if paths.is_empty() { - return Err(StartError::MissingM2Paths( - sled_hardware::disk::CLUSTER_DATASET, - )); + return Err(StartError::MissingM2Paths(CLUSTER_DATASET)); } Ok(paths) } -async fn poll_ddmd_for_bootstore_peer_update( +pub async fn poll_ddmd_for_bootstore_peer_update( log: Logger, bootstore_node_handle: bootstore::NodeHandle, - ddmd_client: DdmAdminClient, ) { let mut current_peers: BTreeSet 
= BTreeSet::new(); // We're talking to a service's admin interface on localhost and @@ -132,7 +84,7 @@ async fn poll_ddmd_for_bootstore_peer_update( // We also use this timeout in the case of spurious ddmd failures // that require a reconnection from the ddmd_client. const RETRY: tokio::time::Duration = tokio::time::Duration::from_secs(5); - + let ddmd_client = DdmAdminClient::localhost(&log).unwrap(); loop { match ddmd_client .derive_bootstrap_addrs_from_prefixes(&[ @@ -154,7 +106,7 @@ async fn poll_ddmd_for_bootstore_peer_update( log, concat!( "Bootstore comms error: {}. ", - "bootstore::Node task must have paniced", + "bootstore::Node task must have panicked", ), e ); diff --git a/sled-agent/src/bootstrap/mod.rs b/sled-agent/src/bootstrap/mod.rs index 96e674acf3..5bf25b8521 100644 --- a/sled-agent/src/bootstrap/mod.rs +++ b/sled-agent/src/bootstrap/mod.rs @@ -4,7 +4,7 @@ //! Bootstrap-related utilities -mod bootstore; +pub(crate) mod bootstore; pub mod client; pub mod config; pub mod early_networking; @@ -14,7 +14,7 @@ pub(crate) mod params; mod pre_server; mod rack_ops; pub(crate) mod rss_handle; -mod secret_retriever; +pub mod secret_retriever; pub mod server; mod sprockets_server; mod views; diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 5c4dbd8310..376a154ed2 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -22,6 +22,7 @@ pub mod config; mod http_entrypoints; mod instance; mod instance_manager; +mod long_running_tasks; mod nexus; pub mod params; mod profile; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs new file mode 100644 index 0000000000..54e8ed7e18 --- /dev/null +++ b/sled-agent/src/long_running_tasks.rs @@ -0,0 +1,142 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! This module is responsible for spawning, starting, and managing long running +//! tasks and task driven subsystems. These tasks run for the remainder of the +//! sled-agent process from the moment they begin. Primarily they include the +//! "managers", like `StorageManager`, `InstanceManager`, etc..., and are used +//! by both the bootstrap agent and the sled-agent. +//! +//! We don't bother keeping track of the spawned tasks handles because we know +//! these tasks are supposed to run forever, and they can shutdown if their +//! handles are dropped. + +use crate::bootstrap::bootstore::{ + new_bootstore_config, poll_ddmd_for_bootstore_peer_update, +}; +use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; +use bootstore::schemes::v0 as bootstore; +use key_manager::{KeyManager, StorageKeyRequester}; +use sled_hardware::{HardwareManager, SledMode}; +use sled_storage::manager::{StorageHandle, StorageManager}; +use slog::{info, Logger}; +use std::net::Ipv6Addr; + +/// A mechanism for interacting with all long running tasks that can be shared +/// between the bootstrap-agent and sled-agent code. +#[derive(Clone)] +pub struct LongRunningTaskHandles { + /// A mechanism for retrieving storage keys. This interacts with the + /// [`KeyManager`] task. In the future, there may be other handles for + /// retrieving different types of keys. Separating the handles limits the + /// access for a given key type to the code that holds the handle. 
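// As a minimal, self-contained sketch of the task/handle pattern these
// long running tasks follow (hypothetical names `Worker`, `WorkerHandle`,
// and `WorkerRequest`; not part of this change): the task is spawned once,
// its `JoinHandle` is dropped, and callers keep only a cheap, cloneable
// handle that sends messages over an mpsc channel.
use tokio::sync::mpsc;

enum WorkerRequest {
    DoWork,
}

struct Worker {
    rx: mpsc::Receiver<WorkerRequest>,
}

#[derive(Clone)]
struct WorkerHandle {
    tx: mpsc::Sender<WorkerRequest>,
}

fn spawn_worker() -> WorkerHandle {
    let (tx, rx) = mpsc::channel(32);
    let mut worker = Worker { rx };
    // The task is expected to run for the life of the process, so the
    // returned `JoinHandle` is intentionally dropped.
    tokio::spawn(async move {
        while let Some(req) = worker.rx.recv().await {
            match req {
                WorkerRequest::DoWork => { /* do the work */ }
            }
        }
    });
    WorkerHandle { tx }
}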
+ pub storage_key_requester: StorageKeyRequester, + + /// A mechanism for talking to the [`StorageManager`] which is responsible + /// for establishing zpools on disks and managing their datasets. + pub storage_handle: StorageHandle, + + /// A mechanism for interacting with the hardware device tree + pub hardware_manager: HardwareManager, + + // A handle for interacting with the bootstore + pub bootstore: bootstore::NodeHandle, +} + +/// Spawn all long running tasks +pub async fn spawn_all( + log: &Logger, + sled_mode: SledMode, + global_zone_bootstrap_ip: Ipv6Addr, +) -> LongRunningTaskHandles { + let storage_key_requester = spawn_key_manager(log); + let mut storage_handle = + spawn_storage_manager(log, storage_key_requester.clone()); + let hardware_manager = spawn_hardware_manager(log, sled_mode); + + // Wait for the boot disk so that we can work with any ledgers, + // such as those needed by the bootstore and sled-agent + let _ = storage_handle.wait_for_boot_disk().await; + + let bootstore = spawn_bootstore_tasks( + log, + &mut storage_handle, + &hardware_manager, + global_zone_bootstrap_ip, + ) + .await; + + LongRunningTaskHandles { + storage_key_requester, + storage_handle, + hardware_manager, + bootstore, + } +} + +fn spawn_key_manager(log: &Logger) -> StorageKeyRequester { + info!(log, "Starting KeyManager"); + let secret_retriever = LrtqOrHardcodedSecretRetriever::new(); + let (mut key_manager, storage_key_requester) = + KeyManager::new(log, secret_retriever); + let key_manager_handle = + tokio::spawn(async move { key_manager.run().await }); + storage_key_requester +} + +fn spawn_storage_manager( + log: &Logger, + key_requester: StorageKeyRequester, +) -> StorageHandle { + info!(log, "Starting StorageManager"); + let (mut manager, handle) = StorageManager::new(log, key_requester); + tokio::spawn(async move { + manager.run().await; + }); + handle +} + +fn spawn_hardware_manager( + log: &Logger, + sled_mode: SledMode, +) -> HardwareManager { + // The `HardwareManager` does not use the the "task/handle" pattern + // and spawns its worker task inside `HardwareManager::new`. Instead of returning + // a handle to send messages to that task, the "Inner/Mutex" pattern is used + // which shares data between the task, the manager itself, and the users of the manager + // since the manager can be freely cloned and passed around. + // + // There are pros and cons to both methods, but the reason to mention it here is that + // the handle in this case is the `HardwareManager` itself. 
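// For contrast, a rough sketch of the "Inner/Mutex" pattern described in the
// comment above, with hypothetical names (`ExampleManager`, `ExampleInner`)
// that are not part of this change: shared state lives behind an
// `Arc<Mutex<_>>`, the worker is spawned inside `new`, and the cloneable
// manager itself doubles as the handle.
use std::sync::{Arc, Mutex};
use std::time::Duration;

#[derive(Default)]
struct ExampleInner {
    updates_seen: usize,
}

#[derive(Clone)]
struct ExampleManager {
    inner: Arc<Mutex<ExampleInner>>,
}

impl ExampleManager {
    fn new() -> Self {
        let inner = Arc::new(Mutex::new(ExampleInner::default()));
        let worker_inner = inner.clone();
        // Spawned here rather than by the caller, mirroring
        // `HardwareManager::new`; the `JoinHandle` is dropped.
        tokio::spawn(async move {
            loop {
                worker_inner.lock().unwrap().updates_seen += 1;
                tokio::time::sleep(Duration::from_secs(1)).await;
            }
        });
        Self { inner }
    }

    // Queries go straight to the shared state; no message passing involved.
    fn updates_seen(&self) -> usize {
        self.inner.lock().unwrap().updates_seen
    }
}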
+ info!(log, "Starting HardwareManager"; "sled_mode" => ?sled_mode); + HardwareManager::new(log, sled_mode).unwrap() +} + +async fn spawn_bootstore_tasks( + log: &Logger, + storage_handle: &mut StorageHandle, + hardware_manager: &HardwareManager, + global_zone_bootstrap_ip: Ipv6Addr, +) -> bootstore::NodeHandle { + let storage_resources = storage_handle.get_latest_resources().await; + let config = new_bootstore_config( + &storage_resources, + hardware_manager.baseboard(), + global_zone_bootstrap_ip, + ) + .unwrap(); + + // Create and spawn the bootstore + let (mut node, node_handle) = bootstore::Node::new(config, log).await; + tokio::spawn(async move { node.run().await }); + + // Spawn a task for polling DDMD and updating bootstore with peer addresses + let log = log.new(o!("component" => "bootstore_ddmd_poller")); + let node_handle2 = node_handle.clone(); + tokio::spawn(async move { + poll_ddmd_for_bootstore_peer_update(log, node_handle2).await + }); + + node_handle +} diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 0364c98f14..0e49d6d776 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -589,11 +589,11 @@ async fn hardware_tracking_task( /// /// This structure provides interfaces for both querying and for receiving new /// events. +#[derive(Clone)] pub struct HardwareManager { log: Logger, inner: Arc>, tx: broadcast::Sender, - _worker: JoinHandle<()>, } impl HardwareManager { @@ -660,11 +660,11 @@ impl HardwareManager { let log2 = log.clone(); let inner2 = inner.clone(); let tx2 = tx.clone(); - let _worker = tokio::task::spawn(async move { + tokio::task::spawn(async move { hardware_tracking_task(log2, inner2, tx2).await }); - Ok(Self { log, inner, tx, _worker }) + Ok(Self { log, inner, tx }) } pub fn baseboard(&self) -> Baseboard { diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index 11bd502183..617a0a0fd7 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "omicron-sled-storage" +name = "sled-storage" version = "0.1.0" edition = "2021" @@ -30,4 +30,4 @@ uuid.workspace = true [dev-dependencies] illumos-utils = { workspace = true, features = ["tmp_keypath"] } omicron-test-utils.workspace = true -camino-tempfile.workspace = true \ No newline at end of file +camino-tempfile.workspace = true diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index 99df582371..71e04a6935 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -237,7 +237,7 @@ pub enum DatasetError { /// be used. The `StorageManager` for the sled-agent always has a /// `StorageKeyRequester` available, and so the only place we should pass /// `None` is for the M.2s touched by the Installinator. -pub async fn ensure_zpool_has_datasets( +pub(crate) async fn ensure_zpool_has_datasets( log: &Logger, zpool_name: &ZpoolName, disk_identity: &DiskIdentity, diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs index f923165896..20f6442b9a 100644 --- a/sled-storage/src/lib.rs +++ b/sled-storage/src/lib.rs @@ -8,7 +8,7 @@ //! hardware partitions from the `sled-hardware` crate. It utilizes the //! `illumos-utils` crate to actually perform ZFS related OS calls. 
-pub(crate) mod dataset; +pub mod dataset; pub(crate) mod disk; pub(crate) mod dump_setup; pub mod error; diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index e6395def6b..7bcb8df192 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -6,7 +6,7 @@ use std::collections::HashSet; -use crate::dataset::{DatasetError, DatasetKind, DatasetName}; +use crate::dataset::{DatasetError, DatasetName}; use crate::disk::{Disk, DiskError, RawDisk}; use crate::error::Error; use crate::resources::StorageResources; @@ -16,7 +16,7 @@ use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; use sled_hardware::DiskVariant; -use slog::{debug, error, info, o, warn, Logger}; +use slog::{error, info, o, warn, Logger}; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{interval, Duration, MissedTickBehavior}; use uuid::Uuid; @@ -58,11 +58,12 @@ enum StorageRequest { /// to clients for debugging purposes, and that isn't exposed in other ways. #[derive(Debug, Clone)] pub struct StorageManagerData { - state: StorageManagerState, - queued_u2_drives: HashSet, + pub state: StorageManagerState, + pub queued_u2_drives: HashSet, } /// A mechanism for interacting with the [`StorageManager`] +#[derive(Clone)] pub struct StorageHandle { tx: mpsc::Sender, resource_updates: watch::Receiver, @@ -480,6 +481,7 @@ impl StorageManager { /// systems. #[cfg(all(test, target_os = "illumos"))] mod tests { + use crate::dataset::DatasetKind; use crate::disk::SyntheticDisk; use super::*; From f540c086f5cbc1dda34bfa3f85203e0826aa07ca Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 6 Oct 2023 05:33:07 +0000 Subject: [PATCH 22/66] wip --- sled-agent/src/bootstrap/pre_server.rs | 79 +++++++++----------------- sled-agent/src/bootstrap/server.rs | 1 - sled-agent/src/long_running_tasks.rs | 30 ++++++++-- sled-agent/src/services.rs | 18 +++--- sled-agent/src/sled_agent.rs | 2 +- sled-agent/src/zone_bundle.rs | 39 ++++++------- sled-storage/src/lib.rs | 2 +- sled-storage/src/manager.rs | 4 +- 8 files changed, 83 insertions(+), 92 deletions(-) diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 0899bdd82f..d7c3e9d5d8 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -11,9 +11,11 @@ #![allow(clippy::result_large_err)] use super::maghemite; -use super::secret_retriever::LrtqOrHardcodedSecretRetriever; use super::server::StartError; use crate::config::Config; +use crate::long_running_tasks::{ + spawn_all_longrunning_tasks, LongRunningTaskHandles, +}; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; use crate::storage_manager::StorageManager; @@ -29,8 +31,6 @@ use illumos_utils::zfs; use illumos_utils::zfs::Zfs; use illumos_utils::zone; use illumos_utils::zone::Zones; -use key_manager::KeyManager; -use key_manager::StorageKeyRequester; use omicron_common::address::Ipv6Subnet; use omicron_common::FileKv; use sled_hardware::underlay; @@ -38,6 +38,8 @@ use sled_hardware::DendriteAsic; use sled_hardware::HardwareManager; use sled_hardware::HardwareUpdate; use sled_hardware::SledMode; +use sled_storage::disk::SyntheticDisk; +use sled_storage::manager::StorageHandle; use slog::Drain; use slog::Logger; use std::net::IpAddr; @@ -200,36 +202,24 @@ impl BootstrapAgentStartup { // This should be a no-op if already enabled. 
BootstrapNetworking::enable_ipv6_forwarding().await?; - // Spawn the `KeyManager` which is needed by the the StorageManager to - // retrieve encryption keys. - let (storage_key_requester, key_manager_handle) = - spawn_key_manager_task(&base_log); - let sled_mode = sled_mode_from_config(&config)?; - // Start monitoring hardware. This is blocking so we use - // `spawn_blocking`; similar to above, we move some things in and (on - // success) it gives them back. - let (base_log, log, hardware_manager) = { - tokio::task::spawn_blocking(move || { - info!( - log, "Starting hardware monitor"; - "sled_mode" => ?sled_mode, - ); - let hardware_manager = - HardwareManager::new(&base_log, sled_mode) - .map_err(StartError::StartHardwareManager)?; - Ok::<_, StartError>((base_log, log, hardware_manager)) - }) - .await - .unwrap()? - }; + // Spawn all important long running tasks that live for the lifetime of + // the process and are used by both the bootstrap agent and sled agent + let long_running_task_handles = spawn_all_longrunning_tasks( + &base_log, + sled_mode, + startup_networking.global_zone_bootstrap_ip, + ) + .await; - // Create a `StorageManager` and (possibly) synthetic disks. - let storage_manager = - StorageManager::new(&base_log, storage_key_requester).await; - upsert_synthetic_zpools_if_needed(&log, &storage_manager, &config) - .await; + // Add some synthetic disks if necessary. + upsert_synthetic_zpools_if_needed( + &log, + &long_running_task_handles.storage_manager, + &config, + ) + .await; let global_zone_bootstrap_ip = startup_networking.global_zone_bootstrap_ip; @@ -242,7 +232,7 @@ impl BootstrapAgentStartup { config.skip_timesync, config.sidecar_revision.clone(), config.switch_zone_maghemite_links.clone(), - storage_manager.resources().clone(), + long_running_task_handles.storage_manager.clone(), storage_manager.zone_bundler().clone(), ); @@ -357,13 +347,10 @@ fn ensure_zfs_key_directory_exists(log: &Logger) -> Result<(), StartError> { // to create and mount encrypted datasets. 
info!( log, "Ensuring zfs key directory exists"; - "path" => sled_hardware::disk::KEYPATH_ROOT, + "path" => zfs::KEYPATH_ROOT, ); - std::fs::create_dir_all(sled_hardware::disk::KEYPATH_ROOT).map_err(|err| { - StartError::CreateZfsKeyDirectory { - dir: sled_hardware::disk::KEYPATH_ROOT, - err, - } + std::fs::create_dir_all(zfs::KEYPATH_ROOT).map_err(|err| { + StartError::CreateZfsKeyDirectory { dir: zfs::KEYPATH_ROOT, err } }) } @@ -387,7 +374,7 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { async fn upsert_synthetic_zpools_if_needed( log: &Logger, - storage_manager: &StorageManager, + storage_manager: &StorageHandle, config: &Config, ) { if let Some(pools) = &config.zpools { @@ -397,7 +384,8 @@ async fn upsert_synthetic_zpools_if_needed( "Upserting synthetic zpool to Storage Manager: {}", pool.to_string() ); - storage_manager.upsert_synthetic_disk(pool.clone()).await; + let disk = SyntheticDisk::new(pool.clone()).into(); + storage_manager.upsert_disk(disk).await; } } } @@ -435,19 +423,6 @@ fn sled_mode_from_config(config: &Config) -> Result { Ok(sled_mode) } -fn spawn_key_manager_task( - log: &Logger, -) -> (StorageKeyRequester, JoinHandle<()>) { - let secret_retriever = LrtqOrHardcodedSecretRetriever::new(); - let (mut key_manager, storage_key_requester) = - KeyManager::new(log, secret_retriever); - - let key_manager_handle = - tokio::spawn(async move { key_manager.run().await }); - - (storage_key_requester, key_manager_handle) -} - #[derive(Debug, Clone)] pub(crate) struct BootstrapNetworking { pub(crate) bootstrap_etherstub: dladm::Etherstub, diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 0cbbf0678b..638aa51dee 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -13,7 +13,6 @@ use super::rack_ops::RackInitId; use super::views::SledAgentResponse; use super::BootstrapError; use super::RssAccessError; -use crate::bootstrap::bootstore::BootstoreHandles; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; use crate::bootstrap::http_entrypoints::api as http_api; use crate::bootstrap::http_entrypoints::BootstrapServerContext; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index 54e8ed7e18..cb82648a8c 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -16,8 +16,10 @@ use crate::bootstrap::bootstore::{ new_bootstore_config, poll_ddmd_for_bootstore_peer_update, }; use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; +use crate::zone_bundle::{CleanupContext, ZoneBundler}; use bootstore::schemes::v0 as bootstore; use key_manager::{KeyManager, StorageKeyRequester}; +use sled_agent_client::types::CleanupContext; use sled_hardware::{HardwareManager, SledMode}; use sled_storage::manager::{StorageHandle, StorageManager}; use slog::{info, Logger}; @@ -35,24 +37,29 @@ pub struct LongRunningTaskHandles { /// A mechanism for talking to the [`StorageManager`] which is responsible /// for establishing zpools on disks and managing their datasets. 
- pub storage_handle: StorageHandle, + pub storage_manager: StorageHandle, /// A mechanism for interacting with the hardware device tree pub hardware_manager: HardwareManager, // A handle for interacting with the bootstore pub bootstore: bootstore::NodeHandle, + + // A reference to the object used to manage zone bundles + pub zone_bundler: ZoneBundler, } /// Spawn all long running tasks -pub async fn spawn_all( +pub async fn spawn_all_longrunning_tasks( log: &Logger, sled_mode: SledMode, global_zone_bootstrap_ip: Ipv6Addr, ) -> LongRunningTaskHandles { let storage_key_requester = spawn_key_manager(log); - let mut storage_handle = + let mut storage_manager = spawn_storage_manager(log, storage_key_requester.clone()); + + // TODO: Does this need to run inside tokio::task::spawn_blocking? let hardware_manager = spawn_hardware_manager(log, sled_mode); // Wait for the boot disk so that we can work with any ledgers, @@ -67,9 +74,11 @@ pub async fn spawn_all( ) .await; + let zone_bundler = spawn_zone_bundler_tasks(log, &mut storage_handle); + LongRunningTaskHandles { storage_key_requester, - storage_handle, + storage_manager, hardware_manager, bootstore, } @@ -140,3 +149,16 @@ async fn spawn_bootstore_tasks( node_handle } + +// `ZoneBundler::new` spawns a periodic cleanup task that runs indefinitely +fn spawn_zone_bundler_tasks( + log: &Logger, + storage_handle: &mut StorageHandle, +) -> ZoneBundler { + let log = log.new(o!("component" => "ZoneBundler")); + let zone_bundler = ZoneBundler::new( + log, + storage_handle.clone(), + CleanupContext::default(), + ); +} diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 96cdf8222b..3fcbf717fa 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -5,7 +5,7 @@ //! Sled-local service management. //! //! For controlling zone-based storage services, refer to -//! [crate::storage_manager::StorageManager]. +//! [sled_hardware:manager::StorageManager]. //! //! For controlling virtual machine instances, refer to //! [crate::instance_manager::InstanceManager]. 
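// The hunks below switch ledger-path lookups to the pattern sketched here:
// ask the storage task for its latest `StorageResources` snapshot, then
// derive one path per M.2 config dataset. The `ledger_name` parameter and
// the helper name are placeholders, not part of this change.
async fn example_ledger_paths(
    storage: &sled_storage::manager::StorageHandle,
    ledger_name: &str,
) -> Vec<camino::Utf8PathBuf> {
    let resources = storage.get_latest_resources().await;
    resources
        .all_m2_mountpoints(sled_storage::dataset::CONFIG_DATASET)
        .into_iter()
        .map(|p| p.join(ledger_name))
        .collect()
}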
@@ -38,7 +38,6 @@ use crate::params::{ use crate::profile::*; use crate::smf_helper::Service; use crate::smf_helper::SmfHelper; -use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; @@ -88,12 +87,14 @@ use omicron_common::nexus_config::{ use once_cell::sync::OnceCell; use rand::prelude::SliceRandom; use rand::SeedableRng; -use sled_hardware::disk::ZONE_DATASET; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware::underlay::BOOTSTRAP_PREFIX; use sled_hardware::Baseboard; use sled_hardware::SledMode; +use sled_storage::dataset::{CONFIG_DATASET, ZONE_DATASET}; +use sled_storage::manager::StorageHandle; +use sled_storage::resources::StorageResources; use slog::Logger; use std::collections::HashSet; use std::collections::{BTreeMap, HashMap}; @@ -370,7 +371,7 @@ pub struct ServiceManagerInner { advertised_prefixes: Mutex>>, sled_info: OnceCell, switch_zone_bootstrap_address: Ipv6Addr, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ledger_directory_override: OnceCell, image_directory_override: OnceCell, @@ -415,7 +416,7 @@ impl ServiceManager { skip_timesync: Option, sidecar_revision: SidecarRevision, switch_zone_maghemite_links: Vec, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ) -> Self { let log = log.new(o!("component" => "ServiceManager")); @@ -470,13 +471,12 @@ impl ServiceManager { } async fn all_service_ledgers(&self) -> Vec { + let resources = self.inner.storage.get_latest_resources().await; if let Some(dir) = self.inner.ledger_directory_override.get() { return vec![dir.join(SERVICES_LEDGER_FILENAME)]; } - self.inner - .storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) - .await + resources + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(SERVICES_LEDGER_FILENAME)) .collect() diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 7e62f6a8a7..dc130524f6 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -90,7 +90,7 @@ pub enum Error { Instance(#[from] crate::instance_manager::Error), #[error("Error managing storage: {0}")] - Storage(#[from] crate::storage_manager::Error), + Storage(#[from] sled_storage::error::Error), #[error("Error updating: {0}")] Download(#[from] crate::updates::Error), diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 2eeb8ebe7d..ea7481bd6d 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -6,7 +6,6 @@ //! Tools for collecting and inspecting service bundles for zones. -use crate::storage_manager::StorageResources; use anyhow::anyhow; use anyhow::Context; use camino::FromPathBufError; @@ -22,6 +21,8 @@ use illumos_utils::zone::AdmError; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use sled_storage::dataset::U2_DEBUG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::cmp::Ord; use std::cmp::Ordering; @@ -148,20 +149,12 @@ pub struct ZoneBundler { inner: Arc>, // Channel for notifying the cleanup task that it should reevaluate. notify_cleanup: Arc, - // Tokio task handle running the period cleanup operation. - cleanup_task: Arc>, -} - -impl Drop for ZoneBundler { - fn drop(&mut self) { - self.cleanup_task.abort(); - } } // State shared between tasks, e.g., used when creating a bundle in different // tasks or between a creation and cleanup. 
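// Roughly, the notify-or-timer loop driving that periodic cleanup can be
// sketched as below; the period and the helper name are arbitrary choices
// for illustration, not part of this change.
use std::sync::Arc;
use tokio::sync::Notify;
use tokio::time::{interval, Duration};

fn spawn_example_cleanup_task(notify: Arc<Notify>) {
    tokio::spawn(async move {
        let mut ticker = interval(Duration::from_secs(600));
        loop {
            tokio::select! {
                // An explicit kick, e.g. from a `cleanup()` call on the handle.
                _ = notify.notified() => {}
                // A periodic pass even when nobody asked.
                _ = ticker.tick() => {}
            }
            // ... perform the actual cleanup pass here ...
        }
    });
}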
struct Inner { - resources: StorageResources, + storage_handle: StorageHandle, cleanup_context: CleanupContext, last_cleanup_at: Instant, } @@ -189,7 +182,8 @@ impl Inner { // that can exist but do not, i.e., those whose parent datasets already // exist; and returns those. async fn bundle_directories(&self) -> Vec { - let expected = self.resources.all_zone_bundle_directories().await; + let resources = self.storage_handle.get_latest_resources().await; + let expected = resources.all_zone_bundle_directories(); let mut out = Vec::with_capacity(expected.len()); for each in expected.into_iter() { if tokio::fs::create_dir_all(&each).await.is_ok() { @@ -249,26 +243,28 @@ impl ZoneBundler { /// Create a new zone bundler. /// /// This creates an object that manages zone bundles on the system. It can - /// be used to create bundles from running zones, and runs a period task to - /// clean them up to free up space. + /// be used to create bundles from running zones, and runs a periodic task + /// to clean them up to free up space. pub fn new( log: Logger, - resources: StorageResources, + storage_handle: StorageHandle, cleanup_context: CleanupContext, ) -> Self { let notify_cleanup = Arc::new(Notify::new()); let inner = Arc::new(Mutex::new(Inner { - resources, + storage_handle, cleanup_context, last_cleanup_at: Instant::now(), })); let cleanup_log = log.new(slog::o!("component" => "auto-cleanup-task")); let notify_clone = notify_cleanup.clone(); let inner_clone = inner.clone(); - let cleanup_task = Arc::new(tokio::task::spawn( - Self::periodic_cleanup(cleanup_log, inner_clone, notify_clone), + tokio::task::spawn(Self::periodic_cleanup( + cleanup_log, + inner_clone, + notify_clone, )); - Self { log, inner, notify_cleanup, cleanup_task } + Self { log, inner, notify_cleanup } } /// Trigger an immediate cleanup of low-priority zone bundles. @@ -353,10 +349,9 @@ impl ZoneBundler { ) -> Result { let inner = self.inner.lock().await; let storage_dirs = inner.bundle_directories().await; - let extra_log_dirs = inner - .resources - .all_u2_mountpoints(sled_hardware::disk::U2_DEBUG_DATASET) - .await + let resources = inner.storage_handle.get_latest_resources().await; + let extra_log_dirs = resources + .all_u2_mountpoints(U2_DEBUG_DATASET) .into_iter() .map(|p| p.join(zone.name())) .collect(); diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs index 20f6442b9a..0c1b383d7f 100644 --- a/sled-storage/src/lib.rs +++ b/sled-storage/src/lib.rs @@ -9,7 +9,7 @@ //! `illumos-utils` crate to actually perform ZFS related OS calls. pub mod dataset; -pub(crate) mod disk; +pub mod disk; pub(crate) mod dump_setup; pub mod error; pub(crate) mod keyfile; diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 7bcb8df192..7e2050084b 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -133,14 +133,14 @@ impl StorageHandle { /// Retrieve the latest value of `StorageResources` from the /// `StorageManager` task. 
- pub async fn get_latest_resources(&mut self) -> StorageResources { + pub async fn get_latest_resources(&self) -> StorageResources { let (tx, rx) = oneshot::channel(); self.tx.send(StorageRequest::GetLatestResources(tx)).await.unwrap(); rx.await.unwrap() } /// Return internal data useful for debugging and testing - pub async fn get_manager_state(&mut self) -> StorageManagerData { + pub async fn get_manager_state(&self) -> StorageManagerData { let (tx, rx) = oneshot::channel(); self.tx.send(StorageRequest::GetManagerState(tx)).await.unwrap(); rx.await.unwrap() From 1e34e7cbd1f624bedc40b503e537d08adc49bc56 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 6 Oct 2023 20:42:00 +0000 Subject: [PATCH 23/66] wip --- sled-agent/src/bootstrap/pre_server.rs | 47 +---- sled-agent/src/bootstrap/server.rs | 1 - sled-agent/src/hardware_monitor.rs | 270 +++++++++++++++++++++++++ sled-agent/src/hardware_monitor.sh | 1 + sled-agent/src/lib.rs | 1 + sled-agent/src/long_running_tasks.rs | 14 +- sled-storage/src/manager.rs | 2 +- 7 files changed, 282 insertions(+), 54 deletions(-) create mode 100644 sled-agent/src/hardware_monitor.rs create mode 100644 sled-agent/src/hardware_monitor.sh diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index d7c3e9d5d8..64d8119a33 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -104,41 +104,6 @@ impl BootstrapManagers { } } } - - // Observe the current hardware state manually. - // - // We use this when we're monitoring hardware for the first - // time, and if we miss notifications. - pub(super) async fn check_latest_hardware_snapshot( - &self, - sled_agent: Option<&SledAgent>, - log: &Logger, - ) { - let underlay_network = sled_agent.map(|sled_agent| { - sled_agent.notify_nexus_about_self(log); - sled_agent.switch_zone_underlay_info() - }); - info!( - log, "Checking current full hardware snapshot"; - "underlay_network_info" => ?underlay_network, - ); - if self.hardware.is_scrimlet_driver_loaded() { - let baseboard = self.hardware.baseboard(); - if let Err(e) = - self.service.activate_switch(underlay_network, baseboard).await - { - warn!(log, "Failed to activate switch: {e}"); - } - } else { - if let Err(e) = self.service.deactivate_switch().await { - warn!(log, "Failed to deactivate switch: {e}"); - } - } - - self.storage - .ensure_using_exactly_these_disks(self.hardware.disks()) - .await; - } } pub(super) struct BootstrapAgentStartup { @@ -148,7 +113,6 @@ pub(super) struct BootstrapAgentStartup { pub(super) base_log: Logger, pub(super) startup_log: Logger, pub(super) managers: BootstrapManagers, - pub(super) key_manager_handle: JoinHandle<()>, } impl BootstrapAgentStartup { @@ -202,6 +166,7 @@ impl BootstrapAgentStartup { // This should be a no-op if already enabled. BootstrapNetworking::enable_ipv6_forwarding().await?; + // Are we a gimlet or scrimlet? 
let sled_mode = sled_mode_from_config(&config)?; // Spawn all important long running tasks that live for the lifetime of @@ -233,7 +198,7 @@ impl BootstrapAgentStartup { config.sidecar_revision.clone(), config.switch_zone_maghemite_links.clone(), long_running_task_handles.storage_manager.clone(), - storage_manager.zone_bundler().clone(), + long_running_task_handles.zone_bundler.clone(), ); Ok(Self { @@ -242,12 +207,8 @@ impl BootstrapAgentStartup { ddm_admin_localhost_client, base_log, startup_log: log, - managers: BootstrapManagers { - hardware: hardware_manager, - storage: storage_manager, - service: service_manager, - }, - key_manager_handle, + service_manager, + long_running_task_handles, }) } } diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 638aa51dee..20778bd402 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -25,7 +25,6 @@ use crate::config::Config as SledConfig; use crate::config::ConfigError; use crate::server::Server as SledAgentServer; use crate::sled_agent::SledAgent; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs new file mode 100644 index 0000000000..7304f10492 --- /dev/null +++ b/sled-agent/src/hardware_monitor.rs @@ -0,0 +1,270 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A task that listens for hardware events from the +//! [`sled_hardware::HardwareManager`] and dispatches them to other parts +//! of the bootstrap agent and sled-agent code. + +use crate::services::ServiceManager; +use crate::sled_agent::SledAgent; +use sled_hardware::{Baseboard, HardwareManager, HardwareUpdate}; +use sled_storage::disk::RawDisk; +use sled_storage::manager::StorageHandle; +use slog::Logger; +use std::fmt::Debug; +use tokio::sync::broadcast; +use tokio::sync::broadcast::error::RecvError; +use tokio::sync::mpsc; + +const QUEUE_SIZE: usize = 10; + +pub enum HardwareMonitorMsg { + SledAgentStarted(SledAgent), + ServiceManagerCreated(ServiceManager), +} + +impl Debug for HardwareMonitorMsg { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HardwareMonitorMsg::SledAgentStarted(_) => { + f.debug_struct("SledAgentStarted").finish() + } + HardwareMonitorMsg::ServiceManagerCreated(_) => { + f.debug_struct("ServiceManagerCreated").finish() + } + } + } +} + +// A thin wrapper around the the [`ServiceManager`] that caches the state +// whether or not the tofino is loaded if the [`ServiceManager`] doesn't exist +// yet. +enum TofinoManager { + Ready(ServiceManager), + NotReady { tofino_loaded: bool }, +} + +impl TofinoManager { + pub fn new() -> TofinoManager { + TofinoManager::NotReady { tofino_loaded: false } + } + + // Must only be called once on the transition from `NotReady` to `Ready`. + // Panics otherwise. 
+ // + // Returns whether the tofino was loaded or not + pub fn become_ready(&mut self, service_manager: ServiceManager) -> bool { + match self { + Self::Ready(_) => panic!("ServiceManager is already available"), + Self::NotReady { tofino_loaded } => { + *self = Self::Ready(service_manager); + *tofino_loaded + } + } + } +} + +#[derive(Clone)] +pub struct HardwareMonitorHandle { + tx: mpsc::Sender, +} + +pub struct HardwareMonitor { + log: Logger, + + baseboard: Baseboard, + + // Receive messages from the [`HardwareMonitorHandle`] + handle_rx: mpsc::Receiver, + + // Receive messages from the [`HardwareManager`] + hardware_rx: broadcast::Receiver, + + // A reference to the hardware manager + hardware_manager: HardwareManager, + + // A handle to [`sled_hardware::manager::StorageManger`] + storage_manager: StorageHandle, + + // A handle to the sled-agent + // + // This will go away once Nexus updates are polled: + // See: + // * https://github.com/oxidecomputer/omicron/issues/1917 + // * https://rfd.shared.oxide.computer/rfd/0433 + sled_agent: Option, + + // The [`ServiceManager`] is instantiated after we start the [`HardwareMonitor`] + // task. However, it is only used to load and unload the switch zone when thes + // state of the tofino changes. We keep track of the tofino state so that we + // can properly load the tofino when the [`ServiceManager`] becomes available + // available. + tofino_manager: TofinoManager, +} + +impl HardwareMonitor { + pub fn new( + log: &Logger, + hardware_manager: &mut HardwareManager, + storage_manager: &mut StorageHandle, + ) -> (HardwareMonitor, HardwareMonitorHandle) { + let baseboard = hardware_manager.baseboard(); + let (handle_tx, handle_rx) = mpsc::channel(QUEUE_SIZE); + let hardware_rx = hardware_manager.monitor(); + let log = log.new(o!("component" => "HardwareMonitor")); + let tofino_manager = TofinoManager::new(); + ( + HardwareMonitor { + log, + baseboard, + handle_rx, + hardware_rx, + hardware_manager: hardware_manager.clone(), + storage_manager: storage_manager.clone(), + sled_agent: None, + tofino_manager, + }, + HardwareMonitorHandle { tx: handle_tx }, + ) + } + + /// Run the main receive loop of the `StorageManager` + /// + /// This should be spawned into a tokio task + pub async fn run(&mut self) { + loop { + tokio::select! 
{ + Some(msg) = self.handle_rx.recv() => { + info!( + self.log, + "Received hardware monitor message"; + "msg" => ?msg + ); + self.handle_monitor_msg(msg).await; + } + update = self.hardware_rx.recv() => { + info!( + self.log, + "Received hardware update message"; + "update" => ?update, + ); + self.handle_hardware_update(update).await; + } + } + } + } + + // Handle a message from the [`HardwareMonitorHandle`] + async fn handle_monitor_msg(&mut self, msg: HardwareMonitorMsg) { + match msg { + HardwareMonitorMsg::SledAgentStarted(sled_agent) => { + self.sled_agent = Some(sled_agent); + self.check_latest_hardware_snapshot().await; + } + HardwareMonitorMsg::ServiceManagerCreated(service_manager) => { + let tofino_loaded = + self.tofino_manager.become_ready(service_manager); + if tofino_loaded { + self.activate_switch().await; + } + } + } + } + + // Handle an update from the [`HardwareMonitor`] + async fn handle_hardware_update( + &mut self, + update: Result, + ) { + match update { + Ok(update) => match update { + HardwareUpdate::TofinoLoaded => self.activate_switch().await, + HardwareUpdate::TofinoUnloaded => { + self.deactivate_switch().await + } + HardwareUpdate::TofinoDeviceChange => { + if let Some(sled_agent) = &mut self.sled_agent { + sled_agent.notify_nexus_about_self(&self.log); + } + } + HardwareUpdate::DiskAdded(disk) => { + self.storage_manager.upsert_disk(disk.into()).await; + } + HardwareUpdate::DiskRemoved(disk) => { + self.storage_manager.delete_disk(disk.into()).await; + } + }, + Err(broadcast::error::RecvError::Lagged(count)) => { + warn!(self.log, "Hardware monitor missed {count} messages"); + self.check_latest_hardware_snapshot().await; + } + Err(broadcast::error::RecvError::Closed) => { + // The `HardwareManager` monitoring task is an infinite loop - + // the only way for us to get `Closed` here is if it panicked, + // so we will propagate such a panic. + panic!("Hardware manager monitor task panicked"); + } + } + } + + async fn activate_switch(&mut self) { + match &mut self.tofino_manager { + TofinoManager::Ready(service_manager) => { + if let Err(e) = service_manager + .activate_switch( + self.sled_agent + .as_ref() + .map(|sa| sa.switch_zone_underlay_info()), + self.baseboard.clone(), + ) + .await + { + warn!(self.log, "Failed to activate switch: {e}"); + } + } + TofinoManager::NotReady { tofino_loaded } => { + *tofino_loaded = true; + } + } + } + + async fn deactivate_switch(&mut self) { + match &mut self.tofino_manager { + TofinoManager::Ready(service_manager) => { + if let Err(e) = service_manager.deactivate_switch().await { + warn!(self.log, "Failed to deactivate switch: {e}"); + } + } + TofinoManager::NotReady { tofino_loaded } => { + *tofino_loaded = false; + } + } + } + + // Observe the current hardware state manually. + // + // We use this when we're monitoring hardware for the first + // time, and if we miss notifications. 
+ async fn check_latest_hardware_snapshot(&mut self) { + let underlay_network = self.sled_agent.as_ref().map(|sled_agent| { + sled_agent.notify_nexus_about_self(&self.log); + sled_agent.switch_zone_underlay_info() + }); + info!( + self.log, "Checking current full hardware snapshot"; + "underlay_network_info" => ?underlay_network, + ); + if self.hardware_manager.is_scrimlet_driver_loaded() { + self.activate_switch().await; + } else { + self.deactivate_switch().await; + } + + self.storage_manager + .ensure_using_exactly_these_disks( + self.hardware_manager.disks().into_iter().map(RawDisk::from), + ) + .await; + } +} diff --git a/sled-agent/src/hardware_monitor.sh b/sled-agent/src/hardware_monitor.sh new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/sled-agent/src/hardware_monitor.sh @@ -0,0 +1 @@ + diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 376a154ed2..154a133272 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -19,6 +19,7 @@ pub mod common; // Modules for the non-simulated sled agent. pub mod bootstrap; pub mod config; +pub(crate) mod hardware_monitor; mod http_entrypoints; mod instance; mod instance_manager; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index cb82648a8c..b6c22ce633 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -19,7 +19,6 @@ use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; use crate::zone_bundle::{CleanupContext, ZoneBundler}; use bootstore::schemes::v0 as bootstore; use key_manager::{KeyManager, StorageKeyRequester}; -use sled_agent_client::types::CleanupContext; use sled_hardware::{HardwareManager, SledMode}; use sled_storage::manager::{StorageHandle, StorageManager}; use slog::{info, Logger}; @@ -64,23 +63,24 @@ pub async fn spawn_all_longrunning_tasks( // Wait for the boot disk so that we can work with any ledgers, // such as those needed by the bootstore and sled-agent - let _ = storage_handle.wait_for_boot_disk().await; + let _ = storage_manager.wait_for_boot_disk().await; let bootstore = spawn_bootstore_tasks( log, - &mut storage_handle, + &mut storage_manager, &hardware_manager, global_zone_bootstrap_ip, ) .await; - let zone_bundler = spawn_zone_bundler_tasks(log, &mut storage_handle); + let zone_bundler = spawn_zone_bundler_tasks(log, &mut storage_manager); LongRunningTaskHandles { storage_key_requester, storage_manager, hardware_manager, bootstore, + zone_bundler, } } @@ -156,9 +156,5 @@ fn spawn_zone_bundler_tasks( storage_handle: &mut StorageHandle, ) -> ZoneBundler { let log = log.new(o!("component" => "ZoneBundler")); - let zone_bundler = ZoneBundler::new( - log, - storage_handle.clone(), - CleanupContext::default(), - ); + ZoneBundler::new(log, storage_handle.clone(), CleanupContext::default()) } diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 7e2050084b..e00c9ad4fa 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -617,7 +617,7 @@ mod tests { let logctx = test_setup_log("queued_disks_get_added_as_resources"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (mut manager, mut handle) = + let (mut manager, handle) = StorageManager::new(&logctx.log, key_requester); // Spawn the key_manager so that it will respond to requests for encryption keys From 9a3380ac0fab42e6045c2a69a01358de920f0f54 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Fri, 6 Oct 2023 21:02:46 +0000 Subject: [PATCH 24/66] wip --- sled-agent/src/bootstrap/pre_server.rs | 73 +++----------------------- sled-agent/src/hardware_monitor.rs | 20 ++++++- sled-agent/src/long_running_tasks.rs | 23 ++++++++ 3 files changed, 48 insertions(+), 68 deletions(-) diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 64d8119a33..bb9a040100 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -17,8 +17,6 @@ use crate::long_running_tasks::{ spawn_all_longrunning_tasks, LongRunningTaskHandles, }; use crate::services::ServiceManager; -use crate::sled_agent::SledAgent; -use crate::storage_manager::StorageManager; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; @@ -35,8 +33,6 @@ use omicron_common::address::Ipv6Subnet; use omicron_common::FileKv; use sled_hardware::underlay; use sled_hardware::DendriteAsic; -use sled_hardware::HardwareManager; -use sled_hardware::HardwareUpdate; use sled_hardware::SledMode; use sled_storage::disk::SyntheticDisk; use sled_storage::manager::StorageHandle; @@ -44,67 +40,6 @@ use slog::Drain; use slog::Logger; use std::net::IpAddr; use std::net::Ipv6Addr; -use tokio::sync::broadcast; -use tokio::task::JoinHandle; - -pub(super) struct BootstrapManagers { - pub(super) hardware: HardwareManager, - pub(super) storage: StorageManager, - pub(super) service: ServiceManager, -} - -impl BootstrapManagers { - pub(super) async fn handle_hardware_update( - &self, - update: Result, - sled_agent: Option<&SledAgent>, - log: &Logger, - ) { - match update { - Ok(update) => match update { - HardwareUpdate::TofinoLoaded => { - let baseboard = self.hardware.baseboard(); - if let Err(e) = self - .service - .activate_switch( - sled_agent.map(|sa| sa.switch_zone_underlay_info()), - baseboard, - ) - .await - { - warn!(log, "Failed to activate switch: {e}"); - } - } - HardwareUpdate::TofinoUnloaded => { - if let Err(e) = self.service.deactivate_switch().await { - warn!(log, "Failed to deactivate switch: {e}"); - } - } - HardwareUpdate::TofinoDeviceChange => { - if let Some(sled_agent) = sled_agent { - sled_agent.notify_nexus_about_self(log); - } - } - HardwareUpdate::DiskAdded(disk) => { - self.storage.upsert_disk(disk).await; - } - HardwareUpdate::DiskRemoved(disk) => { - self.storage.delete_disk(disk).await; - } - }, - Err(broadcast::error::RecvError::Lagged(count)) => { - warn!(log, "Hardware monitor missed {count} messages"); - self.check_latest_hardware_snapshot(sled_agent, log).await; - } - Err(broadcast::error::RecvError::Closed) => { - // The `HardwareManager` monitoring task is an infinite loop - - // the only way for us to get `Closed` here is if it panicked, - // so we will propagate such a panic. 
- panic!("Hardware manager monitor task panicked"); - } - } - } -} pub(super) struct BootstrapAgentStartup { pub(super) config: Config, @@ -112,7 +47,8 @@ pub(super) struct BootstrapAgentStartup { pub(super) ddm_admin_localhost_client: DdmAdminClient, pub(super) base_log: Logger, pub(super) startup_log: Logger, - pub(super) managers: BootstrapManagers, + pub(super) service_manager: ServiceManager, + pub(super) long_running_task_handles: LongRunningTaskHandles, } impl BootstrapAgentStartup { @@ -201,6 +137,11 @@ impl BootstrapAgentStartup { long_running_task_handles.zone_bundler.clone(), ); + long_running_task_handles + .hardware_monitor + .service_manager_ready(service_manager.clone()) + .await; + Ok(Self { config, global_zone_bootstrap_ip, diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs index 7304f10492..10e20ed92c 100644 --- a/sled-agent/src/hardware_monitor.rs +++ b/sled-agent/src/hardware_monitor.rs @@ -70,6 +70,22 @@ pub struct HardwareMonitorHandle { tx: mpsc::Sender, } +impl HardwareMonitorHandle { + pub async fn service_manager_ready(&self, service_manager: ServiceManager) { + self.tx + .send(HardwareMonitorMsg::ServiceManagerCreated(service_manager)) + .await + .unwrap(); + } + + pub async fn sled_agent_started(&self, sled_agent: SledAgent) { + self.tx + .send(HardwareMonitorMsg::SledAgentStarted(sled_agent)) + .await + .unwrap(); + } +} + pub struct HardwareMonitor { log: Logger, @@ -106,8 +122,8 @@ pub struct HardwareMonitor { impl HardwareMonitor { pub fn new( log: &Logger, - hardware_manager: &mut HardwareManager, - storage_manager: &mut StorageHandle, + hardware_manager: &HardwareManager, + storage_manager: &StorageHandle, ) -> (HardwareMonitor, HardwareMonitorHandle) { let baseboard = hardware_manager.baseboard(); let (handle_tx, handle_rx) = mpsc::channel(QUEUE_SIZE); diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index b6c22ce633..223289bb2e 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -16,6 +16,7 @@ use crate::bootstrap::bootstore::{ new_bootstore_config, poll_ddmd_for_bootstore_peer_update, }; use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; +use crate::hardware_monitor::{HardwareMonitor, HardwareMonitorHandle}; use crate::zone_bundle::{CleanupContext, ZoneBundler}; use bootstore::schemes::v0 as bootstore; use key_manager::{KeyManager, StorageKeyRequester}; @@ -41,6 +42,10 @@ pub struct LongRunningTaskHandles { /// A mechanism for interacting with the hardware device tree pub hardware_manager: HardwareManager, + /// A mechanism for interacting with the task that monitors for hardware + /// updates from the [`HardwareManager`] + pub hardware_monitor: HardwareMonitorHandle, + // A handle for interacting with the bootstore pub bootstore: bootstore::NodeHandle, @@ -61,6 +66,10 @@ pub async fn spawn_all_longrunning_tasks( // TODO: Does this need to run inside tokio::task::spawn_blocking? 
let hardware_manager = spawn_hardware_manager(log, sled_mode); + // Start monitoring for hardware changes + let hardware_monitor = + spawn_hardware_monitor(log, &hardware_manager, &storage_manager); + // Wait for the boot disk so that we can work with any ledgers, // such as those needed by the bootstore and sled-agent let _ = storage_manager.wait_for_boot_disk().await; @@ -79,6 +88,7 @@ pub async fn spawn_all_longrunning_tasks( storage_key_requester, storage_manager, hardware_manager, + hardware_monitor, bootstore, zone_bundler, } @@ -122,6 +132,19 @@ fn spawn_hardware_manager( HardwareManager::new(log, sled_mode).unwrap() } +fn spawn_hardware_monitor( + log: &Logger, + hardware_manager: &HardwareManager, + storage_handle: &StorageHandle, +) -> HardwareMonitorHandle { + let (monitor, handle) = + HardwareMonitor::new(log, hardware_manager, storage_handle); + tokio::spawn(async move { + monitor.run().await; + }); + handle +} + async fn spawn_bootstore_tasks( log: &Logger, storage_handle: &mut StorageHandle, From b2c01e707c0acf8c7d1393dfd76c8d3917685912 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 6 Oct 2023 21:11:38 +0000 Subject: [PATCH 25/66] wip --- sled-agent/src/bootstrap/server.rs | 99 +++++------------------------- sled-agent/src/hardware_monitor.rs | 5 ++ 2 files changed, 20 insertions(+), 84 deletions(-) diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 20778bd402..4e07ee03b7 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -45,6 +45,8 @@ use serde::Deserialize; use serde::Serialize; use sled_hardware::underlay; use sled_hardware::HardwareUpdate; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::borrow::Cow; use std::io; @@ -175,68 +177,19 @@ impl Server { ddm_admin_localhost_client, base_log, startup_log, - managers, - key_manager_handle, + service_manager, + long_running_task_handles, } = BootstrapAgentStartup::run(config).await?; - // From this point on we will listen for hardware notifications and - // potentially start the switch zone and be notified of new disks; we - // are responsible for responding to updates from this point on. - let mut hardware_monitor = managers.hardware.monitor(); - let storage_resources = managers.storage.resources(); - - // Check the latest hardware snapshot; we could have missed events - // between the creation of the hardware manager and our subscription of - // its monitor. - managers.check_latest_hardware_snapshot(None, &startup_log).await; - - // Wait for our boot M.2 to show up. - wait_while_handling_hardware_updates( - wait_for_boot_m2(storage_resources, &startup_log), - &mut hardware_monitor, - &managers, - None, // No underlay network yet - &startup_log, - "waiting for boot M.2", - ) - .await; - - // Wait for the bootstore to start. - let bootstore_handles = wait_while_handling_hardware_updates( - BootstoreHandles::spawn( - storage_resources, - ddm_admin_localhost_client.clone(), - managers.hardware.baseboard(), - global_zone_bootstrap_ip, - &base_log, - ), - &mut hardware_monitor, - &managers, - None, // No underlay network yet - &startup_log, - "initializing bootstore", - ) - .await?; - // Do we have a StartSledAgentRequest stored in the ledger? 
- let maybe_ledger = wait_while_handling_hardware_updates( - async { - let paths = sled_config_paths(storage_resources).await?; - let maybe_ledger = - Ledger::>::new( - &startup_log, - paths, - ) - .await; - Ok::<_, StartError>(maybe_ledger) - }, - &mut hardware_monitor, - &managers, - None, // No underlay network yet + let paths = + sled_config_paths(&long_running_task_handles.storage_manager) + .await?; + let maybe_ledger = Ledger::>::new( &startup_log, - "loading sled-agent request from ledger", + paths, ) - .await?; + .await; // We don't yet _act_ on the `StartSledAgentRequest` if we have one, but // if we have one we init our `RssAccess` noting that we're already @@ -522,28 +475,6 @@ fn start_dropshot_server( Ok(http_server) } -/// Wait for at least the M.2 we booted from to show up. -/// -/// TODO-correctness Subsequent steps may assume all M.2s that will ever be -/// present are present once we return from this function; see -/// https://github.com/oxidecomputer/omicron/issues/3815. -async fn wait_for_boot_m2(storage_resources: &StorageResources, log: &Logger) { - // Wait for at least the M.2 we booted from to show up. - loop { - match storage_resources.boot_disk().await { - Some(disk) => { - info!(log, "Found boot disk M.2: {disk:?}"); - break; - } - None => { - info!(log, "Waiting for boot disk M.2..."); - tokio::time::sleep(core::time::Duration::from_millis(250)) - .await; - } - } - } -} - struct MissingM2Paths(&'static str); impl From for StartError { @@ -559,17 +490,17 @@ impl From for SledAgentServerStartError { } async fn sled_config_paths( - storage: &StorageResources, + storage: &StorageHandle, ) -> Result, MissingM2Paths> { - let paths: Vec<_> = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) - .await + let resources = storage.get_latest_resources().await; + let paths: Vec<_> = resources + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(SLED_AGENT_REQUEST_FILE)) .collect(); if paths.is_empty() { - return Err(MissingM2Paths(sled_hardware::disk::CONFIG_DATASET)); + return Err(MissingM2Paths(CONFIG_DATASET)); } Ok(paths) } diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs index 10e20ed92c..e296a3bdca 100644 --- a/sled-agent/src/hardware_monitor.rs +++ b/sled-agent/src/hardware_monitor.rs @@ -149,6 +149,11 @@ impl HardwareMonitor { /// /// This should be spawned into a tokio task pub async fn run(&mut self) { + // Check the latest hardware snapshot; we could have missed events + // between the creation of the hardware manager and our subscription of + // its monitor. + self.check_latest_hardware_snapshot().await; + loop { tokio::select! { Some(msg) = self.handle_rx.recv() => { From 327578ca51bd75f9905f9126f56cfb1fd1b8aaa0 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Fri, 6 Oct 2023 22:39:16 +0000 Subject: [PATCH 26/66] wip --- sled-agent/src/bootstrap/http_entrypoints.rs | 6 +- sled-agent/src/bootstrap/rack_ops.rs | 12 +- sled-agent/src/bootstrap/rss_handle.rs | 6 +- sled-agent/src/bootstrap/server.rs | 172 +--- sled-agent/src/instance.rs | 11 +- sled-agent/src/instance_manager.rs | 6 +- sled-agent/src/params.rs | 61 +- sled-agent/src/rack_setup/plan/service.rs | 17 +- sled-agent/src/rack_setup/plan/sled.rs | 9 +- sled-agent/src/rack_setup/service.rs | 26 +- sled-agent/src/server.rs | 9 +- sled-agent/src/sled_agent.rs | 49 +- sled-agent/src/storage/dataset.rs | 63 -- sled-agent/src/storage/dump_setup.rs | 76 +- sled-agent/src/storage/mod.rs | 1 - sled-storage/src/dump_setup.rs | 803 ------------------- sled-storage/src/lib.rs | 1 - 17 files changed, 156 insertions(+), 1172 deletions(-) delete mode 100644 sled-agent/src/storage/dataset.rs delete mode 100644 sled-storage/src/dump_setup.rs diff --git a/sled-agent/src/bootstrap/http_entrypoints.rs b/sled-agent/src/bootstrap/http_entrypoints.rs index c69bdeb0ce..7c32bf48a5 100644 --- a/sled-agent/src/bootstrap/http_entrypoints.rs +++ b/sled-agent/src/bootstrap/http_entrypoints.rs @@ -12,7 +12,6 @@ use super::BootstrapError; use super::RssAccessError; use crate::bootstrap::params::RackInitializeRequest; use crate::bootstrap::rack_ops::{RackInitId, RackResetId}; -use crate::storage_manager::StorageResources; use crate::updates::ConfigUpdates; use crate::updates::{Component, UpdateManager}; use bootstore::schemes::v0 as bootstore; @@ -25,6 +24,7 @@ use omicron_common::api::external::Error; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::Baseboard; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::Ipv6Addr; use tokio::sync::mpsc::error::TrySendError; @@ -33,7 +33,7 @@ use tokio::sync::{mpsc, oneshot}; pub(crate) struct BootstrapServerContext { pub(crate) base_log: Logger, pub(crate) global_zone_bootstrap_ip: Ipv6Addr, - pub(crate) storage_resources: StorageResources, + pub(crate) storage_manager: StorageHandle, pub(crate) bootstore_node_handle: bootstore::NodeHandle, pub(crate) baseboard: Baseboard, pub(crate) rss_access: RssAccess, @@ -50,7 +50,7 @@ impl BootstrapServerContext { self.rss_access.start_initializing( &self.base_log, self.global_zone_bootstrap_ip, - &self.storage_resources, + &self.storage_manager, &self.bootstore_node_handle, request, ) diff --git a/sled-agent/src/bootstrap/rack_ops.rs b/sled-agent/src/bootstrap/rack_ops.rs index b8721f8332..5cfd0b074a 100644 --- a/sled-agent/src/bootstrap/rack_ops.rs +++ b/sled-agent/src/bootstrap/rack_ops.rs @@ -8,11 +8,11 @@ use crate::bootstrap::http_entrypoints::RackOperationStatus; use crate::bootstrap::params::RackInitializeRequest; use crate::bootstrap::rss_handle::RssHandle; use crate::rack_setup::service::SetupServiceError; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::mem; use std::net::Ipv6Addr; @@ -171,7 +171,7 @@ impl RssAccess { &self, parent_log: &Logger, global_zone_bootstrap_ip: Ipv6Addr, - storage_resources: &StorageResources, + storage_manager: &StorageHandle, bootstore_node_handle: &bootstore::NodeHandle, request: RackInitializeRequest, ) -> Result { @@ -207,14 +207,14 @@ impl RssAccess { mem::drop(status); let parent_log = parent_log.clone(); - let storage_resources = 
storage_resources.clone(); + let storage_manager = storage_manager.clone(); let bootstore_node_handle = bootstore_node_handle.clone(); let status = Arc::clone(&self.status); tokio::spawn(async move { let result = rack_initialize( &parent_log, global_zone_bootstrap_ip, - storage_resources, + storage_manager, bootstore_node_handle, request, ) @@ -342,7 +342,7 @@ enum RssStatus { async fn rack_initialize( parent_log: &Logger, global_zone_bootstrap_ip: Ipv6Addr, - storage_resources: StorageResources, + storage_manager: StorageHandle, bootstore_node_handle: bootstore::NodeHandle, request: RackInitializeRequest, ) -> Result<(), SetupServiceError> { @@ -350,7 +350,7 @@ async fn rack_initialize( parent_log, request, global_zone_bootstrap_ip, - storage_resources, + storage_manager, bootstore_node_handle, ) .await diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index c82873d91d..5d9c01e7f2 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -9,7 +9,6 @@ use super::params::StartSledAgentRequest; use crate::rack_setup::config::SetupServiceConfig; use crate::rack_setup::service::RackSetupService; use crate::rack_setup::service::SetupServiceError; -use crate::storage_manager::StorageResources; use ::bootstrap_agent_client::Client as BootstrapAgentClient; use bootstore::schemes::v0 as bootstore; use futures::stream::FuturesUnordered; @@ -17,6 +16,7 @@ use futures::StreamExt; use omicron_common::backoff::retry_notify; use omicron_common::backoff::retry_policy_local; use omicron_common::backoff::BackoffError; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::Ipv6Addr; use std::net::SocketAddrV6; @@ -46,7 +46,7 @@ impl RssHandle { log: &Logger, config: SetupServiceConfig, our_bootstrap_address: Ipv6Addr, - storage_resources: StorageResources, + storage_manager: StorageHandle, bootstore: bootstore::NodeHandle, ) -> Result<(), SetupServiceError> { let (tx, rx) = rss_channel(our_bootstrap_address); @@ -54,7 +54,7 @@ impl RssHandle { let rss = RackSetupService::new( log.new(o!("component" => "RSS")), config, - storage_resources, + storage_manager, tx, bootstore, ); diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 4e07ee03b7..94c326eef5 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -8,7 +8,6 @@ use super::config::BOOTSTRAP_AGENT_HTTP_PORT; use super::http_entrypoints; use super::params::RackInitializeRequest; use super::params::StartSledAgentRequest; -use super::pre_server::BootstrapManagers; use super::rack_ops::RackInitId; use super::views::SledAgentResponse; use super::BootstrapError; @@ -23,15 +22,15 @@ use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; use crate::bootstrap::sprockets_server::SprocketsServer; use crate::config::Config as SledConfig; use crate::config::ConfigError; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::server::Server as SledAgentServer; +use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; use ddm_admin_client::DdmError; use dropshot::HttpServer; -use futures::Future; use futures::StreamExt; use illumos_utils::dladm; use illumos_utils::zfs; @@ -44,7 +43,6 @@ use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; use sled_hardware::underlay; -use 
sled_hardware::HardwareUpdate; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; @@ -52,7 +50,6 @@ use std::borrow::Cow; use std::io; use std::net::SocketAddr; use std::net::SocketAddrV6; -use tokio::sync::broadcast; use tokio::sync::mpsc; use tokio::sync::oneshot; use tokio::task::JoinHandle; @@ -206,9 +203,9 @@ impl Server { let bootstrap_context = BootstrapServerContext { base_log: base_log.clone(), global_zone_bootstrap_ip, - storage_resources: storage_resources.clone(), - bootstore_node_handle: bootstore_handles.node_handle.clone(), - baseboard: managers.hardware.baseboard(), + storage_manager: long_running_task_handles.storage_manager.clone(), + bootstore_node_handle: long_running_task_handles.bootstore.clone(), + baseboard: long_running_task_handles.hardware_manager.baseboard(), rss_access, updates: config.updates.clone(), sled_reset_tx, @@ -240,52 +237,31 @@ impl Server { // Do we have a persistent sled-agent request that we need to restore? let state = if let Some(ledger) = maybe_ledger { let sled_request = ledger.data(); - let sled_agent_server = wait_while_handling_hardware_updates( - start_sled_agent( - &config, - &sled_request.request, - &bootstore_handles.node_handle, - &managers, - &ddm_admin_localhost_client, - &base_log, - &startup_log, - ), - &mut hardware_monitor, - &managers, - None, // No underlay network yet + let sled_agent_server = start_sled_agent( + &config, + &sled_request.request, + long_running_task_handles.clone(), + service_manager, + &ddm_admin_localhost_client, + &base_log, &startup_log, - "restoring sled-agent (cold boot)", ) .await?; - let sled_agent = sled_agent_server.sled_agent(); - // We've created sled-agent; we need to (possibly) reconfigure the // switch zone, if we're a scrimlet, to give it our underlay network // information. - let underlay_network_info = sled_agent.switch_zone_underlay_info(); - info!( - startup_log, "Sled Agent started; rescanning hardware"; - "underlay_network_info" => ?underlay_network_info, - ); - managers - .check_latest_hardware_snapshot(Some(&sled_agent), &startup_log) + let sled_agent = sled_agent_server.sled_agent(); + long_running_task_handles + .hardware_monitor + .sled_agent_started(sled_agent.clone()) .await; // For cold boot specifically, we now need to load the services // we're responsible for, while continuing to handle hardware // notifications. This cannot fail: we retry indefinitely until // we're done loading services. - wait_while_handling_hardware_updates( - sled_agent.cold_boot_load_services(), - &mut hardware_monitor, - &managers, - Some(&sled_agent), - &startup_log, - "restoring sled-agent services (cold boot)", - ) - .await; - + sled_agent.cold_boot_load_services().await; SledAgentState::ServerStarted(sled_agent_server) } else { SledAgentState::Bootstrapping @@ -296,15 +272,13 @@ impl Server { // agent state. 
let inner = Inner { config, - hardware_monitor, state, sled_init_rx, sled_reset_rx, - managers, ddm_admin_localhost_client, - bootstore_handles, + long_running_task_handles, + service_manager, _sprockets_server_handle: sprockets_server_handle, - _key_manager_handle: key_manager_handle, base_log, }; let inner_task = tokio::spawn(inner.run()); @@ -378,8 +352,8 @@ impl From for StartError { async fn start_sled_agent( config: &SledConfig, request: &StartSledAgentRequest, - bootstore: &bootstore::NodeHandle, - managers: &BootstrapManagers, + long_running_task_handles: LongRunningTaskHandles, + service_manager: ServiceManager, ddmd_client: &DdmAdminClient, base_log: &Logger, log: &Logger, @@ -394,14 +368,17 @@ async fn start_sled_agent( if request.use_trust_quorum { info!(log, "KeyManager: using lrtq secret retriever"); let salt = request.hash_rack_id(); - LrtqOrHardcodedSecretRetriever::init_lrtq(salt, bootstore.clone()) + LrtqOrHardcodedSecretRetriever::init_lrtq( + salt, + long_running_task_handles.bootstore.clone(), + ) } else { info!(log, "KeyManager: using hardcoded secret retriever"); LrtqOrHardcodedSecretRetriever::init_hardcoded(); } // Inform the storage service that the key manager is available - managers.storage.key_manager_ready().await; + long_running_task_handles.storage_manager.key_manager_ready().await; // Start trying to notify ddmd of our sled prefix so it can // advertise it to other sleds. @@ -421,9 +398,8 @@ async fn start_sled_agent( config, base_log.clone(), request.clone(), - managers.service.clone(), - managers.storage.clone(), - bootstore.clone(), + long_running_task_handles.clone(), + service_manager, ) .await .map_err(SledAgentServerStartError::FailedStartingServer)?; @@ -432,7 +408,8 @@ async fn start_sled_agent( // Record this request so the sled agent can be automatically // initialized on the next boot. - let paths = sled_config_paths(managers.storage.resources()).await?; + let paths = + sled_config_paths(&long_running_task_handles.storage_manager).await?; let mut ledger = Ledger::new_with( &log, @@ -505,41 +482,6 @@ async fn sled_config_paths( Ok(paths) } -// Helper function to wait for `fut` while handling any updates about hardware. -async fn wait_while_handling_hardware_updates, T>( - fut: F, - hardware_monitor: &mut broadcast::Receiver, - managers: &BootstrapManagers, - sled_agent: Option<&SledAgent>, - log: &Logger, - log_phase: &str, -) -> T { - tokio::pin!(fut); - loop { - tokio::select! { - // Cancel-safe per the docs on `broadcast::Receiver::recv()`. - hardware_update = hardware_monitor.recv() => { - info!( - log, - "Handling hardware update message"; - "phase" => log_phase, - "update" => ?hardware_update, - ); - - managers.handle_hardware_update( - hardware_update, - sled_agent, - log, - ).await; - } - - // Cancel-safe: we're using a `&mut Future`; dropping the - // reference does not cancel the underlying future. 
- result = &mut fut => return result, - } - } -} - #[derive(Clone, Serialize, Deserialize, PartialEq, JsonSchema)] struct PersistentSledAgentRequest<'a> { request: Cow<'a, StartSledAgentRequest>, @@ -565,18 +507,16 @@ pub fn run_openapi() -> Result<(), String> { struct Inner { config: SledConfig, - hardware_monitor: broadcast::Receiver, state: SledAgentState, sled_init_rx: mpsc::Receiver<( StartSledAgentRequest, oneshot::Sender>, )>, sled_reset_rx: mpsc::Receiver>>, - managers: BootstrapManagers, ddm_admin_localhost_client: DdmAdminClient, - bootstore_handles: BootstoreHandles, + service_manager: ServiceManager, + long_running_task_handles: LongRunningTaskHandles, _sprockets_server_handle: JoinHandle<()>, - _key_manager_handle: JoinHandle<()>, base_log: Logger, } @@ -584,14 +524,7 @@ impl Inner { async fn run(mut self) { let log = self.base_log.new(o!("component" => "SledAgentMain")); loop { - // TODO-correctness We pause handling hardware update messages while - // we handle sled init/reset requests - is that okay? tokio::select! { - // Cancel-safe per the docs on `broadcast::Receiver::recv()`. - hardware_update = self.hardware_monitor.recv() => { - self.handle_hardware_update(hardware_update, &log).await; - } - // Cancel-safe per the docs on `mpsc::Receiver::recv()`. Some((request, response_tx)) = self.sled_init_rx.recv() => { self.handle_start_sled_agent_request( @@ -619,27 +552,6 @@ impl Inner { } } - async fn handle_hardware_update( - &self, - hardware_update: Result, - log: &Logger, - ) { - info!( - log, - "Handling hardware update message"; - "phase" => "bootstore-steady-state", - "update" => ?hardware_update, - ); - - self.managers - .handle_hardware_update( - hardware_update, - self.state.sled_agent(), - &log, - ) - .await; - } - async fn handle_start_sled_agent_request( &mut self, request: StartSledAgentRequest, @@ -651,8 +563,8 @@ impl Inner { let response = match start_sled_agent( &self.config, &request, - &self.bootstore_handles.node_handle, - &self.managers, + self.long_running_task_handles.clone(), + self.service_manager.clone(), &self.ddm_admin_localhost_client, &self.base_log, &log, @@ -663,11 +575,9 @@ impl Inner { // We've created sled-agent; we need to (possibly) // reconfigure the switch zone, if we're a scrimlet, to // give it our underlay network information. 
- self.managers - .check_latest_hardware_snapshot( - Some(server.sled_agent()), - log, - ) + self.long_running_task_handles + .hardware_monitor + .sled_agent_started(server.sled_agent().clone()) .await; self.state = SledAgentState::ServerStarted(server); @@ -725,11 +635,11 @@ impl Inner { async fn uninstall_sled_local_config(&self) -> Result<(), BootstrapError> { let config_dirs = self - .managers - .storage - .resources() - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + .long_running_task_handles + .storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter(); for dir in config_dirs { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index baf92af28a..88ece3e3b0 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -17,7 +17,6 @@ use crate::params::{ InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule, }; use crate::profile::*; -use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; @@ -40,7 +39,8 @@ use omicron_common::backoff; use propolis_client::Client as PropolisClient; use rand::prelude::SliceRandom; use rand::SeedableRng; -use sled_hardware::disk::ZONE_DATASET; +use sled_storage::dataset::ZONE_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::IpAddr; use std::net::{SocketAddr, SocketAddrV6}; @@ -243,7 +243,7 @@ struct InstanceInner { nexus_client: NexusClientWithResolver, // Storage resources - storage: StorageResources, + storage: StorageHandle, // Object used to collect zone bundles from this instance when terminated. zone_bundler: ZoneBundler, @@ -622,7 +622,7 @@ impl Instance { vnic_allocator: VnicAllocator, port_manager: PortManager, nexus_client: NexusClientWithResolver, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ) -> Result { info!(log, "Instance::new w/initial HW: {:?}", initial); @@ -889,8 +889,9 @@ impl Instance { let mut rng = rand::rngs::StdRng::from_entropy(); let root = inner .storage - .all_u2_mountpoints(ZONE_DATASET) + .get_latest_resources() .await + .all_u2_mountpoints(ZONE_DATASET) .choose(&mut rng) .ok_or_else(|| Error::U2NotFound)? 
.clone(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index bdd29e4d1f..c6310d28f2 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -12,7 +12,6 @@ use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, InstanceUnregisterResponse, }; -use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use illumos_utils::dladm::Etherstub; @@ -21,6 +20,7 @@ use illumos_utils::opte::PortManager; use illumos_utils::vmm_reservoir; use omicron_common::api::external::ByteCount; use omicron_common::api::internal::nexus::InstanceRuntimeState; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::sync::{Arc, Mutex}; @@ -62,7 +62,7 @@ struct InstanceManagerInternal { vnic_allocator: VnicAllocator, port_manager: PortManager, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, } @@ -78,7 +78,7 @@ impl InstanceManager { nexus_client: NexusClientWithResolver, etherstub: Etherstub, port_manager: PortManager, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ) -> Result { Ok(InstanceManager { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index d0fa2fbe4d..e736793298 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -18,6 +18,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::Baseboard; pub use sled_hardware::DendriteAsic; +use sled_storage::dataset::DatasetName; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::time::Duration; @@ -210,64 +211,6 @@ pub struct Zpool { pub disk_type: DiskType, } -/// The type of a dataset, and an auxiliary information necessary -/// to successfully launch a zone managing the associated data. -#[derive( - Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, -)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum DatasetKind { - CockroachDb, - Crucible, - Clickhouse, - ClickhouseKeeper, - ExternalDns, - InternalDns, -} - -impl From for sled_agent_client::types::DatasetKind { - fn from(k: DatasetKind) -> Self { - use DatasetKind::*; - match k { - CockroachDb => Self::CockroachDb, - Crucible => Self::Crucible, - Clickhouse => Self::Clickhouse, - ClickhouseKeeper => Self::ClickhouseKeeper, - ExternalDns => Self::ExternalDns, - InternalDns => Self::InternalDns, - } - } -} - -impl From for nexus_client::types::DatasetKind { - fn from(k: DatasetKind) -> Self { - use DatasetKind::*; - match k { - CockroachDb => Self::Cockroach, - Crucible => Self::Crucible, - Clickhouse => Self::Clickhouse, - ClickhouseKeeper => Self::ClickhouseKeeper, - ExternalDns => Self::ExternalDns, - InternalDns => Self::InternalDns, - } - } -} - -impl std::fmt::Display for DatasetKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use DatasetKind::*; - let s = match self { - Crucible => "crucible", - CockroachDb { .. } => "cockroachdb", - Clickhouse => "clickhouse", - ClickhouseKeeper => "clickhouse_keeper", - ExternalDns { .. } => "external_dns", - InternalDns { .. } => "internal_dns", - }; - write!(f, "{}", s) - } -} - /// Describes service-specific parameters. 
#[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, @@ -577,7 +520,7 @@ impl std::fmt::Display for ZoneType { )] pub struct DatasetRequest { pub id: Uuid, - pub name: crate::storage::dataset::DatasetName, + pub name: DatasetName, pub service_address: SocketAddrV6, } diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 2183aa7b63..01fababa4d 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -11,7 +11,6 @@ use crate::params::{ }; use crate::rack_setup::config::SetupServiceConfig as Config; use crate::storage::dataset::DatasetName; -use crate::storage_manager::StorageResources; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; @@ -35,6 +34,8 @@ use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::{BTreeSet, HashMap, HashSet}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6}; @@ -124,11 +125,12 @@ const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan.json"; impl Plan { pub async fn load( log: &Logger, - storage: &StorageResources, + storage_manager: &StorageHandle, ) -> Result, PlanError> { - let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SERVICE_PLAN_FILENAME)) .collect(); @@ -236,7 +238,7 @@ impl Plan { pub async fn create( log: &Logger, config: &Config, - storage: &StorageResources, + storage_manager: &StorageHandle, sleds: &HashMap, ) -> Result { let mut dns_builder = internal_dns::DnsConfigBuilder::new(); @@ -724,9 +726,10 @@ impl Plan { let plan = Self { services, dns_config }; // Once we've constructed a plan, write it down to durable storage. - let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SERVICE_PLAN_FILENAME)) .collect(); diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index ea12f0db32..189216fd9b 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -13,6 +13,8 @@ use camino::Utf8PathBuf; use omicron_common::ledger::{self, Ledger, Ledgerable}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddrV6}; @@ -77,7 +79,7 @@ impl Plan { pub async fn create( log: &Logger, config: &Config, - storage: &StorageResources, + storage_manager: &StorageHandle, bootstrap_addrs: HashSet, use_trust_quorum: bool, ) -> Result { @@ -119,9 +121,10 @@ impl Plan { let plan = Self { rack_id, sleds, config: config.clone() }; // Once we've constructed a plan, write it down to durable storage. 
- let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SLED_PLAN_FILENAME)) .collect(); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 805c889295..42290f5ce8 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -73,7 +73,6 @@ use crate::rack_setup::plan::service::{ use crate::rack_setup::plan::sled::{ Plan as SledPlan, PlanError as SledPlanError, }; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use ddm_admin_client::{Client as DdmAdminClient, DdmError}; @@ -93,6 +92,8 @@ use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; use sled_hardware::underlay::BootstrapInterface; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeSet; use std::collections::{HashMap, HashSet}; @@ -186,7 +187,7 @@ impl RackSetupService { pub(crate) fn new( log: Logger, config: Config, - storage_resources: StorageResources, + storage_manager: StorageHandle, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, ) -> Self { @@ -195,7 +196,7 @@ impl RackSetupService { if let Err(e) = svc .run( &config, - &storage_resources, + &storage_manager, local_bootstrap_agent, bootstore, ) @@ -741,7 +742,7 @@ impl ServiceInner { async fn run( &self, config: &Config, - storage_resources: &StorageResources, + storage_manager: &StorageHandle, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, ) -> Result<(), SetupServiceError> { @@ -752,9 +753,10 @@ impl ServiceInner { config.az_subnet(), )?; - let marker_paths: Vec = storage_resources - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let marker_paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_COMPLETED_FILENAME)) .collect(); @@ -775,7 +777,7 @@ impl ServiceInner { "RSS configuration looks like it has already been applied", ); - let sled_plan = SledPlan::load(&self.log, storage_resources) + let sled_plan = SledPlan::load(&self.log, storage_manager) .await? .expect("Sled plan should exist if completed marker exists"); if &sled_plan.config != config { @@ -783,7 +785,7 @@ impl ServiceInner { "Configuration changed".to_string(), )); } - let service_plan = ServicePlan::load(&self.log, storage_resources) + let service_plan = ServicePlan::load(&self.log, storage_manager) .await? .expect("Service plan should exist if completed marker exists"); @@ -817,7 +819,7 @@ impl ServiceInner { BootstrapAddressDiscovery::OnlyThese { addrs } => addrs.clone(), }; let maybe_sled_plan = - SledPlan::load(&self.log, storage_resources).await?; + SledPlan::load(&self.log, storage_manager).await?; if let Some(plan) = &maybe_sled_plan { let stored_peers: HashSet = plan.sleds.keys().map(|a| *a.ip()).collect(); @@ -849,7 +851,7 @@ impl ServiceInner { SledPlan::create( &self.log, config, - &storage_resources, + &storage_manager, bootstrap_addrs, config.trust_quorum_peers.is_some(), ) @@ -902,14 +904,14 @@ impl ServiceInner { }) .collect(); let service_plan = if let Some(plan) = - ServicePlan::load(&self.log, storage_resources).await? + ServicePlan::load(&self.log, storage_manager).await? 
{ plan } else { ServicePlan::create( &self.log, &config, - &storage_resources, + &storage_manager, &plan.sleds, ) .await? diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 156547627c..c9828e7542 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -8,10 +8,9 @@ use super::config::Config; use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; use crate::bootstrap::params::StartSledAgentRequest; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::nexus::NexusClientWithResolver; use crate::services::ServiceManager; -use crate::storage_manager::StorageManager; -use bootstore::schemes::v0 as bootstore; use internal_dns::resolver::Resolver; use slog::Logger; use std::net::SocketAddr; @@ -39,9 +38,8 @@ impl Server { config: &Config, log: Logger, request: StartSledAgentRequest, + long_running_tasks_handles: LongRunningTaskHandles, services: ServiceManager, - storage: StorageManager, - bootstore: bootstore::NodeHandle, ) -> Result { info!(log, "setting up sled agent server"); @@ -63,8 +61,7 @@ impl Server { nexus_client, request, services, - storage, - bootstore, + long_running_tasks_handles, ) .await .map_err(|e| e.to_string())?; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index dc130524f6..475cbf8018 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -10,6 +10,7 @@ use crate::bootstrap::early_networking::{ use crate::bootstrap::params::StartSledAgentRequest; use crate::config::Config; use crate::instance_manager::InstanceManager; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::nexus::{NexusClientWithResolver, NexusRequestQueue}; use crate::params::{ DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, @@ -18,11 +19,9 @@ use crate::params::{ VpcFirewallRule, ZoneBundleMetadata, Zpool, }; use crate::services::{self, ServiceManager}; -use crate::storage_manager::{self, StorageManager}; use crate::updates::{ConfigUpdates, UpdateManager}; use crate::zone_bundle; use crate::zone_bundle::BundleError; -use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use dropshot::HttpError; use illumos_utils::opte::params::{ @@ -46,6 +45,8 @@ use omicron_common::backoff::{ }; use sled_hardware::underlay; use sled_hardware::HardwareManager; +use sled_storage::dataset::DatasetName; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddrV6}; @@ -200,7 +201,7 @@ struct SledAgentInner { subnet: Ipv6Subnet, // Component of Sled Agent responsible for storage and dataset management. - storage: StorageManager, + storage: StorageHandle, // Component of Sled Agent responsible for managing Propolis instances. instances: InstanceManager, @@ -254,8 +255,7 @@ impl SledAgent { nexus_client: NexusClientWithResolver, request: StartSledAgentRequest, services: ServiceManager, - storage: StorageManager, - bootstore: bootstore::NodeHandle, + long_running_task_handles: LongRunningTaskHandles, ) -> Result { // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. @@ -268,12 +268,17 @@ impl SledAgent { )); info!(&log, "SledAgent::new(..) starting"); + let storage_manager = &long_running_task_handles.storage_manager; + // Configure a swap device of the configured size before other system setup. 
match config.swap_device_size_gb { Some(sz) if sz > 0 => { info!(log, "Requested swap device of size {} GiB", sz); - let boot_disk = - storage.resources().boot_disk().await.ok_or_else(|| { + let boot_disk = storage_manager + .get_latest_resources() + .await + .boot_disk() + .ok_or_else(|| { crate::swap_device::SwapDeviceError::BootDiskNotFound })?; crate::swap_device::ensure_swap_device( @@ -324,28 +329,13 @@ impl SledAgent { *sled_address.ip(), ); - storage - .setup_underlay_access(storage_manager::UnderlayAccess { - nexus_client: nexus_client.clone(), - sled_id: request.id, - }) - .await?; - - // TODO-correctness The bootstrap agent _also_ has a `HardwareManager`. - // We only use it for reading properties, but it's not `Clone`able - // because it's holding an inner task handle. Could we add a way to get - // a read-only handle to it, and have bootstrap agent give us that - // instead of creating a new full one ourselves? - let hardware = HardwareManager::new(&parent_log, services.sled_mode()) - .map_err(|e| Error::Hardware(e))?; - let instances = InstanceManager::new( parent_log.clone(), nexus_client.clone(), etherstub.clone(), port_manager.clone(), - storage.resources().clone(), - storage.zone_bundler().clone(), + storage_manager.clone(), + long_running_task_handles.zone_bundler.clone(), )?; match config.vmm_reservoir_percentage { @@ -378,7 +368,8 @@ impl SledAgent { // until we have this, as we need to know which switches have uplinks to // correctly set up services. let get_network_config = || async { - let serialized_config = bootstore + let serialized_config = long_running_task_handles + .bootstore .get_network_config() .await .map_err(|err| BackoffError::transient(err.to_string()))? @@ -421,14 +412,13 @@ impl SledAgent { rack_network_config.clone(), )?; - let zone_bundler = storage.zone_bundler().clone(); let sled_agent = SledAgent { inner: Arc::new(SledAgentInner { id: request.id, subnet: request.subnet, - storage, + storage: long_running_task_handles.storage_manager.clone(), instances, - hardware, + hardware: long_running_task_handles.hardware_manager.clone(), updates, port_manager, services, @@ -442,7 +432,7 @@ impl SledAgent { // request queue? nexus_request_queue: NexusRequestQueue::new(), rack_network_config, - zone_bundler, + zone_bundler: long_running_task_handles.zone_bundler.clone(), }), log: log.clone(), }; @@ -462,6 +452,7 @@ impl SledAgent { /// Blocks until all services have started, retrying indefinitely on /// failure. pub(crate) async fn cold_boot_load_services(&self) { + info!(self.log, "Loading cold boot services"); retry_notify( retry_policy_internal_service_aggressive(), || async { diff --git a/sled-agent/src/storage/dataset.rs b/sled-agent/src/storage/dataset.rs deleted file mode 100644 index 4efc0f320a..0000000000 --- a/sled-agent/src/storage/dataset.rs +++ /dev/null @@ -1,63 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use crate::params::DatasetKind; -use illumos_utils::zpool::ZpoolName; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use std::str::FromStr; - -#[derive( - Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, -)] -pub struct DatasetName { - // A unique identifier for the Zpool on which the dataset is stored. - pool_name: ZpoolName, - // A name for the dataset within the Zpool. 
- kind: DatasetKind, -} - -impl DatasetName { - pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self { - Self { pool_name, kind } - } - - pub fn pool(&self) -> &ZpoolName { - &self.pool_name - } - - pub fn dataset(&self) -> &DatasetKind { - &self.kind - } - - pub fn full(&self) -> String { - format!("{}/{}", self.pool_name, self.kind) - } -} - -impl From for sled_agent_client::types::DatasetName { - fn from(n: DatasetName) -> Self { - Self { - pool_name: sled_agent_client::types::ZpoolName::from_str( - &n.pool().to_string(), - ) - .unwrap(), - kind: n.dataset().clone().into(), - } - } -} - -#[cfg(test)] -mod test { - use super::*; - use uuid::Uuid; - - #[test] - fn serialize_dataset_name() { - let pool = ZpoolName::new_internal(Uuid::new_v4()); - let kind = DatasetKind::Crucible; - let name = DatasetName::new(pool, kind); - toml::to_string(&name).unwrap(); - } -} diff --git a/sled-agent/src/storage/dump_setup.rs b/sled-agent/src/storage/dump_setup.rs index 9b5edc0a7e..ea60998955 100644 --- a/sled-agent/src/storage/dump_setup.rs +++ b/sled-agent/src/storage/dump_setup.rs @@ -1,4 +1,3 @@ -use crate::storage_manager::DiskWrapper; use camino::Utf8PathBuf; use derive_more::{AsRef, Deref, From}; use illumos_utils::dumpadm::DumpAdmError; @@ -6,6 +5,8 @@ use illumos_utils::zone::{AdmError, Zones}; use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; use omicron_common::disk::DiskIdentity; use sled_hardware::DiskVariant; +use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET}; +use sled_storage::disk::Disk; use slog::Logger; use std::collections::{HashMap, HashSet}; use std::ffi::OsString; @@ -70,11 +71,11 @@ trait GetMountpoint: std::ops::Deref { } impl GetMountpoint for DebugZpool { type NewType = DebugDataset; - const MOUNTPOINT: &'static str = sled_hardware::disk::DUMP_DATASET; + const MOUNTPOINT: &'static str = DUMP_DATASET; } impl GetMountpoint for CoreZpool { type NewType = CoreDataset; - const MOUNTPOINT: &'static str = sled_hardware::disk::CRASH_DATASET; + const MOUNTPOINT: &'static str = CRASH_DATASET; } struct DumpSetupWorker { @@ -99,50 +100,51 @@ const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300); impl DumpSetup { pub(crate) async fn update_dumpdev_setup( &self, - disks: &mut MutexGuard<'_, HashMap>, + disks: &mut MutexGuard<'_, HashMap>, ) { let log = &self.log; let mut m2_dump_slices = Vec::new(); let mut u2_debug_datasets = Vec::new(); let mut m2_core_datasets = Vec::new(); - for (_id, disk_wrapper) in disks.iter() { - match disk_wrapper { - DiskWrapper::Real { disk, .. 
} => match disk.variant() { - DiskVariant::M2 => { - match disk.dump_device_devfs_path(false) { - Ok(path) => { - m2_dump_slices.push(DumpSlicePath(path)) - } - Err(err) => { - warn!(log, "Error getting dump device devfs path: {err:?}"); - } + for (_id, disk) in disks.iter() { + if disk.is_synthetic() { + // We only setup dump devices on real disks + continue; + } + match disk.variant() { + DiskVariant::M2 => { + match disk.dump_device_devfs_path(false) { + Ok(path) => m2_dump_slices.push(DumpSlicePath(path)), + Err(err) => { + warn!( + log, + "Error getting dump device devfs path: {err:?}" + ); } - let name = disk.zpool_name(); - if let Ok(info) = illumos_utils::zpool::Zpool::get_info( - &name.to_string(), - ) { - if info.health() == ZpoolHealth::Online { - m2_core_datasets.push(CoreZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); - } + } + let name = disk.zpool_name(); + if let Ok(info) = + illumos_utils::zpool::Zpool::get_info(&name.to_string()) + { + if info.health() == ZpoolHealth::Online { + m2_core_datasets.push(CoreZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); } } - DiskVariant::U2 => { - let name = disk.zpool_name(); - if let Ok(info) = illumos_utils::zpool::Zpool::get_info( - &name.to_string(), - ) { - if info.health() == ZpoolHealth::Online { - u2_debug_datasets - .push(DebugZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); - } + } + DiskVariant::U2 => { + let name = disk.zpool_name(); + if let Ok(info) = + illumos_utils::zpool::Zpool::get_info(&name.to_string()) + { + if info.health() == ZpoolHealth::Online { + u2_debug_datasets.push(DebugZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); } } - }, - DiskWrapper::Synthetic { .. } => {} + } } } diff --git a/sled-agent/src/storage/mod.rs b/sled-agent/src/storage/mod.rs index 74bd59a151..663ebe8274 100644 --- a/sled-agent/src/storage/mod.rs +++ b/sled-agent/src/storage/mod.rs @@ -4,5 +4,4 @@ //! Management of local storage -pub(crate) mod dataset; pub(crate) mod dump_setup; diff --git a/sled-storage/src/dump_setup.rs b/sled-storage/src/dump_setup.rs deleted file mode 100644 index 39c6aa2995..0000000000 --- a/sled-storage/src/dump_setup.rs +++ /dev/null @@ -1,803 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! 
Dump dataset setup - -use crate::dataset::{CRASH_DATASET, DUMP_DATASET}; -use crate::disk::Disk; -use camino::Utf8PathBuf; -use derive_more::{AsRef, Deref, From}; -use illumos_utils::dumpadm::DumpAdmError; -use illumos_utils::zone::{AdmError, Zones}; -use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; -use omicron_common::disk::DiskIdentity; -use sled_hardware::DiskVariant; -use slog::{debug, error, info, o, warn, Logger}; -use std::collections::{HashMap, HashSet}; -use std::ffi::OsString; -use std::path::{Path, PathBuf}; -use std::sync::{Arc, Weak}; -use std::time::{Duration, SystemTime, SystemTimeError, UNIX_EPOCH}; -use tokio::sync::MutexGuard; - -pub struct DumpSetup { - worker: Arc>, - _poller: std::thread::JoinHandle<()>, - log: Logger, -} - -impl DumpSetup { - pub fn new(log: &Logger) -> Self { - let worker = Arc::new(std::sync::Mutex::new(DumpSetupWorker::new( - log.new(o!("component" => "DumpSetup-worker")), - ))); - let worker_weak = Arc::downgrade(&worker); - let log_poll = log.new(o!("component" => "DumpSetup-archival")); - let _poller = std::thread::spawn(move || { - Self::poll_file_archival(worker_weak, log_poll) - }); - let log = log.new(o!("component" => "DumpSetup")); - Self { worker, _poller, log } - } -} - -// we sure are passing a lot of Utf8PathBufs around, let's be careful about it -#[derive( - AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd, -)] -struct DumpSlicePath(Utf8PathBuf); -#[derive( - AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd, -)] -struct DebugDataset(Utf8PathBuf); -#[derive( - AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd, -)] -struct CoreDataset(Utf8PathBuf); - -#[derive(Deref)] -struct CoreZpool(ZpoolName); -#[derive(Deref)] -struct DebugZpool(ZpoolName); - -// only want to access these directories after they're mounted! -trait GetMountpoint: std::ops::Deref { - type NewType: From; - const MOUNTPOINT: &'static str; - fn mountpoint(&self) -> Result, ZfsGetError> { - if zfs_get_prop(self.to_string(), "mounted")? 
== "yes" { - Ok(Some(Self::NewType::from( - self.dataset_mountpoint(Self::MOUNTPOINT), - ))) - } else { - Ok(None) - } - } -} -impl GetMountpoint for DebugZpool { - type NewType = DebugDataset; - const MOUNTPOINT: &'static str = DUMP_DATASET; -} -impl GetMountpoint for CoreZpool { - type NewType = CoreDataset; - const MOUNTPOINT: &'static str = CRASH_DATASET; -} - -struct DumpSetupWorker { - core_dataset_names: Vec, - debug_dataset_names: Vec, - - chosen_dump_slice: Option, - chosen_debug_dir: Option, - chosen_core_dir: Option, - - known_dump_slices: Vec, - known_debug_dirs: Vec, - known_core_dirs: Vec, - - savecored_slices: HashSet, - - log: Logger, -} - -const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300); - -impl DumpSetup { - pub(crate) async fn update_dumpdev_setup( - &self, - disks: &mut MutexGuard<'_, HashMap>, - ) { - let log = &self.log; - let mut m2_dump_slices = Vec::new(); - let mut u2_debug_datasets = Vec::new(); - let mut m2_core_datasets = Vec::new(); - for (_id, disk) in disks.iter() { - if disk.is_synthetic() { - // We only setup dump devices on real disks - continue; - } - match disk.variant() { - DiskVariant::M2 => { - match disk.dump_device_devfs_path(false) { - Ok(path) => m2_dump_slices.push(DumpSlicePath(path)), - Err(err) => { - warn!( - log, - "Error getting dump device devfs path: {err:?}" - ); - } - } - let name = disk.zpool_name(); - if let Ok(info) = - illumos_utils::zpool::Zpool::get_info(&name.to_string()) - { - if info.health() == ZpoolHealth::Online { - m2_core_datasets.push(CoreZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); - } - } - } - DiskVariant::U2 => { - let name = disk.zpool_name(); - if let Ok(info) = - illumos_utils::zpool::Zpool::get_info(&name.to_string()) - { - if info.health() == ZpoolHealth::Online { - u2_debug_datasets.push(DebugZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); - } - } - } - } - } - - let savecore_lock = self.worker.clone(); - let log_tmp = log.new(o!("component" => "DumpSetup-mutex")); - tokio::task::spawn_blocking(move || match savecore_lock.lock() { - Ok(mut guard) => { - guard.update_disk_loadout( - m2_dump_slices, - u2_debug_datasets, - m2_core_datasets, - ); - } - Err(err) => { - error!(log_tmp, "DumpSetup mutex poisoned: {err:?}"); - } - }); - } - - fn poll_file_archival( - worker: Weak>, - log: Logger, - ) { - info!(log, "DumpSetup poll loop started."); - loop { - if let Some(mutex) = worker.upgrade() { - match mutex.lock() { - Ok(mut guard) => { - guard.reevaluate_choices(); - if let Err(err) = guard.archive_files() { - error!( - log, - "Failed to archive debug/dump files: {err:?}" - ); - } - } - Err(err) => { - error!( - log, - "DumpSetup mutex poisoned in poll thread: {err:?}" - ); - break; - } - } - } else { - info!( - log, - "DumpSetup weak pointer dropped, leaving poll loop." 
- ); - break; - } - std::thread::sleep(ARCHIVAL_INTERVAL); - } - } -} - -#[derive(Debug, thiserror::Error)] -enum ZfsGetError { - #[error("Error executing 'zfs get' command: {0}")] - IoError(#[from] std::io::Error), - #[error("Output of 'zfs get' was not only not an integer string, it wasn't even UTF-8: {0}")] - Utf8(#[from] std::string::FromUtf8Error), - #[error("Error parsing output of 'zfs get' command as integer: {0}")] - Parse(#[from] std::num::ParseIntError), -} - -const ZFS_PROP_USED: &str = "used"; -const ZFS_PROP_AVAILABLE: &str = "available"; - -fn zfs_get_integer( - mountpoint_or_name: impl AsRef, - property: &str, -) -> Result { - zfs_get_prop(mountpoint_or_name, property)?.parse().map_err(Into::into) -} - -fn zfs_get_prop( - mountpoint_or_name: impl AsRef + Sized, - property: &str, -) -> Result { - let mountpoint = mountpoint_or_name.as_ref(); - let mut cmd = std::process::Command::new(illumos_utils::zfs::ZFS); - cmd.arg("get").arg("-Hpo").arg("value"); - cmd.arg(property); - cmd.arg(mountpoint); - let output = cmd.output()?; - Ok(String::from_utf8(output.stdout)?.trim().to_string()) -} - -const DATASET_USAGE_PERCENT_CHOICE: u64 = 70; -const DATASET_USAGE_PERCENT_CLEANUP: u64 = 80; - -fn below_thresh( - mountpoint: &Utf8PathBuf, - percent: u64, -) -> Result<(bool, u64), ZfsGetError> { - let used = zfs_get_integer(mountpoint, ZFS_PROP_USED)?; - let available = zfs_get_integer(mountpoint, ZFS_PROP_AVAILABLE)?; - let capacity = used + available; - let below = (used * 100) / capacity < percent; - Ok((below, used)) -} - -impl DumpSetupWorker { - fn new(log: Logger) -> Self { - Self { - core_dataset_names: vec![], - debug_dataset_names: vec![], - chosen_dump_slice: None, - chosen_debug_dir: None, - chosen_core_dir: None, - known_dump_slices: vec![], - known_debug_dirs: vec![], - known_core_dirs: vec![], - savecored_slices: Default::default(), - log, - } - } - - fn update_disk_loadout( - &mut self, - dump_slices: Vec, - debug_datasets: Vec, - core_datasets: Vec, - ) { - self.core_dataset_names = core_datasets; - self.debug_dataset_names = debug_datasets; - - self.known_dump_slices = dump_slices; - - self.reevaluate_choices(); - } - - // only allow mounted zfs datasets into 'known_*_dirs', - // such that we don't render them non-auto-mountable by zfs - fn update_mounted_dirs(&mut self) { - self.known_debug_dirs = self - .debug_dataset_names - .iter() - .flat_map(|ds| ds.mountpoint()) - .flatten() - .collect(); - self.known_core_dirs = self - .core_dataset_names - .iter() - .flat_map(|ds| ds.mountpoint()) - .flatten() - .collect(); - } - - fn reevaluate_choices(&mut self) { - self.update_mounted_dirs(); - - self.known_dump_slices.sort(); - // sort key: prefer to choose a dataset where there's already other - // dumps so we don't shotgun them across every U.2, but only if they're - // below a certain usage threshold. - self.known_debug_dirs.sort_by_cached_key( - |mountpoint: &DebugDataset| { - match below_thresh(mountpoint.as_ref(), DATASET_USAGE_PERCENT_CHOICE) { - Ok((below, used)) => { - let priority = if below { 0 } else { 1 }; - (priority, used, mountpoint.clone()) - } - Err(err) => { - error!(self.log, "Could not query zfs properties of debug dump dir: {err:?}"); - // deprioritize anything we get errors querying. 
- (usize::MAX, u64::MAX, mountpoint.clone()) - } - } - }, - ); - self.known_core_dirs.sort_by_cached_key(|mnt| { - // these get archived periodically anyway, pick one with room - let available = zfs_get_integer(&**mnt, "available").unwrap_or(0); - (u64::MAX - available, mnt.clone()) - }); - - if let Some(x) = &self.chosen_debug_dir { - if !self.known_debug_dirs.contains(x) { - warn!(self.log, "Previously-chosen debug/dump dir {x:?} no longer exists in our view of reality"); - self.chosen_debug_dir = None; - } else { - match below_thresh(x.as_ref(), DATASET_USAGE_PERCENT_CLEANUP) { - Ok((true, _)) => {} - Ok((false, _)) => { - if self.known_debug_dirs.iter().any(|x| { - below_thresh( - x.as_ref(), - DATASET_USAGE_PERCENT_CHOICE, - ) - .unwrap_or((false, 0)) - .0 - }) { - info!(self.log, "Previously-chosen debug/dump dir {x:?} is over usage threshold, choosing a more vacant disk"); - self.chosen_debug_dir = None; - } else { - warn!(self.log, "All candidate debug/dump dirs are over usage threshold, removing older archived files"); - if let Err(err) = self.cleanup() { - error!(self.log, "Couldn't clean up any debug/dump dirs, may hit dataset quota in {x:?}: {err:?}"); - } else { - self.chosen_debug_dir = None; - } - } - } - Err(err) => { - error!(self.log, "Previously-chosen debug/dump dir {x:?} couldn't be queried for zfs properties! Choosing another. {err:?}"); - self.chosen_debug_dir = None; - } - } - } - } - if let Some(x) = &self.chosen_dump_slice { - if !self.known_dump_slices.contains(x) { - warn!(self.log, "Previously-chosen dump slice {x:?} no longer exists in our view of reality"); - self.chosen_dump_slice = None; - } - } - if let Some(x) = &self.chosen_core_dir { - if !self.known_core_dirs.contains(x) { - warn!(self.log, "Previously-chosen core dir {x:?} no longer exists in our view of reality"); - self.chosen_core_dir = None; - } - } - - if self.chosen_debug_dir.is_none() { - self.chosen_debug_dir = self.known_debug_dirs.first().cloned(); - } - - if self.chosen_core_dir.is_none() { - for core_dir in &self.known_core_dirs { - // tell the system to write *userspace process* cores here. - match illumos_utils::coreadm::coreadm(core_dir) { - Ok(()) => { - self.chosen_core_dir = Some(core_dir.clone()); - info!( - self.log, - "Set process core dump directory to {core_dir:?}" - ); - break; - } - Err(err) => { - error!(self.log, "Couldn't configure process core dump directory to {core_dir:?}: {err:?}"); - } - } - } - } - - if self.chosen_dump_slice.is_none() { - if self.chosen_debug_dir.is_some() { - for dump_slice in self.known_dump_slices.clone() { - // Let's try to see if it appears to have a kernel dump already - match illumos_utils::dumpadm::dump_flag_is_valid( - &dump_slice, - ) { - Ok(true) => { - debug!(self.log, "Dump slice {dump_slice:?} appears to have a valid header; will attempt to savecore"); - } - Ok(false) => { - info!(self.log, "Dump slice {dump_slice:?} appears to have already been saved"); - } - Err(err) => { - debug!(self.log, "Dump slice {dump_slice:?} appears to be unused: {err:?}"); - } - } - if let Ok(saved) = self.dumpadm_and_savecore(&dump_slice) { - if let Some(out) = saved { - info!(self.log, "Previous dump on slice {dump_slice:?} saved, configured slice as target for new dumps. {out:?}"); - } - self.chosen_dump_slice = Some(dump_slice); - break; - } - } - } else { - // Don't risk overwriting an existing kernel dump if there's - // already one there until we can attempt to savecore(8) - // it away and clear the flag to make room. 
- for dump_slice in &self.known_dump_slices { - match illumos_utils::dumpadm::dump_flag_is_valid(dump_slice) - { - Ok(false) => { - // Have dumpadm write the config for crash dumps to be - // on this slice, at least, until a U.2 comes along. - match illumos_utils::dumpadm::dumpadm( - dump_slice, None, - ) { - Ok(_) => { - info!(self.log, "Using dump device {dump_slice:?} with no savecore destination (no U.2 debug zvol yet)"); - self.chosen_dump_slice = - Some(dump_slice.clone()); - break; - } - Err(err) => { - warn!(self.log, "Could not configure {dump_slice:?} as dump device: {err:?}"); - } - } - } - Ok(true) => { - warn!(self.log, "Not configuring {dump_slice:?} as it appears to contain a dump we cannot yet send to a U.2 debug zvol"); - } - Err(err) => { - debug!( - self.log, - "Dump slice {dump_slice:?} appears to be unused : {err:?}", - ); - } - } - } - } - } - - if let Some(debug_dir) = self.chosen_debug_dir.clone() { - let mut changed_slice = false; - for dump_slice in self.known_dump_slices.clone() { - if !self.savecored_slices.contains(&dump_slice) { - changed_slice = true; - // temporarily changes the system's dump slice so savecore(8) - // can update the header in the slice when it finishes... - match self.dumpadm_and_savecore(&dump_slice) { - Ok(saved) => { - if let Some(stdout) = &saved { - info!( - self.log, - "Saved dump from {dump_slice:?} to {debug_dir:?}: {stdout:?}" - ); - } else { - info!( - self.log, - "Set {dump_slice:?} as system dump slice", - ); - } - } - Err(err) => { - warn!(self.log, "Could not configure {dump_slice:?} as dump device with {debug_dir:?} as savecore destination: {err:?}"); - } - } - } - } - - // ...so then we restore the chosen dump slice for the system to use - // in the event of a kernel crash - if changed_slice { - if let Some(dump_slice) = &self.chosen_dump_slice { - if let Err(err) = - illumos_utils::dumpadm::dumpadm(dump_slice, None) - { - error!(self.log, "Could not restore dump slice to {dump_slice:?}: {err:?}"); - } - } - } - } - } - - fn archive_files(&self) -> std::io::Result<()> { - if let Some(debug_dir) = &self.chosen_debug_dir { - if self.known_core_dirs.is_empty() { - info!(self.log, "No core dump locations yet known."); - } - for core_dir in &self.known_core_dirs { - if let Ok(dir) = core_dir.read_dir() { - for entry in dir.flatten() { - if let Some(path) = entry.file_name().to_str() { - let dest = debug_dir.join(path); - - if let Err(err) = - Self::copy_sync_and_remove(&entry.path(), &dest) - { - error!( - self.log, - "Failed to archive {entry:?}: {err:?}" - ); - } else { - info!( - self.log, - "Relocated {entry:?} to {dest:?}" - ); - } - } else { - error!(self.log, "Non-UTF8 path found while archiving core dumps: {entry:?}"); - } - } - } - } - } else { - info!( - self.log, - "No archival destination for crash dumps yet chosen." 
- ); - } - - if let Err(err) = self.archive_logs() { - if !matches!(err, ArchiveLogsError::NoDebugDirYet) { - error!( - self.log, - "Failure while trying to archive logs to debug dataset: {err:?}" - ); - } - } - - Ok(()) - } - - fn copy_sync_and_remove( - source: impl AsRef, - dest: impl AsRef, - ) -> std::io::Result<()> { - let source = source.as_ref(); - let dest = dest.as_ref(); - let mut dest_f = std::fs::File::create(&dest)?; - let mut src_f = std::fs::File::open(&source)?; - - std::io::copy(&mut src_f, &mut dest_f)?; - - dest_f.sync_all()?; - - drop(src_f); - drop(dest_f); - - std::fs::remove_file(source)?; - Ok(()) - } - - fn archive_logs(&self) -> Result<(), ArchiveLogsError> { - let debug_dir = self - .chosen_debug_dir - .as_ref() - .ok_or(ArchiveLogsError::NoDebugDirYet)?; - // zone crate's 'deprecated' functions collide if you try to enable - // its 'sync' and 'async' features simultaneously :( - let rt = - tokio::runtime::Runtime::new().map_err(ArchiveLogsError::Tokio)?; - let oxz_zones = rt.block_on(Zones::get())?; - self.archive_logs_inner( - debug_dir, - PathBuf::from("/var/svc/log"), - "global", - )?; - for zone in oxz_zones { - let logdir = zone.path().join("root/var/svc/log"); - let zone_name = zone.name(); - self.archive_logs_inner(debug_dir, logdir, zone_name)?; - } - Ok(()) - } - - fn archive_logs_inner( - &self, - debug_dir: &DebugDataset, - logdir: PathBuf, - zone_name: &str, - ) -> Result<(), ArchiveLogsError> { - let mut rotated_log_files = Vec::new(); - // patterns matching archived logs, e.g. foo.log.3 - // keep checking for greater numbers of digits until we don't find any - for n in 1..9 { - let pattern = logdir - .join(format!("*.log.{}", "[0-9]".repeat(n))) - .to_str() - .ok_or_else(|| ArchiveLogsError::Utf8(zone_name.to_string()))? - .to_string(); - rotated_log_files.extend(glob::glob(&pattern)?.flatten()); - } - let dest_dir = debug_dir.join(zone_name).into_std_path_buf(); - if !rotated_log_files.is_empty() { - std::fs::create_dir_all(&dest_dir)?; - let count = rotated_log_files.len(); - info!( - self.log, - "Archiving {count} log files from {zone_name} zone" - ); - } - for entry in rotated_log_files { - let src_name = entry.file_name().unwrap(); - // as we archive them, logadm will keep resetting to .log.0, - // so we need to maintain our own numbering in the dest dataset. - // we'll use the modified date of the rotated log file, or try - // falling back to the time of archival if that fails, and - // falling back to counting up from 0 if *that* somehow fails. - let mut n = entry - .metadata() - .and_then(|m| m.modified()) - .unwrap_or_else(|_| SystemTime::now()) - .duration_since(UNIX_EPOCH) - .map(|d| d.as_secs()) - .unwrap_or(0); - let mut dest; - loop { - dest = dest_dir.join(src_name).with_extension(format!("{n}")); - if dest.exists() { - n += 1; - } else { - break; - } - } - if let Err(err) = Self::copy_sync_and_remove(&entry, dest) { - warn!(self.log, "Failed to archive {entry:?}: {err:?}"); - } - } - Ok(()) - } - - // Have dumpadm write the config for crash dumps to be - // on this slice, and then invoke savecore(8) to save any - // dump that's already present there. - // - // NOTE: because of the need to have dumpadm change the global - // state of which slice the system is using for dumps in order - // for savecore to behave the way we want (i.e. clear the flag - // after succeeding), we could hypothetically miss a dump if - // the kernel crashes again while savecore is still running. 
- fn dumpadm_and_savecore( - &mut self, - dump_slice: &DumpSlicePath, - ) -> Result, DumpAdmError> { - // TODO: untangle savecore from illumos_utils::dumpadm - assert!(self.chosen_debug_dir.is_some()); - - let savecore_dir = self.chosen_debug_dir.clone().unwrap().0; - - match illumos_utils::dumpadm::dumpadm(&dump_slice, Some(&savecore_dir)) - { - Ok(saved) => { - self.savecored_slices.insert(dump_slice.clone()); - Ok(saved) - } - Err(err) => Err(err), - } - } - - fn cleanup(&self) -> Result<(), CleanupError> { - let mut dir_info = Vec::new(); - for dir in &self.known_debug_dirs { - match Self::scope_dir_for_cleanup(dir) { - Ok(info) => { - dir_info.push((info, dir)); - } - Err(err) => { - error!(self.log, "Could not analyze {dir:?} for debug dataset cleanup task: {err:?}"); - } - } - } - if dir_info.is_empty() { - return Err(CleanupError::NoDatasetsToClean); - } - // find dir with oldest average time of files that must be deleted - // to achieve desired threshold, and reclaim that space. - dir_info.sort(); - 'outer: for (dir_info, dir) in dir_info { - let CleanupDirInfo { average_time: _, num_to_delete, file_list } = - dir_info; - for (_time, _bytes, path) in &file_list[..num_to_delete as usize] { - // if we are unable to remove a file, we cannot guarantee - // that we will reach our target size threshold, and suspect - // the i/o error *may* be an issue with the underlying disk, so - // we continue to the dataset with the next-oldest average age - // of files-to-delete in the sorted list. - if let Err(err) = std::fs::remove_file(&path) { - error!(self.log, "Couldn't delete {path:?} from debug dataset, skipping {dir:?}. {err:?}"); - continue 'outer; - } - } - // we made it through all the files we planned to remove, thereby - // freeing up enough space on one of the debug datasets for it to - // be chosen when reevaluating targets. - break; - } - Ok(()) - } - - fn scope_dir_for_cleanup( - debug_dir: &DebugDataset, - ) -> Result { - let used = zfs_get_integer(&**debug_dir, ZFS_PROP_USED)?; - let available = zfs_get_integer(&**debug_dir, ZFS_PROP_AVAILABLE)?; - let capacity = used + available; - - let target_used = capacity * DATASET_USAGE_PERCENT_CHOICE / 100; - - let mut file_list = Vec::new(); - // find all files in the debug dataset and sort by modified time - for path in glob::glob(debug_dir.join("**/*").as_str())?.flatten() { - let meta = std::fs::metadata(&path)?; - // we need this to be a Duration rather than SystemTime so we can - // do math to it later. - let time = meta.modified()?.duration_since(UNIX_EPOCH)?; - let size = meta.len(); - - file_list.push((time, size, path)) - } - file_list.sort(); - - // find how many old files must be deleted to get the dataset under - // the limit, and what the average age of that set is. 
- let mut possible_bytes = 0; - let mut total_time = Duration::ZERO; - let mut num_to_delete = 0; - for (time, size, _path) in &file_list { - if used - possible_bytes < target_used { - break; - } else { - total_time += *time; - num_to_delete += 1; - possible_bytes += size; - } - } - let average_time = - total_time.checked_div(num_to_delete).unwrap_or(Duration::MAX); - - Ok(CleanupDirInfo { average_time, num_to_delete, file_list }) - } -} - -#[derive(thiserror::Error, Debug)] -enum ArchiveLogsError { - #[error("Couldn't make an async runtime to get zone info: {0}")] - Tokio(std::io::Error), - #[error("I/O error: {0}")] - IoError(#[from] std::io::Error), - #[error("Error calling zoneadm: {0}")] - Zoneadm(#[from] AdmError), - #[error("Non-UTF8 zone path for zone {0}")] - Utf8(String), - #[error("Glob pattern invalid: {0}")] - Glob(#[from] glob::PatternError), - #[error( - "No debug dir into which we should archive logs has yet been chosen" - )] - NoDebugDirYet, -} - -#[derive(thiserror::Error, Debug)] -enum CleanupError { - #[error("No debug datasets were successfully evaluated for cleanup")] - NoDatasetsToClean, - #[error("Failed to query ZFS properties: {0}")] - ZfsError(#[from] ZfsGetError), - #[error("I/O error: {0}")] - IoError(#[from] std::io::Error), - #[error("Glob pattern invalid: {0}")] - Glob(#[from] glob::PatternError), - #[error("A file's observed modified time was before the Unix epoch: {0}")] - TimelineWentSideways(#[from] SystemTimeError), -} - -#[derive(Ord, PartialOrd, Eq, PartialEq)] -struct CleanupDirInfo { - average_time: Duration, - num_to_delete: u32, - file_list: Vec<(Duration, u64, PathBuf)>, -} diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs index 0c1b383d7f..fc08579d77 100644 --- a/sled-storage/src/lib.rs +++ b/sled-storage/src/lib.rs @@ -10,7 +10,6 @@ pub mod dataset; pub mod disk; -pub(crate) mod dump_setup; pub mod error; pub(crate) mod keyfile; pub mod manager; From 9593047345ba5c84d5db302b5f44c3d2f8908815 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 6 Oct 2023 22:50:53 +0000 Subject: [PATCH 27/66] wip --- sled-agent/src/bootstrap/server.rs | 2 +- sled-agent/src/services.rs | 29 ++++++++++++++++++----------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 94c326eef5..c9e1002306 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -514,8 +514,8 @@ struct Inner { )>, sled_reset_rx: mpsc::Receiver>>, ddm_admin_localhost_client: DdmAdminClient, - service_manager: ServiceManager, long_running_task_handles: LongRunningTaskHandles, + service_manager: ServiceManager, _sprockets_server_handle: JoinHandle<()>, base_log: Logger, } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 3fcbf717fa..c22eae6baa 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -92,9 +92,8 @@ use sled_hardware::underlay; use sled_hardware::underlay::BOOTSTRAP_PREFIX; use sled_hardware::Baseboard; use sled_hardware::SledMode; -use sled_storage::dataset::{CONFIG_DATASET, ZONE_DATASET}; +use sled_storage::dataset::{CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET}; use sled_storage::manager::StorageHandle; -use sled_storage::resources::StorageResources; use slog::Logger; use std::collections::HashSet; use std::collections::{BTreeMap, HashMap}; @@ -1084,11 +1083,11 @@ impl ServiceManager { // If the boot disk exists, look for the image in the "install" dataset // there too. 
- if let Some((_, boot_zpool)) = self.inner.storage.boot_disk().await { - zone_image_paths.push( - boot_zpool - .dataset_mountpoint(sled_hardware::disk::INSTALL_DATASET), - ); + if let Some((_, boot_zpool)) = + self.inner.storage.get_latest_resources().await.boot_disk() + { + zone_image_paths + .push(boot_zpool.dataset_mountpoint(INSTALL_DATASET)); } let installed_zone = InstalledZone::install( @@ -2195,8 +2194,12 @@ impl ServiceManager { // Create zones that should be running let mut zone_requests = AllZoneRequests::default(); - let all_u2_roots = - self.inner.storage.all_u2_mountpoints(ZONE_DATASET).await; + let all_u2_roots = self + .inner + .storage + .get_latest_resources() + .await + .all_u2_mountpoints(ZONE_DATASET); for zone in zones_to_be_added { // Check if we think the zone should already be running let name = zone.zone_name(); @@ -2870,8 +2873,12 @@ impl ServiceManager { let root = if request.zone_type == ZoneType::Switch { Utf8PathBuf::from(ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT) } else { - let all_u2_roots = - self.inner.storage.all_u2_mountpoints(ZONE_DATASET).await; + let all_u2_roots = self + .inner + .storage + .get_latest_resources() + .await + .all_u2_mountpoints(ZONE_DATASET); let mut rng = rand::rngs::StdRng::from_entropy(); all_u2_roots .choose(&mut rng) From e64b5690121f3722c2fd00dbff91266006775c55 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 6 Oct 2023 23:33:13 +0000 Subject: [PATCH 28/66] wip --- sled-agent/src/bootstrap/server.rs | 2 +- sled-agent/src/hardware_monitor.rs | 11 +- sled-agent/src/hardware_monitor.sh | 1 - sled-agent/src/http_entrypoints.rs | 2 +- sled-agent/src/lib.rs | 1 - sled-agent/src/long_running_tasks.rs | 5 +- sled-agent/src/rack_setup/plan/service.rs | 7 +- sled-agent/src/rack_setup/plan/sled.rs | 6 +- sled-agent/src/sled_agent.rs | 32 +- sled-agent/src/storage_manager.rs | 928 ---------------------- sled-hardware/src/illumos/mod.rs | 1 - sled-storage/src/resources.rs | 17 +- 12 files changed, 50 insertions(+), 963 deletions(-) delete mode 100644 sled-agent/src/hardware_monitor.sh delete mode 100644 sled-agent/src/storage_manager.rs diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index c9e1002306..17d988e749 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -241,7 +241,7 @@ impl Server { &config, &sled_request.request, long_running_task_handles.clone(), - service_manager, + service_manager.clone(), &ddm_admin_localhost_client, &base_log, &startup_log, diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs index e296a3bdca..03677e4e6d 100644 --- a/sled-agent/src/hardware_monitor.rs +++ b/sled-agent/src/hardware_monitor.rs @@ -55,13 +55,12 @@ impl TofinoManager { // // Returns whether the tofino was loaded or not pub fn become_ready(&mut self, service_manager: ServiceManager) -> bool { - match self { + let tofino_loaded = match self { Self::Ready(_) => panic!("ServiceManager is already available"), - Self::NotReady { tofino_loaded } => { - *self = Self::Ready(service_manager); - *tofino_loaded - } - } + Self::NotReady { tofino_loaded } => *tofino_loaded, + }; + *self = Self::Ready(service_manager); + tofino_loaded } } diff --git a/sled-agent/src/hardware_monitor.sh b/sled-agent/src/hardware_monitor.sh deleted file mode 100644 index 8b13789179..0000000000 --- a/sled-agent/src/hardware_monitor.sh +++ /dev/null @@ -1 +0,0 @@ - diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 
440ccb73ee..f5b35ee77a 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -352,7 +352,7 @@ async fn zpools_get( rqctx: RequestContext, ) -> Result>, HttpError> { let sa = rqctx.context(); - Ok(HttpResponseOk(sa.zpools_get().await.map_err(|e| Error::from(e))?)) + Ok(HttpResponseOk(sa.zpools_get().await)) } #[endpoint { diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 154a133272..9a3b6d4f1b 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -33,7 +33,6 @@ mod services; mod sled_agent; mod smf_helper; pub(crate) mod storage; -mod storage_manager; mod swap_device; mod updates; mod zone_bundle; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index 223289bb2e..f322126714 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -99,8 +99,7 @@ fn spawn_key_manager(log: &Logger) -> StorageKeyRequester { let secret_retriever = LrtqOrHardcodedSecretRetriever::new(); let (mut key_manager, storage_key_requester) = KeyManager::new(log, secret_retriever); - let key_manager_handle = - tokio::spawn(async move { key_manager.run().await }); + tokio::spawn(async move { key_manager.run().await }); storage_key_requester } @@ -137,7 +136,7 @@ fn spawn_hardware_monitor( hardware_manager: &HardwareManager, storage_handle: &StorageHandle, ) -> HardwareMonitorHandle { - let (monitor, handle) = + let (mut monitor, handle) = HardwareMonitor::new(log, hardware_manager, storage_handle); tokio::spawn(async move { monitor.run().await; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 01fababa4d..5a9b3939e4 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -6,11 +6,10 @@ use crate::bootstrap::params::StartSledAgentRequest; use crate::params::{ - DatasetKind, DatasetRequest, ServiceType, ServiceZoneRequest, - ServiceZoneService, ZoneType, + DatasetRequest, ServiceType, ServiceZoneRequest, ServiceZoneService, + ZoneType, }; use crate::rack_setup::config::SetupServiceConfig as Config; -use crate::storage::dataset::DatasetName; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; @@ -34,7 +33,7 @@ use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; -use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::dataset::{DatasetKind, DatasetName, CONFIG_DATASET}; use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::{BTreeSet, HashMap, HashSet}; diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index 189216fd9b..2c8814403b 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -8,7 +8,6 @@ use crate::bootstrap::{ config::BOOTSTRAP_AGENT_RACK_INIT_PORT, params::StartSledAgentRequest, }; use crate::rack_setup::config::SetupServiceConfig as Config; -use crate::storage_manager::StorageResources; use camino::Utf8PathBuf; use omicron_common::ledger::{self, Ledger, Ledgerable}; use schemars::JsonSchema; @@ -56,11 +55,12 @@ pub struct Plan { impl Plan { pub async fn load( log: &Logger, - storage: &StorageResources, + storage: &StorageHandle, ) -> Result, PlanError> { let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) 
.into_iter() .map(|p| p.join(RSS_SLED_PLAN_FILENAME)) .collect(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 475cbf8018..571ea45499 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -22,6 +22,7 @@ use crate::services::{self, ServiceManager}; use crate::updates::{ConfigUpdates, UpdateManager}; use crate::zone_bundle; use crate::zone_bundle::BundleError; +use bootstore::schemes::v0::NodeRequestError as BootstoreNodeRequestError; use camino::Utf8PathBuf; use dropshot::HttpError; use illumos_utils::opte::params::{ @@ -45,7 +46,6 @@ use omicron_common::backoff::{ }; use sled_hardware::underlay; use sled_hardware::HardwareManager; -use sled_storage::dataset::DatasetName; use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; @@ -112,7 +112,7 @@ pub enum Error { EarlyNetworkError(#[from] EarlyNetworkSetupError), #[error("Bootstore Error: {0}")] - Bootstore(#[from] bootstore::NodeRequestError), + Bootstore(#[from] BootstoreNodeRequestError), #[error("Failed to deserialize early network config: {0}")] EarlyNetworkDeserialize(serde_json::Error), @@ -340,10 +340,15 @@ impl SledAgent { match config.vmm_reservoir_percentage { Some(sz) if sz > 0 && sz < 100 => { - instances.set_reservoir_size(&hardware, sz).map_err(|e| { - error!(log, "Failed to set VMM reservoir size: {e}"); - e - })?; + instances + .set_reservoir_size( + &long_running_task_handles.hardware_manager, + sz, + ) + .map_err(|e| { + error!(log, "Failed to set VMM reservoir size: {e}"); + e + })?; } Some(sz) if sz == 0 => { warn!(log, "Not using VMM reservoir (size 0 bytes requested)"); @@ -729,9 +734,18 @@ impl SledAgent { } /// Gets the sled's current list of all zpools. - pub async fn zpools_get(&self) -> Result, Error> { - let zpools = self.inner.storage.get_zpools().await?; - Ok(zpools) + pub async fn zpools_get(&self) -> Vec { + self.inner + .storage + .get_latest_resources() + .await + .get_all_zpools() + .into_iter() + .map(|(name, variant)| Zpool { + id: name.id(), + disk_type: variant.into(), + }) + .collect() } /// Returns whether or not the sled believes itself to be a scrimlet diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs deleted file mode 100644 index bbf89e41fb..0000000000 --- a/sled-agent/src/storage_manager.rs +++ /dev/null @@ -1,928 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Management of sled-local storage. 
- -use crate::nexus::NexusClientWithResolver; -use crate::storage::dataset::DatasetName; -use crate::storage::dump_setup::DumpSetup; -use crate::zone_bundle::ZoneBundler; -use camino::Utf8PathBuf; -use derive_more::From; -use futures::stream::FuturesOrdered; -use futures::FutureExt; -use futures::StreamExt; -use illumos_utils::zpool::{ZpoolKind, ZpoolName}; -use illumos_utils::{zfs::Mountpoint, zpool::ZpoolInfo}; -use key_manager::StorageKeyRequester; -use nexus_client::types::PhysicalDiskDeleteRequest; -use nexus_client::types::PhysicalDiskKind; -use nexus_client::types::PhysicalDiskPutRequest; -use nexus_client::types::ZpoolPutRequest; -use omicron_common::api::external::{ByteCount, ByteCountRangeError}; -use omicron_common::backoff; -use omicron_common::disk::DiskIdentity; -use sled_hardware::{Disk, DiskVariant, UnparsedDisk}; -use slog::Logger; -use std::collections::hash_map; -use std::collections::HashMap; -use std::collections::HashSet; -use std::convert::TryFrom; -use std::pin::Pin; -use std::sync::Arc; -use std::sync::OnceLock; -use std::time::Duration; -use tokio::sync::{mpsc, oneshot, Mutex}; -use tokio::task::JoinHandle; -use tokio::time::{interval, MissedTickBehavior}; -use uuid::Uuid; - -#[cfg(test)] -use illumos_utils::{zfs::MockZfs as Zfs, zpool::MockZpool as Zpool}; -#[cfg(not(test))] -use illumos_utils::{zfs::Zfs, zpool::Zpool}; - -// A key manager can only become ready once. This occurs during RSS or cold -// boot when the bootstore has detected it has a key share. -static KEY_MANAGER_READY: OnceLock<()> = OnceLock::new(); - -// The type of a future which is used to send a notification to Nexus. -type NotifyFut = - Pin> + Send>>; - -#[derive(Debug)] -struct NewFilesystemRequest { - dataset_id: Uuid, - dataset_name: DatasetName, - responder: oneshot::Sender>, -} - -struct UnderlayRequest { - underlay: UnderlayAccess, - responder: oneshot::Sender>, -} - -// The directory within the debug dataset in which bundles are created. -const BUNDLE_DIRECTORY: &str = "bundle"; - -// The directory for zone bundles. -const ZONE_BUNDLE_DIRECTORY: &str = "zone"; - -/// Describes the access to the underlay used by the StorageManager. -pub struct UnderlayAccess { - pub nexus_client: NexusClientWithResolver, - pub sled_id: Uuid, -} - -// A worker that starts zones for pools as they are received. -struct StorageWorker { - log: Logger, - nexus_notifications: FuturesOrdered, - rx: mpsc::Receiver, - underlay: Arc>>, - - // A mechanism for requesting disk encryption keys from the - // [`key_manager::KeyManager`] - key_requester: StorageKeyRequester, - - // Invokes dumpadm(8) and savecore(8) when new disks are encountered - dump_setup: Arc, -} - -#[derive(Clone, Debug)] -enum NotifyDiskRequest { - Add { identity: DiskIdentity, variant: DiskVariant }, - Remove(DiskIdentity), -} - -#[derive(From, Clone, Debug, PartialEq, Eq, Hash)] -enum QueuedDiskCreate { - Real(UnparsedDisk), - Synthetic(ZpoolName), -} - -impl QueuedDiskCreate { - fn is_synthetic(&self) -> bool { - if let QueuedDiskCreate::Synthetic(_) = self { - true - } else { - false - } - } -} - -impl StorageWorker { - // Adds a "notification to nexus" to `nexus_notifications`, - // informing it about the addition of `pool_id` to this sled. - async fn add_zpool_notify(&mut self, pool: &Pool, size: ByteCount) { - // The underlay network is setup once at sled-agent startup. Before - // there is an underlay we want to avoid sending notifications to nexus for - // two reasons: - // 1. They can't possibly succeed - // 2. 
They increase the backoff time exponentially, so that once - // sled-agent does start it may take much longer to notify nexus - // than it would if we avoid this. This goes especially so for rack - // setup, when bootstrap agent is waiting an aribtrary time for RSS - // initialization. - if self.underlay.lock().await.is_none() { - return; - } - - let pool_id = pool.name.id(); - let DiskIdentity { vendor, serial, model } = pool.parent.clone(); - let underlay = self.underlay.clone(); - - let notify_nexus = move || { - let zpool_request = ZpoolPutRequest { - size: size.into(), - disk_vendor: vendor.clone(), - disk_serial: serial.clone(), - disk_model: model.clone(), - }; - let underlay = underlay.clone(); - - async move { - let underlay_guard = underlay.lock().await; - let Some(underlay) = underlay_guard.as_ref() else { - return Err(backoff::BackoffError::transient( - Error::UnderlayNotInitialized.to_string(), - )); - }; - let sled_id = underlay.sled_id; - let nexus_client = underlay.nexus_client.client().clone(); - drop(underlay_guard); - - nexus_client - .zpool_put(&sled_id, &pool_id, &zpool_request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - Ok(()) - } - }; - let log = self.log.clone(); - let name = pool.name.clone(); - let disk = pool.parent().clone(); - let log_post_failure = move |_, call_count, total_duration| { - if call_count == 0 { - info!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"); - } else if total_duration > std::time::Duration::from_secs(30) { - warn!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; - "total duration" => ?total_duration); - } - }; - self.nexus_notifications.push_back( - backoff::retry_notify_ext( - backoff::retry_policy_internal_service_aggressive(), - notify_nexus, - log_post_failure, - ) - .boxed(), - ); - } - - async fn ensure_using_exactly_these_disks( - &mut self, - resources: &StorageResources, - unparsed_disks: Vec, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - // Queue U.2 drives if necessary - // We clear all existing queued drives that are not synthetic and add - // new ones in the loop below - if let Some(queued) = queued_u2_drives { - info!( - self.log, - "Ensure exact disks: clearing non-synthetic queued disks." - ); - queued.retain(|d| d.is_synthetic()); - } - - let mut new_disks = HashMap::new(); - - // We may encounter errors while parsing any of the disks; keep track of - // any errors that occur and return any of them if something goes wrong. - // - // That being said, we should not prevent access to the other disks if - // only one failure occurs. - let mut err: Option = None; - - // Ensure all disks conform to the expected partition layout. - for disk in unparsed_disks.into_iter() { - if disk.variant() == DiskVariant::U2 { - if let Some(queued) = queued_u2_drives { - info!(self.log, "Queuing disk for upsert: {disk:?}"); - queued.insert(disk.into()); - continue; - } - } - match self.add_new_disk(disk, queued_u2_drives).await.map_err( - |err| { - warn!(self.log, "Could not ensure partitions: {err}"); - err - }, - ) { - Ok(disk) => { - new_disks.insert(disk.identity().clone(), disk); - } - Err(e) => { - warn!(self.log, "Cannot parse disk: {e}"); - err = Some(e.into()); - } - }; - } - - let mut disks = resources.disks.lock().await; - - // Remove disks that don't appear in the "new_disks" set. - // - // This also accounts for zpools and notifies Nexus. 
- let disks_to_be_removed = disks - .iter_mut() - .filter(|(key, old_disk)| { - // If this disk appears in the "new" and "old" set, it should - // only be removed if it has changed. - // - // This treats a disk changing in an unexpected way as a - // "removal and re-insertion". - match old_disk { - DiskWrapper::Real { disk, .. } => { - if let Some(new_disk) = new_disks.get(*key) { - // Changed Disk -> Disk should be removed. - new_disk != disk - } else { - // Real disk, not in the new set -> Disk should be removed. - true - } - } - // Synthetic disk -> Disk should NOT be removed. - DiskWrapper::Synthetic { .. } => false, - } - }) - .map(|(_key, disk)| disk.clone()) - .collect::>(); - - for disk in disks_to_be_removed { - if let Err(e) = self - .delete_disk_locked(&resources, &mut disks, &disk.identity()) - .await - { - warn!(self.log, "Failed to delete disk: {e}"); - err = Some(e); - } - } - - // Add new disks to `resources.disks`. - // - // This also accounts for zpools and notifies Nexus. - for (key, new_disk) in new_disks { - if let Some(old_disk) = disks.get(&key) { - // In this case, the disk should be unchanged. - // - // This assertion should be upheld by the filter above, which - // should remove disks that changed. - assert!(old_disk == &new_disk.into()); - } else { - let disk = DiskWrapper::Real { - disk: new_disk.clone(), - devfs_path: new_disk.devfs_path().clone(), - }; - if let Err(e) = - self.upsert_disk_locked(&resources, &mut disks, disk).await - { - warn!(self.log, "Failed to upsert disk: {e}"); - err = Some(e); - } - } - } - - if let Some(err) = err { - Err(err) - } else { - Ok(()) - } - } - - // Attempt to create a new disk via `sled_hardware::Disk::new()`. If the - // disk addition fails because the the key manager cannot load a secret, - // this indicates a transient error, and so we queue the disk so we can - // try again. - async fn add_new_disk( - &mut self, - unparsed_disk: UnparsedDisk, - queued_u2_drives: &mut Option>, - ) -> Result { - match sled_hardware::Disk::new( - &self.log, - unparsed_disk.clone(), - Some(&self.key_requester), - ) - .await - { - Ok(disk) => Ok(disk), - Err(sled_hardware::PooledDiskError::KeyManager(err)) => { - warn!( - self.log, - "Transient error: {err} - queuing disk {:?}", unparsed_disk - ); - if let Some(queued) = queued_u2_drives { - queued.insert(unparsed_disk.into()); - } else { - *queued_u2_drives = - Some(HashSet::from([unparsed_disk.into()])); - } - Err(sled_hardware::PooledDiskError::KeyManager(err)) - } - Err(err) => { - error!( - self.log, - "Persistent error: {err} - not queueing disk {:?}", - unparsed_disk - ); - Err(err) - } - } - } - - // Attempt to create a new synthetic disk via - // `sled_hardware::Disk::ensure_zpool_ready()`. If the disk addition fails - // because the the key manager cannot load a secret, this indicates a - // transient error, and so we queue the disk so we can try again. 
- async fn add_new_synthetic_disk( - &mut self, - zpool_name: ZpoolName, - queued_u2_drives: &mut Option>, - ) -> Result<(), sled_hardware::PooledDiskError> { - let synthetic_id = DiskIdentity { - vendor: "fake_vendor".to_string(), - serial: "fake_serial".to_string(), - model: zpool_name.id().to_string(), - }; - match sled_hardware::Disk::ensure_zpool_ready( - &self.log, - &zpool_name, - &synthetic_id, - Some(&self.key_requester), - ) - .await - { - Ok(()) => Ok(()), - Err(sled_hardware::PooledDiskError::KeyManager(err)) => { - warn!( - self.log, - "Transient error: {err} - queuing synthetic disk: {:?}", - zpool_name - ); - if let Some(queued) = queued_u2_drives { - queued.insert(zpool_name.into()); - } else { - *queued_u2_drives = - Some(HashSet::from([zpool_name.into()])); - } - Err(sled_hardware::PooledDiskError::KeyManager(err)) - } - Err(err) => { - error!( - self.log, - "Persistent error: {} - not queueing synthetic disk {:?}", - err, - zpool_name - ); - Err(err) - } - } - } - - async fn upsert_disk( - &mut self, - resources: &StorageResources, - disk: UnparsedDisk, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - // Queue U.2 drives if necessary - if let Some(queued) = queued_u2_drives { - if disk.variant() == DiskVariant::U2 { - info!(self.log, "Queuing disk for upsert: {disk:?}"); - queued.insert(disk.into()); - return Ok(()); - } - } - - info!(self.log, "Upserting disk: {disk:?}"); - - // Ensure the disk conforms to an expected partition layout. - let disk = - self.add_new_disk(disk, queued_u2_drives).await.map_err(|err| { - warn!(self.log, "Could not ensure partitions: {err}"); - err - })?; - - let mut disks = resources.disks.lock().await; - let disk = DiskWrapper::Real { - disk: disk.clone(), - devfs_path: disk.devfs_path().clone(), - }; - self.upsert_disk_locked(resources, &mut disks, disk).await - } - - async fn upsert_synthetic_disk( - &mut self, - resources: &StorageResources, - zpool_name: ZpoolName, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - // Queue U.2 drives if necessary - if let Some(queued) = queued_u2_drives { - if zpool_name.kind() == ZpoolKind::External { - info!( - self.log, - "Queuing synthetic disk for upsert: {zpool_name:?}" - ); - queued.insert(zpool_name.into()); - return Ok(()); - } - } - - info!(self.log, "Upserting synthetic disk for: {zpool_name:?}"); - - self.add_new_synthetic_disk(zpool_name.clone(), queued_u2_drives) - .await?; - let disk = DiskWrapper::Synthetic { zpool_name }; - let mut disks = resources.disks.lock().await; - self.upsert_disk_locked(resources, &mut disks, disk).await - } - - async fn upsert_disk_locked( - &mut self, - resources: &StorageResources, - disks: &mut tokio::sync::MutexGuard< - '_, - HashMap, - >, - disk: DiskWrapper, - ) -> Result<(), Error> { - disks.insert(disk.identity(), disk.clone()); - self.physical_disk_notify(NotifyDiskRequest::Add { - identity: disk.identity(), - variant: disk.variant(), - }) - .await; - self.upsert_zpool(&resources, disk.identity(), disk.zpool_name()) - .await?; - - self.dump_setup.update_dumpdev_setup(disks).await; - - Ok(()) - } - - async fn delete_disk( - &mut self, - resources: &StorageResources, - disk: UnparsedDisk, - ) -> Result<(), Error> { - info!(self.log, "Deleting disk: {disk:?}"); - // TODO: Don't we need to do some accounting, e.g. for all the information - // that's no longer accessible? Or is that up to Nexus to figure out at - // a later point-in-time? 
- // - // If we're storing zone images on the M.2s for internal services, how - // do we reconcile them? - let mut disks = resources.disks.lock().await; - self.delete_disk_locked(resources, &mut disks, disk.identity()).await - } - - async fn delete_disk_locked( - &mut self, - resources: &StorageResources, - disks: &mut tokio::sync::MutexGuard< - '_, - HashMap, - >, - key: &DiskIdentity, - ) -> Result<(), Error> { - if let Some(parsed_disk) = disks.remove(key) { - resources.pools.lock().await.remove(&parsed_disk.zpool_name().id()); - self.physical_disk_notify(NotifyDiskRequest::Remove(key.clone())) - .await; - } - - self.dump_setup.update_dumpdev_setup(disks).await; - - Ok(()) - } - - /// When the underlay becomes available, we need to notify nexus about any - /// discovered disks and pools, since we don't attempt to notify until there - /// is an underlay available. - async fn notify_nexus_about_existing_resources( - &mut self, - resources: &StorageResources, - ) -> Result<(), Error> { - let disks = resources.disks.lock().await; - for disk in disks.values() { - self.physical_disk_notify(NotifyDiskRequest::Add { - identity: disk.identity(), - variant: disk.variant(), - }) - .await; - } - - // We may encounter errors while processing any of the pools; keep track of - // any errors that occur and return any of them if something goes wrong. - // - // That being said, we should not prevent notification to nexus of the - // other pools if only one failure occurs. - let mut err: Option = None; - - let pools = resources.pools.lock().await; - for pool in pools.values() { - match ByteCount::try_from(pool.info.size()).map_err(|err| { - Error::BadPoolSize { name: pool.name.to_string(), err } - }) { - Ok(size) => self.add_zpool_notify(pool, size).await, - Err(e) => { - warn!(self.log, "Failed to notify nexus about pool: {e}"); - err = Some(e) - } - } - } - - if let Some(err) = err { - Err(err) - } else { - Ok(()) - } - } - - // Adds a "notification to nexus" to `self.nexus_notifications`, informing it - // about the addition/removal of a physical disk to this sled. - async fn physical_disk_notify(&mut self, disk: NotifyDiskRequest) { - // The underlay network is setup once at sled-agent startup. Before - // there is an underlay we want to avoid sending notifications to nexus for - // two reasons: - // 1. They can't possibly succeed - // 2. They increase the backoff time exponentially, so that once - // sled-agent does start it may take much longer to notify nexus - // than it would if we avoid this. This goes especially so for rack - // setup, when bootstrap agent is waiting an aribtrary time for RSS - // initialization. 
- if self.underlay.lock().await.is_none() { - return; - } - let underlay = self.underlay.clone(); - let disk2 = disk.clone(); - let notify_nexus = move || { - let disk = disk.clone(); - let underlay = underlay.clone(); - async move { - let underlay_guard = underlay.lock().await; - let Some(underlay) = underlay_guard.as_ref() else { - return Err(backoff::BackoffError::transient( - Error::UnderlayNotInitialized.to_string(), - )); - }; - let sled_id = underlay.sled_id; - let nexus_client = underlay.nexus_client.client().clone(); - drop(underlay_guard); - - match &disk { - NotifyDiskRequest::Add { identity, variant } => { - let request = PhysicalDiskPutRequest { - model: identity.model.clone(), - serial: identity.serial.clone(), - vendor: identity.vendor.clone(), - variant: match variant { - DiskVariant::U2 => PhysicalDiskKind::U2, - DiskVariant::M2 => PhysicalDiskKind::M2, - }, - sled_id, - }; - nexus_client - .physical_disk_put(&request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - } - NotifyDiskRequest::Remove(disk_identity) => { - let request = PhysicalDiskDeleteRequest { - model: disk_identity.model.clone(), - serial: disk_identity.serial.clone(), - vendor: disk_identity.vendor.clone(), - sled_id, - }; - nexus_client - .physical_disk_delete(&request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - } - } - Ok(()) - } - }; - let log = self.log.clone(); - // This notification is often invoked before Nexus has started - // running, so avoid flagging any errors as concerning until some - // time has passed. - let log_post_failure = move |_, call_count, total_duration| { - if call_count == 0 { - info!(log, "failed to notify nexus about {disk2:?}"); - } else if total_duration > std::time::Duration::from_secs(30) { - warn!(log, "failed to notify nexus about {disk2:?}"; - "total duration" => ?total_duration); - } - }; - self.nexus_notifications.push_back( - backoff::retry_notify_ext( - backoff::retry_policy_internal_service_aggressive(), - notify_nexus, - log_post_failure, - ) - .boxed(), - ); - } - - async fn upsert_zpool( - &mut self, - resources: &StorageResources, - parent: DiskIdentity, - pool_name: &ZpoolName, - ) -> Result<(), Error> { - let mut pools = resources.pools.lock().await; - let zpool = Pool::new(pool_name.clone(), parent)?; - - let pool = match pools.entry(pool_name.id()) { - hash_map::Entry::Occupied(mut entry) => { - // The pool already exists. - entry.get_mut().info = zpool.info; - return Ok(()); - } - hash_map::Entry::Vacant(entry) => entry.insert(zpool), - }; - info!(&self.log, "Storage manager processing zpool: {:#?}", pool.info); - - let size = ByteCount::try_from(pool.info.size()).map_err(|err| { - Error::BadPoolSize { name: pool_name.to_string(), err } - })?; - // Notify Nexus of the zpool. - self.add_zpool_notify(&pool, size).await; - Ok(()) - } - - // Small wrapper around `Self::do_work_internal` that ensures we always - // emit info to the log when we exit. - async fn do_work( - &mut self, - resources: StorageResources, - ) -> Result<(), Error> { - // We queue U.2 sleds until the StorageKeyRequester is ready to use. - let mut queued_u2_drives = Some(HashSet::new()); - loop { - match self.do_work_internal(&resources, &mut queued_u2_drives).await - { - Ok(()) => { - info!(self.log, "StorageWorker exited successfully"); - return Ok(()); - } - Err(e) => { - warn!( - self.log, - "StorageWorker encountered unexpected error: {}", e - ); - // ... for now, keep trying. 
- } - } - } - } - - async fn do_work_internal( - &mut self, - resources: &StorageResources, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - const QUEUED_DISK_RETRY_TIMEOUT: Duration = Duration::from_secs(5); - let mut interval = interval(QUEUED_DISK_RETRY_TIMEOUT); - interval.set_missed_tick_behavior(MissedTickBehavior::Delay); - loop { - tokio::select! { - _ = self.nexus_notifications.next(), - if !self.nexus_notifications.is_empty() => {}, - Some(request) = self.rx.recv() => { - // We want to queue failed requests related to the key manager - match self.handle_storage_worker_request( - resources, queued_u2_drives, request) - .await { - Err(Error::DiskError(_)) => { - // We already handle and log disk errors, no need to - // return here. - } - Err(e) => return Err(e), - Ok(()) => {} - } - } - _ = interval.tick(), if queued_u2_drives.is_some() && - KEY_MANAGER_READY.get().is_some()=> - { - self.upsert_queued_disks(resources, queued_u2_drives).await; - } - } - } - } - - async fn handle_storage_worker_request( - &mut self, - resources: &StorageResources, - queued_u2_drives: &mut Option>, - request: StorageWorkerRequest, - ) -> Result<(), Error> { - use StorageWorkerRequest::*; - match request { - AddDisk(disk) => { - self.upsert_disk(&resources, disk, queued_u2_drives).await?; - } - AddSyntheticDisk(zpool_name) => { - self.upsert_synthetic_disk( - &resources, - zpool_name, - queued_u2_drives, - ) - .await?; - } - RemoveDisk(disk) => { - self.delete_disk(&resources, disk).await?; - } - NewFilesystem(request) => { - let result = self.add_dataset(&resources, &request).await; - let _ = request.responder.send(result); - } - DisksChanged(disks) => { - self.ensure_using_exactly_these_disks( - &resources, - disks, - queued_u2_drives, - ) - .await?; - } - SetupUnderlayAccess(UnderlayRequest { underlay, responder }) => { - // If this is the first time establishing an - // underlay we should notify nexus of all existing - // disks and zpools. - // - // Instead of individual notifications, we should - // send a bulk notification as described in https:// - // github.com/oxidecomputer/omicron/issues/1917 - if self.underlay.lock().await.replace(underlay).is_none() { - self.notify_nexus_about_existing_resources(&resources) - .await?; - } - let _ = responder.send(Ok(())); - } - KeyManagerReady => { - let _ = KEY_MANAGER_READY.set(()); - self.upsert_queued_disks(resources, queued_u2_drives).await; - } - } - Ok(()) - } -} - -enum StorageWorkerRequest { - AddDisk(UnparsedDisk), - AddSyntheticDisk(ZpoolName), - RemoveDisk(UnparsedDisk), - DisksChanged(Vec), - NewFilesystem(NewFilesystemRequest), - SetupUnderlayAccess(UnderlayRequest), - KeyManagerReady, -} - -struct StorageManagerInner { - log: Logger, - - resources: StorageResources, - - tx: mpsc::Sender, - - // A handle to a worker which updates "pools". - task: JoinHandle>, -} - -/// A sled-local view of all attached storage. -#[derive(Clone)] -pub struct StorageManager { - inner: Arc, - zone_bundler: ZoneBundler, -} - -impl StorageManager { - /// Creates a new [`StorageManager`] which should manage local storage. 
- pub async fn new(log: &Logger, key_requester: StorageKeyRequester) -> Self { - let log = log.new(o!("component" => "StorageManager")); - let resources = StorageResources { - disks: Arc::new(Mutex::new(HashMap::new())), - pools: Arc::new(Mutex::new(HashMap::new())), - }; - let (tx, rx) = mpsc::channel(30); - - let zb_log = log.new(o!("component" => "ZoneBundler")); - let zone_bundler = - ZoneBundler::new(zb_log, resources.clone(), Default::default()); - - StorageManager { - inner: Arc::new(StorageManagerInner { - log: log.clone(), - resources: resources.clone(), - tx, - task: tokio::task::spawn(async move { - let dump_setup = Arc::new(DumpSetup::new(&log)); - let mut worker = StorageWorker { - log, - nexus_notifications: FuturesOrdered::new(), - rx, - underlay: Arc::new(Mutex::new(None)), - key_requester, - dump_setup, - }; - - worker.do_work(resources).await - }), - }), - zone_bundler, - } - } - - /// Return a reference to the object used to manage zone bundles. - /// - /// This can be cloned by other code wishing to create and manage their own - /// zone bundles. - pub fn zone_bundler(&self) -> &ZoneBundler { - &self.zone_bundler - } - - /// Adds underlay access to the storage manager. - pub async fn setup_underlay_access( - &self, - underlay: UnderlayAccess, - ) -> Result<(), Error> { - let (tx, rx) = oneshot::channel(); - self.inner - .tx - .send(StorageWorkerRequest::SetupUnderlayAccess(UnderlayRequest { - underlay, - responder: tx, - })) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send SetupUnderlayAccess request"); - rx.await.expect("Failed to await underlay setup") - } - - pub async fn get_zpools(&self) -> Result, Error> { - let disks = self.inner.resources.disks.lock().await; - let pools = self.inner.resources.pools.lock().await; - - let mut zpools = Vec::with_capacity(pools.len()); - - for (id, pool) in pools.iter() { - let disk_identity = &pool.parent; - let disk_type = if let Some(disk) = disks.get(&disk_identity) { - disk.variant().into() - } else { - // If the zpool claims to be attached to a disk that we - // don't know about, that's an error. - return Err(Error::ZpoolNotFound( - format!("zpool: {id} claims to be from unknown disk: {disk_identity:#?}") - )); - }; - zpools.push(crate::params::Zpool { id: *id, disk_type }); - } - - Ok(zpools) - } - - pub async fn upsert_filesystem( - &self, - dataset_id: Uuid, - dataset_name: DatasetName, - ) -> Result { - let (tx, rx) = oneshot::channel(); - let request = - NewFilesystemRequest { dataset_id, dataset_name, responder: tx }; - - self.inner - .tx - .send(StorageWorkerRequest::NewFilesystem(request)) - .await - .map_err(|e| e.to_string()) - .expect("Storage worker bug (not alive)"); - let dataset_name = rx.await.expect( - "Storage worker bug (dropped responder without responding)", - )?; - - Ok(dataset_name) - } -} diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 0e49d6d776..a1204c4c63 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -19,7 +19,6 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::sync::Mutex; use tokio::sync::broadcast; -use tokio::task::JoinHandle; use uuid::Uuid; mod gpt; diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 2b9e7cffae..0bdca5c19c 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -5,16 +5,15 @@ //! 
Discovered and usable disks and zpools use crate::dataset::M2_DEBUG_DATASET; -use crate::disk::{Disk, RawDisk}; +use crate::disk::Disk; use crate::error::Error; use crate::pool::Pool; use camino::Utf8PathBuf; use illumos_utils::zpool::ZpoolName; use omicron_common::disk::DiskIdentity; -use sled_hardware::{DiskVariant, UnparsedDisk}; +use sled_hardware::DiskVariant; use std::collections::BTreeMap; use std::sync::Arc; -use uuid::Uuid; // The directory within the debug dataset in which bundles are created. const BUNDLE_DIRECTORY: &str = "bundle"; @@ -113,8 +112,16 @@ impl StorageResources { .collect() } - /// Returns all zpools of a particular variant - pub fn all_zpools(&self, variant: DiskVariant) -> Vec { + pub fn get_all_zpools(&self) -> Vec<(ZpoolName, DiskVariant)> { + self.disks + .values() + .cloned() + .map(|(disk, _)| (disk.zpool_name().clone(), disk.variant())) + .collect() + } + + // Returns all zpools of a particular variant + fn all_zpools(&self, variant: DiskVariant) -> Vec { self.disks .values() .filter_map(|(disk, _)| { From e0b4b26e431d99538ec358f2f53d64dd6246a036 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 10 Oct 2023 03:50:16 +0000 Subject: [PATCH 29/66] wip --- nexus-client/Cargo.toml | 1 + nexus-client/src/lib.rs | 9 + sled-agent/src/{storage => }/dump_setup.rs | 0 sled-agent/src/hardware_monitor.rs | 2 +- sled-agent/src/lib.rs | 3 +- sled-agent/src/storage/mod.rs | 7 - sled-agent/src/storage_monitor.rs | 199 +++++++++++++++++++++ sled-hardware/Cargo.toml | 1 - 8 files changed, 212 insertions(+), 10 deletions(-) rename sled-agent/src/{storage => }/dump_setup.rs (100%) delete mode 100644 sled-agent/src/storage/mod.rs create mode 100644 sled-agent/src/storage_monitor.rs diff --git a/nexus-client/Cargo.toml b/nexus-client/Cargo.toml index 589562c930..7da76b418a 100644 --- a/nexus-client/Cargo.toml +++ b/nexus-client/Cargo.toml @@ -10,6 +10,7 @@ futures.workspace = true ipnetwork.workspace = true omicron-common.workspace = true omicron-passwords.workspace = true +sled-hardware.workspace = true progenitor.workspace = true regress.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } diff --git a/nexus-client/src/lib.rs b/nexus-client/src/lib.rs index e5cec83f39..c32f8be2e1 100644 --- a/nexus-client/src/lib.rs +++ b/nexus-client/src/lib.rs @@ -375,3 +375,12 @@ impl From } } } + +impl From for types::PhysicalDiskKind { + fn from(value: sled_hardware::DiskVariant) -> Self { + match value { + sled_hardware::DiskVariant::U2 => types::PhysicalDiskKind::U2, + sled_hardware::DiskVariant::M2 => types::PhysicalDiskKind::M2, + } + } +} diff --git a/sled-agent/src/storage/dump_setup.rs b/sled-agent/src/dump_setup.rs similarity index 100% rename from sled-agent/src/storage/dump_setup.rs rename to sled-agent/src/dump_setup.rs diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs index 03677e4e6d..f3402cb6bd 100644 --- a/sled-agent/src/hardware_monitor.rs +++ b/sled-agent/src/hardware_monitor.rs @@ -144,7 +144,7 @@ impl HardwareMonitor { ) } - /// Run the main receive loop of the `StorageManager` + /// Run the main receive loop of the `HardwareMonitor` /// /// This should be spawned into a tokio task pub async fn run(&mut self) { diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 9a3b6d4f1b..4c1266df9b 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -19,6 +19,7 @@ pub mod common; // Modules for the non-simulated sled agent. 
pub mod bootstrap; pub mod config; +pub(crate) mod dump_setup; pub(crate) mod hardware_monitor; mod http_entrypoints; mod instance; @@ -32,7 +33,7 @@ pub mod server; mod services; mod sled_agent; mod smf_helper; -pub(crate) mod storage; +mod storage_monitor; mod swap_device; mod updates; mod zone_bundle; diff --git a/sled-agent/src/storage/mod.rs b/sled-agent/src/storage/mod.rs deleted file mode 100644 index 663ebe8274..0000000000 --- a/sled-agent/src/storage/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Management of local storage - -pub(crate) mod dump_setup; diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs new file mode 100644 index 0000000000..bd21664d19 --- /dev/null +++ b/sled-agent/src/storage_monitor.rs @@ -0,0 +1,199 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A task that listens for storage events from [`sled_storage::StorageMonitor`] +//! and dispatches them to other parst of the bootstrap agent and sled agent +//! code. + +use crate::nexus::NexusClientWithResolver; +use nexus_client::types::PhysicalDiskDeleteRequest; +use nexus_client::types::PhysicalDiskKind; +use nexus_client::types::PhysicalDiskPutRequest; +use nexus_client::types::ZpoolPutRequest; +use omicron_common::api::external::ByteCount; +use sled_storage::disk::Disk; +use sled_storage::manager::StorageHandle; +use sled_storage::resources::StorageResources; +use slog::Logger; +use std::fmt::Debug; +use tokio::sync::mpsc; +use uuid::Uuid; + +const QUEUE_SIZE: usize = 10; + +/// A message sent from the `StorageMonitorHandle` to the `StorageMonitor`. +#[derive(Debug)] +pub enum StorageMonitorMsg { + UnderlayAvailable(UnderlayAccess), +} + +/// Describes the access to the underlay used by the StorageManager. +pub struct UnderlayAccess { + pub nexus_client: NexusClientWithResolver, + pub sled_id: Uuid, +} + +impl Debug for UnderlayAccess { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("UnderlayAccess") + .field("sled_id", &self.sled_id) + .finish() + } +} + +/// A mechanism for interacting with the StorageMonitor +#[derive(Clone)] +pub struct StorageMonitorHandle { + tx: mpsc::Sender, +} + +pub struct StorageMonitor { + log: Logger, + storage_manager: StorageHandle, + handle_rx: mpsc::Receiver, + + // A cached copy of the `StorageResources` from the last update + storage_resources: StorageResources, + + // Ability to access the underlay network + underlay: Option, +} + +impl StorageMonitor { + pub fn new( + log: &Logger, + storage_manager: StorageHandle, + ) -> (StorageMonitor, StorageMonitorHandle) { + let (handle_tx, handle_rx) = mpsc::channel(QUEUE_SIZE); + let storage_resources = StorageResources::default(); + let log = log.new(o!("component" => "StorageMonitor")); + ( + StorageMonitor { + log, + storage_manager, + handle_rx, + storage_resources, + underlay: None, + }, + StorageMonitorHandle { tx: handle_tx }, + ) + } + + /// Run the main receive loop of the `StorageMonitor` + /// + /// This should be spawned into a tokio task + pub async fn run(&mut self) { + loop { + tokio::select! 
{ + resources = self.storage_manager.wait_for_changes() => { + info!( + self.log, + "Received storage manager update"; + "resources" => ?resources + ); + self.handle_resource_update(resources).await; + } + Some(msg) = self.handle_rx.recv() => { + info!( + self.log, + "Received storage monitor message"; + "msg" => ?msg + ); + self.handle_monitor_msg(msg).await; + } + } + } + } + + async fn handle_resource_update( + &mut self, + updated_resources: StorageResources, + ) { + // If the underlay isn't available, we only record the changes. Nexus + // isn't yet reachable to notify. + if self.underlay.is_some() { + let nexus_updates = compute_resource_diffs( + &self.log, + &self.underlay.as_ref().unwrap().sled_id, + &self.storage_resources, + &updated_resources, + ); + // TODO: Notify nexus about diffs + } + // Save the updated `StorageResources` + self.storage_resources = updated_resources; + } +} + +struct NexusUpdates { + disk_puts: Vec, + disk_deletes: Vec, + zpool_puts: Vec, +} + +async fn compute_resource_diffs( + log: &Logger, + sled_id: &Uuid, + current: &StorageResources, + updated: &StorageResources, +) -> NexusUpdates { + let mut disk_puts = vec![]; + let mut disk_deletes = vec![]; + let mut zpool_puts = vec![]; + + // Diff the existing resources with the update to see what has changed + // This loop finds disks and pools that were modified or deleted + for (disk_id, (disk, pool)) in current.disks.iter() { + match updated.disks.get(disk_id) { + Some((updated_disk, updated_pool)) => { + if disk != updated_disk { + disk_puts.push(PhysicalDiskPutRequest { + sled_id: *sled_id, + model: disk_id.model.clone(), + serial: disk_id.serial.clone(), + vendor: disk_id.vendor.clone(), + variant: updated_disk.variant().into(), + }); + } + if pool != updated_pool { + match ByteCount::try_from(pool.info.size()) { + Ok(size) => zpool_puts.push(ZpoolPutRequest { + size: size.into(), + disk_model: disk_id.model.clone(), + disk_serial: disk_id.serial.clone(), + disk_vendor: disk_id.vendor.clone(), + }), + Err(err) => error!( + log, + "Error parsing pool size"; + "name" => pool.name.to_string(), + "err" => ?err), + } + } + } + None => disk_deletes.push(PhysicalDiskDeleteRequest { + model: disk_id.model.clone(), + serial: disk_id.serial.clone(), + vendor: disk_id.vendor.clone(), + sled_id, + }), + } + } + + // Diff the existing resources with the update to see what has changed + // This loop finds new disks and pools + for (disk_id, (updated_disk, updated_pool)) in updated.disks.iter() { + if !current.disks.contains_key(disk_id) { + disk_puts.push(PhysicalDiskPutRequest { + sled_id: *sled_id, + model: disk_id.model.clone(), + serial: disk_id.serial.clone(), + vendor: disk_id.vendor.clone(), + variant: updated_disk.variant().into(), + }); + } + } + + NexusUpdates { disk_puts, disk_deletes, zpool_puts } +} diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml index c6bc09f41e..5a6df575a0 100644 --- a/sled-hardware/Cargo.toml +++ b/sled-hardware/Cargo.toml @@ -11,7 +11,6 @@ camino.workspace = true cfg-if.workspace = true futures.workspace = true illumos-utils.workspace = true -key-manager.workspace = true libc.workspace = true macaddr.workspace = true nexus-client.workspace = true From 950a1d431a2421a6421ad584a1b0397a73e1dd2f Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Tue, 10 Oct 2023 18:51:03 +0000 Subject: [PATCH 30/66] wip --- Cargo.lock | 3 +- nexus-client/src/lib.rs | 10 ++ sled-agent/src/storage_monitor.rs | 158 ++++++++++++++++++++++++++++-- sled-hardware/Cargo.toml | 1 - sled-hardware/src/lib.rs | 10 -- sled-storage/src/lib.rs | 2 +- 6 files changed, 160 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6165b6963c..d8c01f0136 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4258,6 +4258,7 @@ dependencies = [ "schemars", "serde", "serde_json", + "sled-hardware", "slog", "uuid", ] @@ -7939,11 +7940,9 @@ dependencies = [ "futures", "illumos-devinfo", "illumos-utils", - "key-manager", "libc", "libefi-illumos", "macaddr", - "nexus-client 0.1.0", "omicron-common 0.1.0", "omicron-test-utils", "rand 0.8.5", diff --git a/nexus-client/src/lib.rs b/nexus-client/src/lib.rs index c32f8be2e1..e1980bcf6b 100644 --- a/nexus-client/src/lib.rs +++ b/nexus-client/src/lib.rs @@ -384,3 +384,13 @@ impl From for types::PhysicalDiskKind { } } } + +impl From for types::Baseboard { + fn from(b: sled_hardware::Baseboard) -> types::Baseboard { + types::Baseboard { + serial_number: b.identifier().to_string(), + part_number: b.model().to_string(), + revision: b.revision(), + } + } +} diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index bd21664d19..93233a8ed2 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -7,21 +7,33 @@ //! code. use crate::nexus::NexusClientWithResolver; +use derive_more::From; +use futures::stream::FuturesOrdered; +use futures::FutureExt; use nexus_client::types::PhysicalDiskDeleteRequest; use nexus_client::types::PhysicalDiskKind; use nexus_client::types::PhysicalDiskPutRequest; use nexus_client::types::ZpoolPutRequest; use omicron_common::api::external::ByteCount; +use omicron_common::backoff; use sled_storage::disk::Disk; use sled_storage::manager::StorageHandle; +use sled_storage::pool::Pool; use sled_storage::resources::StorageResources; use slog::Logger; use std::fmt::Debug; +use std::pin::Pin; use tokio::sync::mpsc; use uuid::Uuid; const QUEUE_SIZE: usize = 10; +#[derive(From, Clone, Debug)] +enum NexusDiskRequest { + Put(PhysicalDiskPutRequest), + Delete(PhysicalDiskDeleteRequest), +} + /// A message sent from the `StorageMonitorHandle` to the `StorageMonitor`. #[derive(Debug)] pub enum StorageMonitorMsg { @@ -29,6 +41,7 @@ pub enum StorageMonitorMsg { } /// Describes the access to the underlay used by the StorageManager. 
+#[derive(Clone)] pub struct UnderlayAccess { pub nexus_client: NexusClientWithResolver, pub sled_id: Uuid, @@ -58,6 +71,9 @@ pub struct StorageMonitor { // Ability to access the underlay network underlay: Option, + + // A queue for sending nexus notifications in order + nexus_notifications: FuturesOrdered, } impl StorageMonitor { @@ -75,6 +91,7 @@ impl StorageMonitor { handle_rx, storage_resources, underlay: None, + nexus_notifications: FuturesOrdered::new(), }, StorageMonitorHandle { tx: handle_tx }, ) @@ -106,6 +123,8 @@ impl StorageMonitor { } } + async fn handle_monitor_msg(&mut self, msg: StorageMonitorMsg) {} + async fn handle_resource_update( &mut self, updated_resources: StorageResources, @@ -119,20 +138,136 @@ impl StorageMonitor { &self.storage_resources, &updated_resources, ); - // TODO: Notify nexus about diffs + for put in nexus_updates.disk_puts { + self.physical_disk_notify(put.into()).await; + } + for del in nexus_updates.disk_deletes { + self.physical_disk_notify(del.into()).await; + } + for (pool, put) in nexus_updates.zpool_puts { + self.add_zpool_notify(pool, put).await; + } + + // TODO: Update Dump Setup if any diffs } // Save the updated `StorageResources` self.storage_resources = updated_resources; } + + // Adds a "notification to nexus" to `self.nexus_notifications`, informing it + // about the addition/removal of a physical disk to this sled. + async fn physical_disk_notify(&mut self, disk: NexusDiskRequest) { + let underlay = self.underlay.as_ref().unwrap().clone(); + let disk2 = disk.clone(); + let notify_nexus = move || { + let underlay = underlay.clone(); + let disk = disk.clone(); + async move { + let nexus_client = underlay.nexus_client.client().clone(); + + match &disk { + NexusDiskRequest::Put(request) => { + nexus_client + .physical_disk_put(&request) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; + } + NexusDiskRequest::Delete(request) => { + nexus_client + .physical_disk_delete(&request) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; + } + } + Ok(()) + } + }; + + let log = self.log.clone(); + // This notification is often invoked before Nexus has started + // running, so avoid flagging any errors as concerning until some + // time has passed. + let log_post_failure = move |_, call_count, total_duration| { + if call_count == 0 { + info!(log, "failed to notify nexus about {disk2:?}"); + } else if total_duration > std::time::Duration::from_secs(30) { + warn!(log, "failed to notify nexus about {disk2:?}"; + "total duration" => ?total_duration); + } + }; + self.nexus_notifications.push_back( + backoff::retry_notify_ext( + backoff::retry_policy_internal_service_aggressive(), + notify_nexus, + log_post_failure, + ) + .boxed(), + ); + } + + // Adds a "notification to nexus" to `nexus_notifications`, + // informing it about the addition of `pool_id` to this sled. 
+ async fn add_zpool_notify( + &mut self, + pool: Pool, + zpool_request: ZpoolPutRequest, + ) { + let pool_id = pool.name.id(); + let underlay = self.underlay.as_ref().unwrap().clone(); + + let notify_nexus = move || { + let underlay = underlay.clone(); + let zpool_request = zpool_request.clone(); + async move { + let sled_id = underlay.sled_id; + let nexus_client = underlay.nexus_client.client().clone(); + nexus_client + .zpool_put(&sled_id, &pool_id, &zpool_request) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; + Ok(()) + } + }; + + let log = self.log.clone(); + let name = pool.name.clone(); + let disk = pool.parent.clone(); + let log_post_failure = move |_, call_count, total_duration| { + if call_count == 0 { + info!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"); + } else if total_duration > std::time::Duration::from_secs(30) { + warn!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; + "total duration" => ?total_duration); + } + }; + self.nexus_notifications.push_back( + backoff::retry_notify_ext( + backoff::retry_policy_internal_service_aggressive(), + notify_nexus, + log_post_failure, + ) + .boxed(), + ); + } } +// The type of a future which is used to send a notification to Nexus. +type NotifyFut = + Pin> + Send>>; + struct NexusUpdates { disk_puts: Vec, disk_deletes: Vec, - zpool_puts: Vec, + zpool_puts: Vec<(Pool, ZpoolPutRequest)>, } -async fn compute_resource_diffs( +fn compute_resource_diffs( log: &Logger, sled_id: &Uuid, current: &StorageResources, @@ -158,12 +293,15 @@ async fn compute_resource_diffs( } if pool != updated_pool { match ByteCount::try_from(pool.info.size()) { - Ok(size) => zpool_puts.push(ZpoolPutRequest { - size: size.into(), - disk_model: disk_id.model.clone(), - disk_serial: disk_id.serial.clone(), - disk_vendor: disk_id.vendor.clone(), - }), + Ok(size) => zpool_puts.push(( + pool.clone(), + ZpoolPutRequest { + size: size.into(), + disk_model: disk_id.model.clone(), + disk_serial: disk_id.serial.clone(), + disk_vendor: disk_id.vendor.clone(), + }, + )), Err(err) => error!( log, "Error parsing pool size"; @@ -176,7 +314,7 @@ async fn compute_resource_diffs( model: disk_id.model.clone(), serial: disk_id.serial.clone(), vendor: disk_id.vendor.clone(), - sled_id, + sled_id: *sled_id, }), } } diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml index 5a6df575a0..0b97c160f3 100644 --- a/sled-hardware/Cargo.toml +++ b/sled-hardware/Cargo.toml @@ -13,7 +13,6 @@ futures.workspace = true illumos-utils.workspace = true libc.workspace = true macaddr.workspace = true -nexus-client.workspace = true omicron-common.workspace = true rand.workspace = true schemars.workspace = true diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index c81bcddbfb..3ae745118b 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -160,13 +160,3 @@ impl std::fmt::Display for Baseboard { } } } - -impl From for nexus_client::types::Baseboard { - fn from(b: Baseboard) -> nexus_client::types::Baseboard { - nexus_client::types::Baseboard { - serial_number: b.identifier().to_string(), - part_number: b.model().to_string(), - revision: b.revision(), - } - } -} diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs index fc08579d77..d4b64c55a5 100644 --- a/sled-storage/src/lib.rs +++ b/sled-storage/src/lib.rs @@ -13,5 +13,5 @@ pub mod disk; pub mod error; pub(crate) mod keyfile; pub mod manager; -pub(crate) mod pool; +pub mod pool; pub mod resources; From 
1a67b04095ff2066c9ad2e0d76358f04b0e60885 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 10 Oct 2023 22:57:32 +0000 Subject: [PATCH 31/66] wip --- sled-agent/src/storage_monitor.rs | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index 93233a8ed2..debd0d5e95 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -123,7 +123,31 @@ impl StorageMonitor { } } - async fn handle_monitor_msg(&mut self, msg: StorageMonitorMsg) {} + async fn handle_monitor_msg(&mut self, msg: StorageMonitorMsg) { + match msg { + StorageMonitorMsg::UnderlayAvailable(underlay) => { + let sled_id = underlay.sled_id; + self.underlay = Some(underlay); + self.notify_nexus_about_existing_resources(sled_id).await; + } + } + } + + /// When the underlay becomes available, we need to notify nexus about any + /// discovered disks and pools, since we don't attempt to notify until there + /// is an underlay available. + async fn notify_nexus_about_existing_resources(&mut self, sled_id: Uuid) { + let current = StorageResources::default(); + let updated = &self.storage_resources; + let nexus_updates = + compute_resource_diffs(&self.log, &sled_id, ¤t, updated); + for put in nexus_updates.disk_puts { + self.physical_disk_notify(put.into()).await; + } + for (pool, put) in nexus_updates.zpool_puts { + self.add_zpool_notify(pool, put).await; + } + } async fn handle_resource_update( &mut self, From 1e61ea90e52a5f6b909b61ea8b650088971a109a Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 11 Oct 2023 22:37:36 +0000 Subject: [PATCH 32/66] wip --- sled-agent/src/bootstrap/secret_retriever.rs | 2 +- sled-agent/src/dump_setup.rs | 8 +- sled-agent/src/long_running_tasks.rs | 22 +++ sled-agent/src/services.rs | 80 ++++++----- sled-agent/src/storage_monitor.rs | 25 +++- sled-agent/src/zone_bundle.rs | 133 ++++++++----------- sled-storage/src/resources.rs | 4 +- 7 files changed, 147 insertions(+), 127 deletions(-) diff --git a/sled-agent/src/bootstrap/secret_retriever.rs b/sled-agent/src/bootstrap/secret_retriever.rs index 5cae06310c..d6b542378d 100644 --- a/sled-agent/src/bootstrap/secret_retriever.rs +++ b/sled-agent/src/bootstrap/secret_retriever.rs @@ -92,7 +92,7 @@ impl LrtqOrHardcodedSecretRetriever { /// /// The local retriever only returns keys for epoch 0 #[derive(Debug)] -struct HardcodedSecretRetriever {} +pub struct HardcodedSecretRetriever {} #[async_trait] impl SecretRetriever for HardcodedSecretRetriever { diff --git a/sled-agent/src/dump_setup.rs b/sled-agent/src/dump_setup.rs index ea60998955..50bbda44b4 100644 --- a/sled-agent/src/dump_setup.rs +++ b/sled-agent/src/dump_setup.rs @@ -7,13 +7,13 @@ use omicron_common::disk::DiskIdentity; use sled_hardware::DiskVariant; use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET}; use sled_storage::disk::Disk; +use sled_storage::pool::Pool; use slog::Logger; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, HashSet}; use std::ffi::OsString; use std::path::{Path, PathBuf}; use std::sync::{Arc, Weak}; use std::time::{Duration, SystemTime, SystemTimeError, UNIX_EPOCH}; -use tokio::sync::MutexGuard; pub struct DumpSetup { worker: Arc>, @@ -100,13 +100,13 @@ const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300); impl DumpSetup { pub(crate) async fn update_dumpdev_setup( &self, - disks: &mut MutexGuard<'_, HashMap>, + disks: &Arc>, ) { let log = &self.log; let mut m2_dump_slices = 
Vec::new(); let mut u2_debug_datasets = Vec::new(); let mut m2_core_datasets = Vec::new(); - for (_id, disk) in disks.iter() { + for (_id, (disk, _)) in disks.iter() { if disk.is_synthetic() { // We only setup dump devices on real disks continue; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index f322126714..714bd1e406 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -17,6 +17,7 @@ use crate::bootstrap::bootstore::{ }; use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; use crate::hardware_monitor::{HardwareMonitor, HardwareMonitorHandle}; +use crate::storage_monitor::{StorageMonitor, StorageMonitorHandle}; use crate::zone_bundle::{CleanupContext, ZoneBundler}; use bootstore::schemes::v0 as bootstore; use key_manager::{KeyManager, StorageKeyRequester}; @@ -39,6 +40,11 @@ pub struct LongRunningTaskHandles { /// for establishing zpools on disks and managing their datasets. pub storage_manager: StorageHandle, + /// A task which monitors for updates from the `StorageManager` and takes + /// actions based on those updates, such as informing Nexus and setting + /// up dump locations. + pub storage_monitor: StorageMonitorHandle, + /// A mechanism for interacting with the hardware device tree pub hardware_manager: HardwareManager, @@ -63,6 +69,8 @@ pub async fn spawn_all_longrunning_tasks( let mut storage_manager = spawn_storage_manager(log, storage_key_requester.clone()); + let storage_monitor = spawn_storage_monitor(log, storage_manager.clone()); + // TODO: Does this need to run inside tokio::task::spawn_blocking? let hardware_manager = spawn_hardware_manager(log, sled_mode); @@ -87,6 +95,7 @@ pub async fn spawn_all_longrunning_tasks( LongRunningTaskHandles { storage_key_requester, storage_manager, + storage_monitor, hardware_manager, hardware_monitor, bootstore, @@ -115,6 +124,19 @@ fn spawn_storage_manager( handle } +fn spawn_storage_monitor( + log: &Logger, + storage_handle: StorageHandle, +) -> StorageMonitorHandle { + info!(log, "Starting StorageMonitor"); + let (mut storage_monitor, handle) = + StorageMonitor::new(log, storage_handle); + tokio::spawn(async move { + storage_monitor.run().await; + }); + handle +} + fn spawn_hardware_manager( log: &Logger, sled_mode: SledMode, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index c22eae6baa..6aaf69f198 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -5,7 +5,7 @@ //! Sled-local service management. //! //! For controlling zone-based storage services, refer to -//! [sled_hardware:manager::StorageManager]. +//! [sled_storage:manager::StorageManager]. //! //! For controlling virtual machine instances, refer to //! [crate::instance_manager::InstanceManager]. 
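
As an aside on the storage_monitor change above: notify_nexus_about_existing_resources reuses compute_resource_diffs by passing an empty StorageResources as the "current" state, so every disk and pool the sled already knows about is reported to Nexus as a put once the underlay becomes available. A minimal standalone sketch of that diff-against-empty idea, using simplified map types and hypothetical names (DiskId, puts_for) rather than the real StorageResources/NexusUpdates types:

    use std::collections::BTreeMap;

    // Hypothetical, simplified stand-ins for DiskIdentity and disk metadata.
    type DiskId = String;
    type DiskInfo = String;

    // Anything present in `updated` but missing from `current` becomes a "put".
    fn puts_for(
        current: &BTreeMap<DiskId, DiskInfo>,
        updated: &BTreeMap<DiskId, DiskInfo>,
    ) -> Vec<DiskId> {
        updated
            .keys()
            .filter(|id| !current.contains_key(*id))
            .cloned()
            .collect()
    }

    fn main() {
        let mut updated = BTreeMap::new();
        updated.insert("serial-1".to_string(), "u.2".to_string());
        updated.insert("serial-2".to_string(), "m.2".to_string());

        // Notifying about existing resources == diffing against an empty map:
        // everything already known is treated as newly added.
        let all_new = puts_for(&BTreeMap::new(), &updated);
        assert_eq!(all_new.len(), 2);
    }
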
@@ -2935,8 +2935,8 @@ impl ServiceManager { #[cfg(test)] mod test { use super::*; + use crate::bootstrap::secret_retriever::HardcodedSecretRetriever; use crate::params::{ServiceZoneService, ZoneType}; - use async_trait::async_trait; use illumos_utils::{ dladm::{ Etherstub, MockDladm, BOOTSTRAP_ETHERSTUB_NAME, @@ -2945,10 +2945,9 @@ mod test { svc, zone::MockZones, }; - use key_manager::{ - SecretRetriever, SecretRetrieverError, SecretState, VersionedIkm, - }; + use key_manager::KeyManager; use omicron_common::address::OXIMETER_PORT; + use sled_storage::manager::{StorageHandle, StorageManager}; use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; use uuid::Uuid; @@ -3141,29 +3140,28 @@ mod test { } } - pub struct TestSecretRetriever {} - - #[async_trait] - impl SecretRetriever for TestSecretRetriever { - async fn get_latest( - &self, - ) -> Result { - let epoch = 0; - let salt = [0u8; 32]; - let secret = [0x1d; 32]; - - Ok(VersionedIkm::new(epoch, salt, &secret)) - } + // Spawn storage related tasks and return a handle to pass to both the `ServiceManager` + // and `ZoneBundler`. However, it is expected that this handle is not actually used + // as there are no provisioned zones or datasets. This is consistent with the use of + // `test_config.override_paths` below. + async fn setup_storage(log: &Logger) -> StorageHandle { + let (mut key_manager, key_requester) = + KeyManager::new(log, HardcodedSecretRetriever {}); + let (mut manager, handle) = StorageManager::new(log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); - async fn get( - &self, - epoch: u64, - ) -> Result { - if epoch != 0 { - return Err(SecretRetrieverError::NoSuchEpoch(epoch)); - } - Ok(SecretState::Current(self.get_latest().await?)) - } + // Inform the storage manager that the secret retriever is ready We + // are using the HardcodedSecretRetriever, so no need to wait for RSS + // or anything to setup the LRTQ + handle.key_manager_ready().await; + handle } #[tokio::test] @@ -3174,10 +3172,10 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage(&log).await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3188,7 +3186,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources, + storage_handle, zone_bundler, ); test_config.override_paths(&mgr); @@ -3222,10 +3220,10 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage(&log).await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3236,7 +3234,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources, + storage_handle, zone_bundler, ); test_config.override_paths(&mgr); @@ -3275,10 +3273,10 @@ mod test { // First, spin up a ServiceManager, create a new service, and tear it // down. 
- let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage(&log).await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3289,7 +3287,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle.clone(), zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3322,7 +3320,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle.clone(), zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3358,10 +3356,10 @@ mod test { // First, spin up a ServiceManager, create a new service, and tear it // down. - let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage(&log).await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3372,7 +3370,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle.clone(), zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3410,7 +3408,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle, zone_bundler.clone(), ); test_config.override_paths(&mgr); diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index debd0d5e95..c48fd5cbfa 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -6,17 +6,16 @@ //! and dispatches them to other parst of the bootstrap agent and sled agent //! code. +use crate::dump_setup::DumpSetup; use crate::nexus::NexusClientWithResolver; use derive_more::From; use futures::stream::FuturesOrdered; use futures::FutureExt; use nexus_client::types::PhysicalDiskDeleteRequest; -use nexus_client::types::PhysicalDiskKind; use nexus_client::types::PhysicalDiskPutRequest; use nexus_client::types::ZpoolPutRequest; use omicron_common::api::external::ByteCount; use omicron_common::backoff; -use sled_storage::disk::Disk; use sled_storage::manager::StorageHandle; use sled_storage::pool::Pool; use sled_storage::resources::StorageResources; @@ -74,6 +73,9 @@ pub struct StorageMonitor { // A queue for sending nexus notifications in order nexus_notifications: FuturesOrdered, + + // Invokes dumpadm(8) and savecore(8) when new disks are encountered + dump_setup: DumpSetup, } impl StorageMonitor { @@ -83,6 +85,7 @@ impl StorageMonitor { ) -> (StorageMonitor, StorageMonitorHandle) { let (handle_tx, handle_rx) = mpsc::channel(QUEUE_SIZE); let storage_resources = StorageResources::default(); + let dump_setup = DumpSetup::new(&log); let log = log.new(o!("component" => "StorageMonitor")); ( StorageMonitor { @@ -92,6 +95,7 @@ impl StorageMonitor { storage_resources, underlay: None, nexus_notifications: FuturesOrdered::new(), + dump_setup, }, StorageMonitorHandle { tx: handle_tx }, ) @@ -129,6 +133,9 @@ impl StorageMonitor { let sled_id = underlay.sled_id; self.underlay = Some(underlay); self.notify_nexus_about_existing_resources(sled_id).await; + self.dump_setup + .update_dumpdev_setup(&self.storage_resources.disks) + .await; } } } @@ -162,6 +169,12 @@ impl StorageMonitor { &self.storage_resources, &updated_resources, ); + if nexus_updates.has_disk_updates() { + self.dump_setup + .update_dumpdev_setup(&self.storage_resources.disks) + .await; + } + for 
put in nexus_updates.disk_puts { self.physical_disk_notify(put.into()).await; } @@ -171,8 +184,6 @@ impl StorageMonitor { for (pool, put) in nexus_updates.zpool_puts { self.add_zpool_notify(pool, put).await; } - - // TODO: Update Dump Setup if any diffs } // Save the updated `StorageResources` self.storage_resources = updated_resources; @@ -291,6 +302,12 @@ struct NexusUpdates { zpool_puts: Vec<(Pool, ZpoolPutRequest)>, } +impl NexusUpdates { + fn has_disk_updates(&self) -> bool { + !self.disk_puts.is_empty() || !self.disk_deletes.is_empty() + } +} + fn compute_resource_diffs( log: &Logger, sled_id: &Uuid, diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index ea7481bd6d..55058ee23a 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -1764,7 +1764,6 @@ mod illumos_tests { use super::CleanupPeriod; use super::PriorityOrder; use super::StorageLimit; - use super::StorageResources; use super::Utf8Path; use super::Utf8PathBuf; use super::Uuid; @@ -1774,9 +1773,15 @@ mod illumos_tests { use super::ZoneBundleMetadata; use super::ZoneBundler; use super::ZFS; + use crate::bootstrap::secret_retriever::HardcodedSecretRetriever; use anyhow::Context; use chrono::TimeZone; use chrono::Utc; + use illumos_utils::zpool::{Zpool, ZpoolName}; + use key_manager::KeyManager; + use sled_storage::disk::RawDisk; + use sled_storage::disk::SyntheticDisk; + use sled_storage::manager::{StorageHandle, StorageManager}; use slog::Drain; use slog::Logger; use tokio::process::Command; @@ -1818,31 +1823,62 @@ mod illumos_tests { // system, that creates the directories implied by the `StorageResources` // expected disk structure. struct ResourceWrapper { - resources: StorageResources, + storage_handle: StorageHandle, + zpool_names: Vec, dirs: Vec, } + async fn setup_storage(log: &Logger) -> (StorageHandle, Vec) { + let (mut key_manager, key_requester) = + KeyManager::new(log, HardcodedSecretRetriever {}); + let (mut manager, handle) = StorageManager::new(log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + // Inform the storage manager that the secret retriever is ready We + // are using the HardcodedSecretRetriever, so no need to wait for RSS + // or anything to setup the LRTQ + handle.key_manager_ready().await; + + // Put the zpools under /rpool + let dir = + camino::Utf8PathBuf::from(format!("/rpool/{}", Uuid::new_v4())); + + let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); + let internal_disk: RawDisk = + SyntheticDisk::create_zpool(&dir, &internal_zpool_name).into(); + let external_zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let external_disk: RawDisk = + SyntheticDisk::create_zpool(&dir, &external_zpool_name).into(); + handle.upsert_disk(internal_disk).await; + handle.upsert_disk(external_disk).await; + + (handle, vec![internal_zpool_name, external_zpool_name]) + } + impl ResourceWrapper { // Create new storage resources, and mount fake datasets at the required // locations. 
- async fn new() -> Self { - let resources = StorageResources::new_for_test(); - let dirs = resources.all_zone_bundle_directories().await; - for d in dirs.iter() { - let id = - d.components().nth(3).unwrap().as_str().parse().unwrap(); - create_test_dataset(&id, d).await.unwrap(); - } - Self { resources, dirs } + async fn new(log: Logger) -> Self { + // Spawn the storage related tasks required for testing and insert + // synthetic disks. + let (storage_handle, zpool_names) = setup_storage(&log).await; + let resources = storage_handle.get_latest_resources().await; + let dirs = resources.all_zone_bundle_directories(); + Self { storage_handle, zpool_names, dirs } } } impl Drop for ResourceWrapper { fn drop(&mut self) { - for d in self.dirs.iter() { - let id = - d.components().nth(3).unwrap().as_str().parse().unwrap(); - remove_test_dataset(&id).unwrap(); + for name in &self.zpool_names { + Zpool::destroy(name).unwrap(); } } } @@ -1854,9 +1890,12 @@ mod illumos_tests { let log = Logger::root(drain, slog::o!("component" => "fake-cleanup-task")); let context = CleanupContext::default(); - let resource_wrapper = ResourceWrapper::new().await; - let bundler = - ZoneBundler::new(log, resource_wrapper.resources.clone(), context); + let resource_wrapper = ResourceWrapper::new(log.clone()).await; + let bundler = ZoneBundler::new( + log, + resource_wrapper.storage_handle.clone(), + context, + ); Ok(CleanupTestContext { resource_wrapper, context, bundler }) } @@ -1891,64 +1930,6 @@ mod illumos_tests { assert_eq!(context, new_context, "failed to update context"); } - // Quota applied to test datasets. - // - // This needs to be at least this big lest we get "out of space" errors when - // creating. Not sure where those come from, but could be ZFS overhead. - const TEST_QUOTA: u64 = 1024 * 32; - - async fn create_test_dataset( - id: &Uuid, - mountpoint: &Utf8PathBuf, - ) -> anyhow::Result<()> { - let output = Command::new("/usr/bin/pfexec") - .arg(ZFS) - .arg("create") - .arg("-o") - .arg(format!("quota={TEST_QUOTA}")) - .arg("-o") - .arg(format!("mountpoint={mountpoint}")) - .arg(format!("rpool/{id}")) - .output() - .await - .context("failed to spawn zfs create operation")?; - anyhow::ensure!( - output.status.success(), - "zfs create operation failed: {}", - String::from_utf8_lossy(&output.stderr), - ); - - // Make the path operable by the test code. 
- let output = Command::new("/usr/bin/pfexec") - .arg("chmod") - .arg("a+rw") - .arg(&mountpoint) - .output() - .await - .context("failed to spawn chmod operation")?; - anyhow::ensure!( - output.status.success(), - "chmod-ing the dataset failed: {}", - String::from_utf8_lossy(&output.stderr), - ); - Ok(()) - } - - fn remove_test_dataset(id: &Uuid) -> anyhow::Result<()> { - let output = std::process::Command::new("/usr/bin/pfexec") - .arg(ZFS) - .arg("destroy") - .arg(format!("rpool/{id}")) - .output() - .context("failed to spawn zfs destroy operation")?; - anyhow::ensure!( - output.status.success(), - "zfs destroy operation failed: {}", - String::from_utf8_lossy(&output.stderr), - ); - Ok(()) - } - async fn run_test_with_zfs_dataset(test: T) where T: FnOnce(CleanupTestContext) -> Fut, diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 0bdca5c19c..64136e756d 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -46,7 +46,9 @@ impl StorageResources { /// Insert a disk and its zpool /// /// Return true, if data was changed, false otherwise - pub(crate) fn insert_disk(&mut self, disk: Disk) -> Result { + /// + /// This really should not be used outside this crate, except for testing + pub fn insert_disk(&mut self, disk: Disk) -> Result { let disk_id = disk.identity().clone(); let zpool_name = disk.zpool_name().clone(); let zpool = Pool::new(zpool_name, disk_id.clone())?; From 6a81c2c70e1b70ab0ceac2704525210bc7938948 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 12 Oct 2023 04:45:56 +0000 Subject: [PATCH 33/66] wip --- Cargo.lock | 1 + sled-agent/Cargo.toml | 3 ++- sled-agent/src/services.rs | 20 ++++++++++++++++- sled-agent/src/sled_agent.rs | 11 +++++++++ sled-agent/src/storage_monitor.rs | 27 ++++++++++------------ sled-agent/src/zone_bundle.rs | 37 ++++++++++++++++++------------- sled-storage/Cargo.toml | 5 +++++ sled-storage/src/dataset.rs | 14 ++++++++++-- 8 files changed, 84 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d8c01f0136..9cc8995cb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7963,6 +7963,7 @@ dependencies = [ "async-trait", "camino", "camino-tempfile", + "cfg-if 1.0.0", "derive_more", "glob", "illumos-utils", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index e219461b9b..50af5209dd 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -96,7 +96,8 @@ subprocess.workspace = true slog-async.workspace = true slog-term.workspace = true -illumos-utils = { workspace = true, features = ["testing"] } +illumos-utils = { workspace = true, features = ["testing", "tmp_keypath"] } +sled-storage = { workspace = true, features = ["testing"] } # # Disable doc builds by default for our binaries to work around issue diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 6aaf69f198..63e8067b8d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2937,6 +2937,7 @@ mod test { use super::*; use crate::bootstrap::secret_retriever::HardcodedSecretRetriever; use crate::params::{ServiceZoneService, ZoneType}; + use illumos_utils::zpool::ZpoolName; use illumos_utils::{ dladm::{ Etherstub, MockDladm, BOOTSTRAP_ETHERSTUB_NAME, @@ -2947,6 +2948,8 @@ mod test { }; use key_manager::KeyManager; use omicron_common::address::OXIMETER_PORT; + use sled_storage::disk::{RawDisk, SyntheticDisk}; + use sled_storage::manager::{StorageHandle, StorageManager}; use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; @@ -3017,7 
+3020,6 @@ mod test { let wait_ctx = svc::wait_for_service_context(); wait_ctx.expect().return_once(|_, _| Ok(())); - // Import the manifest, enable the service let execute_ctx = illumos_utils::execute_context(); execute_ctx.expect().times(..).returning(|_| { Ok(std::process::Output { @@ -3161,6 +3163,22 @@ mod test { // are using the HardcodedSecretRetriever, so no need to wait for RSS // or anything to setup the LRTQ handle.key_manager_ready().await; + + // Create some backing disks + let tempdir = camino_tempfile::Utf8TempDir::new().unwrap(); + + // These must be internal zpools + //let mut zpool_names = vec![]; + let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); + let internal_disk: RawDisk = + SyntheticDisk::new(internal_zpool_name).into(); + handle.upsert_disk(internal_disk).await; + let external_zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let external_disk: RawDisk = + SyntheticDisk::new(external_zpool_name).into(); + handle.upsert_disk(external_disk).await; + + //zpool_names.push(internal_zpool_name); handle } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 571ea45499..e448f0719c 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -19,6 +19,7 @@ use crate::params::{ VpcFirewallRule, ZoneBundleMetadata, Zpool, }; use crate::services::{self, ServiceManager}; +use crate::storage_monitor::UnderlayAccess; use crate::updates::{ConfigUpdates, UpdateManager}; use crate::zone_bundle; use crate::zone_bundle::BundleError; @@ -329,6 +330,16 @@ impl SledAgent { *sled_address.ip(), ); + // Inform the `StorageMonitor` that the underlay is available so that + // it can try to contact nexus. + long_running_task_handles + .storage_monitor + .underlay_available(UnderlayAccess { + nexus_client: nexus_client.clone(), + sled_id: request.id, + }) + .await; + let instances = InstanceManager::new( parent_log.clone(), nexus_client.clone(), diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index c48fd5cbfa..da9f6b4897 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -60,6 +60,15 @@ pub struct StorageMonitorHandle { tx: mpsc::Sender, } +impl StorageMonitorHandle { + pub async fn underlay_available(&self, underlay_access: UnderlayAccess) { + self.tx + .send(StorageMonitorMsg::UnderlayAvailable(underlay_access)) + .await + .unwrap(); + } +} + pub struct StorageMonitor { log: Logger, storage_manager: StorageHandle, @@ -133,9 +142,6 @@ impl StorageMonitor { let sled_id = underlay.sled_id; self.underlay = Some(underlay); self.notify_nexus_about_existing_resources(sled_id).await; - self.dump_setup - .update_dumpdev_setup(&self.storage_resources.disks) - .await; } } } @@ -169,11 +175,6 @@ impl StorageMonitor { &self.storage_resources, &updated_resources, ); - if nexus_updates.has_disk_updates() { - self.dump_setup - .update_dumpdev_setup(&self.storage_resources.disks) - .await; - } for put in nexus_updates.disk_puts { self.physical_disk_notify(put.into()).await; @@ -185,6 +186,8 @@ impl StorageMonitor { self.add_zpool_notify(pool, put).await; } } + self.dump_setup.update_dumpdev_setup(&updated_resources.disks).await; + // Save the updated `StorageResources` self.storage_resources = updated_resources; } @@ -302,12 +305,6 @@ struct NexusUpdates { zpool_puts: Vec<(Pool, ZpoolPutRequest)>, } -impl NexusUpdates { - fn has_disk_updates(&self) -> bool { - !self.disk_puts.is_empty() || !self.disk_deletes.is_empty() - } -} - fn compute_resource_diffs( log: &Logger, 
sled_id: &Uuid, @@ -362,7 +359,7 @@ fn compute_resource_diffs( // Diff the existing resources with the update to see what has changed // This loop finds new disks and pools - for (disk_id, (updated_disk, updated_pool)) in updated.disks.iter() { + for (disk_id, (updated_disk, _)) in updated.disks.iter() { if !current.disks.contains_key(disk_id) { disk_puts.push(PhysicalDiskPutRequest { sled_id: *sled_id, diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 55058ee23a..c2f6fceadf 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -184,6 +184,7 @@ impl Inner { async fn bundle_directories(&self) -> Vec { let resources = self.storage_handle.get_latest_resources().await; let expected = resources.all_zone_bundle_directories(); + println!("dirs = {:?}", expected); let mut out = Vec::with_capacity(expected.len()); for each in expected.into_iter() { if tokio::fs::create_dir_all(&each).await.is_ok() { @@ -1772,7 +1773,6 @@ mod illumos_tests { use super::ZoneBundleInfo; use super::ZoneBundleMetadata; use super::ZoneBundler; - use super::ZFS; use crate::bootstrap::secret_retriever::HardcodedSecretRetriever; use anyhow::Context; use chrono::TimeZone; @@ -1784,7 +1784,6 @@ mod illumos_tests { use sled_storage::manager::{StorageHandle, StorageManager}; use slog::Drain; use slog::Logger; - use tokio::process::Command; #[tokio::test] async fn test_zfs_quota() { @@ -1846,20 +1845,21 @@ mod illumos_tests { // or anything to setup the LRTQ handle.key_manager_ready().await; - // Put the zpools under /rpool - let dir = - camino::Utf8PathBuf::from(format!("/rpool/{}", Uuid::new_v4())); + let tempdir = camino_tempfile::Utf8TempDir::new().unwrap(); - let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); - let internal_disk: RawDisk = - SyntheticDisk::create_zpool(&dir, &internal_zpool_name).into(); - let external_zpool_name = ZpoolName::new_external(Uuid::new_v4()); - let external_disk: RawDisk = - SyntheticDisk::create_zpool(&dir, &external_zpool_name).into(); - handle.upsert_disk(internal_disk).await; - handle.upsert_disk(external_disk).await; - - (handle, vec![internal_zpool_name, external_zpool_name]) + // These must be internal zpools + let mut zpool_names = vec![]; + for _ in 0..2 { + let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); + let internal_disk: RawDisk = SyntheticDisk::create_zpool( + tempdir.path(), + &internal_zpool_name, + ) + .into(); + handle.upsert_disk(internal_disk).await; + zpool_names.push(internal_zpool_name); + } + (handle, zpool_names) } impl ResourceWrapper { @@ -1871,6 +1871,7 @@ mod illumos_tests { let (storage_handle, zpool_names) = setup_storage(&log).await; let resources = storage_handle.get_latest_resources().await; let dirs = resources.all_zone_bundle_directories(); + info!(log, "Initial dirs = {:?}", dirs); Self { storage_handle, zpool_names, dirs } } } @@ -1930,6 +1931,12 @@ mod illumos_tests { assert_eq!(context, new_context, "failed to update context"); } + // Quota applied to test datasets. + // + // This needs to be at least this big lest we get "out of space" errors when + // creating. Not sure where those come from, but could be ZFS overhead. 
+ const TEST_QUOTA: u64 = sled_storage::dataset::DEBUG_DATASET_QUOTA as u64; + async fn run_test_with_zfs_dataset(test: T) where T: FnOnce(CleanupTestContext) -> Fut, diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index 617a0a0fd7..efb6afd6bc 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [dependencies] async-trait.workspace = true camino.workspace = true +cfg-if.workspace = true derive_more.workspace = true glob.workspace = true illumos-utils.workspace = true @@ -31,3 +32,7 @@ uuid.workspace = true illumos-utils = { workspace = true, features = ["tmp_keypath"] } omicron-test-utils.workspace = true camino-tempfile.workspace = true + +[features] +# Quotas and the like can be shrunk via this feature +testing = [] diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index 71e04a6935..e56fde9aa4 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -6,6 +6,7 @@ use crate::keyfile::KeyFile; use camino::Utf8PathBuf; +use cfg_if::cfg_if; use illumos_utils::zfs::{ self, DestroyDatasetErrorVariant, EncryptionDetails, Keypath, Mountpoint, SizeDetails, Zfs, @@ -26,10 +27,19 @@ pub const CRASH_DATASET: &'static str = "crash"; pub const CLUSTER_DATASET: &'static str = "cluster"; pub const CONFIG_DATASET: &'static str = "config"; pub const M2_DEBUG_DATASET: &'static str = "debug"; + +cfg_if! { + if #[cfg(any(test, feature = "testing"))] { + // Tuned for zone_bundle tests + pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 10); + } else { + // TODO-correctness: This value of 100GiB is a pretty wild guess, and should be + // tuned as needed. + pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30); + } +} // TODO-correctness: This value of 100GiB is a pretty wild guess, and should be // tuned as needed. -pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30); -// ditto. pub const DUMP_DATASET_QUOTA: usize = 100 * (1 << 30); // passed to zfs create -o compression= pub const DUMP_DATASET_COMPRESSION: &'static str = "gzip-9"; From e3b77cfffa8bda3135d29ea5edb0960c2970856c Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Tue, 24 Oct 2023 01:01:18 +0000 Subject: [PATCH 34/66] wip - FakeStorageManager --- illumos-utils/src/zpool.rs | 11 +++++++ sled-agent/src/services.rs | 36 ++++----------------- sled-storage/src/manager.rs | 61 +++++++++++++++++++++++++++++++++++ sled-storage/src/pool.rs | 7 ++++ sled-storage/src/resources.rs | 18 +++++++++++ 5 files changed, 104 insertions(+), 29 deletions(-) diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index f0916b236a..f2c395e22b 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -128,6 +128,17 @@ impl ZpoolInfo { pub fn health(&self) -> ZpoolHealth { self.health } + + #[cfg(any(test, feature = "testing"))] + pub fn new_hardcoded(name: String) -> ZpoolInfo { + ZpoolInfo { + name, + size: 1024 * 1024 * 64, + allocated: 1024, + free: 1024 * 1023 * 64, + health: ZpoolHealth::Online, + } + } } impl FromStr for ZpoolInfo { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 63e8067b8d..264325b6e5 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2935,7 +2935,6 @@ impl ServiceManager { #[cfg(test)] mod test { use super::*; - use crate::bootstrap::secret_retriever::HardcodedSecretRetriever; use crate::params::{ServiceZoneService, ZoneType}; use illumos_utils::zpool::ZpoolName; use illumos_utils::{ @@ -2946,11 +2945,10 @@ mod test { svc, zone::MockZones, }; - use key_manager::KeyManager; use omicron_common::address::OXIMETER_PORT; use sled_storage::disk::{RawDisk, SyntheticDisk}; - use sled_storage::manager::{StorageHandle, StorageManager}; + use sled_storage::manager::{FakeStorageManager, StorageHandle}; use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; use uuid::Uuid; @@ -3142,33 +3140,14 @@ mod test { } } - // Spawn storage related tasks and return a handle to pass to both the `ServiceManager` - // and `ZoneBundler`. However, it is expected that this handle is not actually used - // as there are no provisioned zones or datasets. This is consistent with the use of - // `test_config.override_paths` below. 
- async fn setup_storage(log: &Logger) -> StorageHandle { - let (mut key_manager, key_requester) = - KeyManager::new(log, HardcodedSecretRetriever {}); - let (mut manager, handle) = StorageManager::new(log, key_requester); - - // Spawn the key_manager so that it will respond to requests for encryption keys - tokio::spawn(async move { key_manager.run().await }); + async fn setup_storage() -> StorageHandle { + let (mut manager, handle) = FakeStorageManager::new(); // Spawn the storage manager as done by sled-agent tokio::spawn(async move { manager.run().await; }); - // Inform the storage manager that the secret retriever is ready We - // are using the HardcodedSecretRetriever, so no need to wait for RSS - // or anything to setup the LRTQ - handle.key_manager_ready().await; - - // Create some backing disks - let tempdir = camino_tempfile::Utf8TempDir::new().unwrap(); - - // These must be internal zpools - //let mut zpool_names = vec![]; let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); let internal_disk: RawDisk = SyntheticDisk::new(internal_zpool_name).into(); @@ -3178,7 +3157,6 @@ mod test { SyntheticDisk::new(external_zpool_name).into(); handle.upsert_disk(external_disk).await; - //zpool_names.push(internal_zpool_name); handle } @@ -3190,7 +3168,7 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let storage_handle = setup_storage(&log).await; + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), storage_handle.clone(), @@ -3238,7 +3216,7 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let storage_handle = setup_storage(&log).await; + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), storage_handle.clone(), @@ -3291,7 +3269,7 @@ mod test { // First, spin up a ServiceManager, create a new service, and tear it // down. - let storage_handle = setup_storage(&log).await; + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), storage_handle.clone(), @@ -3374,7 +3352,7 @@ mod test { // First, spin up a ServiceManager, create a new service, and tear it // down. - let storage_handle = setup_storage(&log).await; + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), storage_handle.clone(), diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index e00c9ad4fa..e12fb337c8 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -159,6 +159,67 @@ impl StorageHandle { } } +// Some sled-agent tests cannot currently use the real StorageManager +// and want to fake the entire behavior, but still have access to the +// `StorageResources`. We allow this via use of the `FakeStorageManager` +// that will respond to real storage requests from a real `StorageHandle`. 
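
By way of illustration, the intended usage mirrors the setup_storage helpers later in this series: spawn the fake, then feed it synthetic disks through the ordinary StorageHandle. A sketch assuming the FakeStorageManager, SyntheticDisk, and StorageHandle APIs shown in these patches; the test name and final assertion are illustrative only:

    use illumos_utils::zpool::ZpoolName;
    use sled_storage::disk::{RawDisk, SyntheticDisk};
    use sled_storage::manager::FakeStorageManager;
    use uuid::Uuid;

    #[tokio::test]
    async fn fake_storage_example() {
        let (mut manager, handle) = FakeStorageManager::new();
        // Drive the fake's receive loop, just as sled-agent does for the real manager.
        tokio::spawn(async move { manager.run().await });

        // Only synthetic disks are accepted; a real disk panics the fake.
        let zpool = ZpoolName::new_internal(Uuid::new_v4());
        let disk: RawDisk = SyntheticDisk::new(zpool).into();
        handle.upsert_disk(disk).await;

        // Requests flow through the same StorageHandle used by production code.
        let resources = handle.get_latest_resources().await;
        assert!(!resources.all_zone_bundle_directories().is_empty());
    }
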
+#[cfg(feature = "testing")] +pub struct FakeStorageManager { + rx: mpsc::Receiver, + resources: StorageResources, + resource_updates: watch::Sender, +} + +impl FakeStorageManager { + pub fn new() -> (Self, StorageHandle) { + let (tx, rx) = mpsc::channel(QUEUE_SIZE); + let resources = StorageResources::default(); + let (update_tx, update_rx) = watch::channel(resources.clone()); + ( + Self { rx, resources, resource_updates: update_tx }, + StorageHandle { tx, resource_updates: update_rx }, + ) + } + + /// Run the main receive loop of the `StorageManager` + /// + /// This should be spawned into a tokio task + pub async fn run(&mut self) { + loop { + // The sending side should never disappear + match self.rx.recv().await.unwrap() { + StorageRequest::AddDisk(raw_disk) => { + if self.add_disk(raw_disk) { + self.resource_updates + .send_replace(self.resources.clone()); + } + } + StorageRequest::GetLatestResources(tx) => { + let _ = tx.send(self.resources.clone()); + } + _ => { + unreachable!(); + } + } + } + } + + // Add a disk to `StorageResources` if it is new and return Ok(true) if so + fn add_disk(&mut self, raw_disk: RawDisk) -> bool { + let disk = match raw_disk { + RawDisk::Real(_) => { + panic!( + "Only synthetic disks can be used with `FakeStorageManager`" + ); + } + RawDisk::Synthetic(synthetic_disk) => { + Disk::Synthetic(synthetic_disk) + } + }; + self.resources.insert_fake_disk(disk) + } +} + /// The storage manager responsible for the state of the storage /// on a sled. The storage manager runs in its own task and is interacted /// with via the [`StorageHandle`]. diff --git a/sled-storage/src/pool.rs b/sled-storage/src/pool.rs index bac851df46..cc71aeb19d 100644 --- a/sled-storage/src/pool.rs +++ b/sled-storage/src/pool.rs @@ -25,4 +25,11 @@ impl Pool { let info = Zpool::get_info(&name.to_string())?; Ok(Pool { name, info, parent }) } + + /// Return a Pool consisting of fake info + #[cfg(feature = "testing")] + pub fn new_with_fake_info(name: ZpoolName, parent: DiskIdentity) -> Pool { + let info = ZpoolInfo::new_hardcoded(name.to_string()); + Pool { name, info, parent } + } } diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 64136e756d..ffb0e93ef2 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -65,6 +65,24 @@ impl StorageResources { Ok(true) } + /// Insert a disk while creating a fake pool + /// This is a workaround for current mock based testing strategies + /// in the sled-agent. + /// + /// Return true, if data was changed, false otherwise + #[cfg(feature = "testing")] + pub fn insert_fake_disk(&mut self, disk: Disk) -> bool { + let disk_id = disk.identity().clone(); + let zpool_name = disk.zpool_name().clone(); + let zpool = Pool::new_with_fake_info(zpool_name, disk_id.clone()); + if self.disks.contains_key(&disk_id) { + return false; + } + // Either the disk or zpool changed + Arc::make_mut(&mut self.disks).insert(disk_id, (disk, zpool)); + true + } + /// Delete a real disk and its zpool /// /// Return true, if data was changed, false otherwise From 79bd794ee4ecc2911d5138602a1b9a261a84ce3b Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Tue, 24 Oct 2023 16:03:12 +0000 Subject: [PATCH 35/66] fix zone bundle tests --- sled-agent/src/zone_bundle.rs | 103 ++++++++++++++++++++++++---------- sled-storage/src/manager.rs | 2 +- sled-storage/src/resources.rs | 4 +- 3 files changed, 76 insertions(+), 33 deletions(-) diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index c2f6fceadf..004a0ac646 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -1773,17 +1773,17 @@ mod illumos_tests { use super::ZoneBundleInfo; use super::ZoneBundleMetadata; use super::ZoneBundler; - use crate::bootstrap::secret_retriever::HardcodedSecretRetriever; + use super::ZFS; use anyhow::Context; use chrono::TimeZone; use chrono::Utc; - use illumos_utils::zpool::{Zpool, ZpoolName}; - use key_manager::KeyManager; + use illumos_utils::zpool::ZpoolName; use sled_storage::disk::RawDisk; use sled_storage::disk::SyntheticDisk; - use sled_storage::manager::{StorageHandle, StorageManager}; + use sled_storage::manager::{FakeStorageManager, StorageHandle}; use slog::Drain; use slog::Logger; + use tokio::process::Command; #[tokio::test] async fn test_zfs_quota() { @@ -1827,35 +1827,20 @@ mod illumos_tests { dirs: Vec, } - async fn setup_storage(log: &Logger) -> (StorageHandle, Vec) { - let (mut key_manager, key_requester) = - KeyManager::new(log, HardcodedSecretRetriever {}); - let (mut manager, handle) = StorageManager::new(log, key_requester); - - // Spawn the key_manager so that it will respond to requests for encryption keys - tokio::spawn(async move { key_manager.run().await }); + async fn setup_storage() -> (StorageHandle, Vec) { + let (mut manager, handle) = FakeStorageManager::new(); // Spawn the storage manager as done by sled-agent tokio::spawn(async move { manager.run().await; }); - // Inform the storage manager that the secret retriever is ready We - // are using the HardcodedSecretRetriever, so no need to wait for RSS - // or anything to setup the LRTQ - handle.key_manager_ready().await; - - let tempdir = camino_tempfile::Utf8TempDir::new().unwrap(); - // These must be internal zpools let mut zpool_names = vec![]; for _ in 0..2 { let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); - let internal_disk: RawDisk = SyntheticDisk::create_zpool( - tempdir.path(), - &internal_zpool_name, - ) - .into(); + let internal_disk: RawDisk = + SyntheticDisk::new(internal_zpool_name.clone()).into(); handle.upsert_disk(internal_disk).await; zpool_names.push(internal_zpool_name); } @@ -1865,21 +1850,27 @@ mod illumos_tests { impl ResourceWrapper { // Create new storage resources, and mount fake datasets at the required // locations. - async fn new(log: Logger) -> Self { + async fn new() -> Self { // Spawn the storage related tasks required for testing and insert // synthetic disks. 
- let (storage_handle, zpool_names) = setup_storage(&log).await; + let (storage_handle, zpool_names) = setup_storage().await; let resources = storage_handle.get_latest_resources().await; let dirs = resources.all_zone_bundle_directories(); - info!(log, "Initial dirs = {:?}", dirs); + for d in dirs.iter() { + let id = + d.components().nth(3).unwrap().as_str().parse().unwrap(); + create_test_dataset(&id, d).await.unwrap(); + } Self { storage_handle, zpool_names, dirs } } } impl Drop for ResourceWrapper { fn drop(&mut self) { - for name in &self.zpool_names { - Zpool::destroy(name).unwrap(); + for d in self.dirs.iter() { + let id = + d.components().nth(3).unwrap().as_str().parse().unwrap(); + remove_test_dataset(&id).unwrap(); } } } @@ -1891,7 +1882,7 @@ mod illumos_tests { let log = Logger::root(drain, slog::o!("component" => "fake-cleanup-task")); let context = CleanupContext::default(); - let resource_wrapper = ResourceWrapper::new(log.clone()).await; + let resource_wrapper = ResourceWrapper::new().await; let bundler = ZoneBundler::new( log, resource_wrapper.storage_handle.clone(), @@ -1935,7 +1926,59 @@ mod illumos_tests { // // This needs to be at least this big lest we get "out of space" errors when // creating. Not sure where those come from, but could be ZFS overhead. - const TEST_QUOTA: u64 = sled_storage::dataset::DEBUG_DATASET_QUOTA as u64; + const TEST_QUOTA: u64 = 1024 * 32; + + async fn create_test_dataset( + id: &Uuid, + mountpoint: &Utf8PathBuf, + ) -> anyhow::Result<()> { + let output = Command::new("/usr/bin/pfexec") + .arg(ZFS) + .arg("create") + .arg("-o") + .arg(format!("quota={TEST_QUOTA}")) + .arg("-o") + .arg(format!("mountpoint={mountpoint}")) + .arg(format!("rpool/{id}")) + .output() + .await + .context("failed to spawn zfs create operation")?; + anyhow::ensure!( + output.status.success(), + "zfs create operation failed: {}", + String::from_utf8_lossy(&output.stderr), + ); + + // Make the path operable by the test code. 
+ let output = Command::new("/usr/bin/pfexec") + .arg("chmod") + .arg("a+rw") + .arg(&mountpoint) + .output() + .await + .context("failed to spawn chmod operation")?; + anyhow::ensure!( + output.status.success(), + "chmod-ing the dataset failed: {}", + String::from_utf8_lossy(&output.stderr), + ); + Ok(()) + } + + fn remove_test_dataset(id: &Uuid) -> anyhow::Result<()> { + let output = std::process::Command::new("/usr/bin/pfexec") + .arg(ZFS) + .arg("destroy") + .arg(format!("rpool/{id}")) + .output() + .context("failed to spawn zfs destroy operation")?; + anyhow::ensure!( + output.status.success(), + "zfs destroy operation failed: {}", + String::from_utf8_lossy(&output.stderr), + ); + Ok(()) + } async fn run_test_with_zfs_dataset(test: T) where diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index e12fb337c8..410dcc225d 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -204,7 +204,7 @@ impl FakeStorageManager { } } - // Add a disk to `StorageResources` if it is new and return Ok(true) if so + // Add a disk to `StorageResources` if it is new and return true if so fn add_disk(&mut self, raw_disk: RawDisk) -> bool { let disk = match raw_disk { RawDisk::Real(_) => { diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index ffb0e93ef2..51fd3ae222 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -45,7 +45,7 @@ pub struct StorageResources { impl StorageResources { /// Insert a disk and its zpool /// - /// Return true, if data was changed, false otherwise + /// Return true if data was changed, false otherwise /// /// This really should not be used outside this crate, except for testing pub fn insert_disk(&mut self, disk: Disk) -> Result { @@ -69,7 +69,7 @@ impl StorageResources { /// This is a workaround for current mock based testing strategies /// in the sled-agent. /// - /// Return true, if data was changed, false otherwise + /// Return true if data was changed, false otherwise #[cfg(feature = "testing")] pub fn insert_fake_disk(&mut self, disk: Disk) -> bool { let disk_id = disk.identity().clone(); From 91742c6e79c04fccd832a8f20734da324858c85f Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 24 Oct 2023 16:15:33 +0000 Subject: [PATCH 36/66] wip --- sled-agent/src/zone_bundle.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 004a0ac646..34c49a2f25 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -1823,11 +1823,10 @@ mod illumos_tests { // expected disk structure. struct ResourceWrapper { storage_handle: StorageHandle, - zpool_names: Vec, dirs: Vec, } - async fn setup_storage() -> (StorageHandle, Vec) { + async fn setup_storage() -> StorageHandle { let (mut manager, handle) = FakeStorageManager::new(); // Spawn the storage manager as done by sled-agent @@ -1836,15 +1835,13 @@ mod illumos_tests { }); // These must be internal zpools - let mut zpool_names = vec![]; for _ in 0..2 { let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); let internal_disk: RawDisk = SyntheticDisk::new(internal_zpool_name.clone()).into(); handle.upsert_disk(internal_disk).await; - zpool_names.push(internal_zpool_name); } - (handle, zpool_names) + handle } impl ResourceWrapper { @@ -1853,7 +1850,7 @@ mod illumos_tests { async fn new() -> Self { // Spawn the storage related tasks required for testing and insert // synthetic disks. 
- let (storage_handle, zpool_names) = setup_storage().await; + let storage_handle = setup_storage().await; let resources = storage_handle.get_latest_resources().await; let dirs = resources.all_zone_bundle_directories(); for d in dirs.iter() { @@ -1861,7 +1858,7 @@ mod illumos_tests { d.components().nth(3).unwrap().as_str().parse().unwrap(); create_test_dataset(&id, d).await.unwrap(); } - Self { storage_handle, zpool_names, dirs } + Self { storage_handle, dirs } } } From 34903bc2cb1c25d7d68adf93beb2a384a52004fa Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 24 Oct 2023 17:40:52 +0000 Subject: [PATCH 37/66] wip --- sled-agent/src/bootstrap/{bootstore.rs => bootstore_setup.rs} | 0 sled-agent/src/bootstrap/mod.rs | 2 +- sled-agent/src/long_running_tasks.rs | 2 +- sled-storage/src/manager.rs | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) rename sled-agent/src/bootstrap/{bootstore.rs => bootstore_setup.rs} (100%) diff --git a/sled-agent/src/bootstrap/bootstore.rs b/sled-agent/src/bootstrap/bootstore_setup.rs similarity index 100% rename from sled-agent/src/bootstrap/bootstore.rs rename to sled-agent/src/bootstrap/bootstore_setup.rs diff --git a/sled-agent/src/bootstrap/mod.rs b/sled-agent/src/bootstrap/mod.rs index 5bf25b8521..590e13c891 100644 --- a/sled-agent/src/bootstrap/mod.rs +++ b/sled-agent/src/bootstrap/mod.rs @@ -4,7 +4,7 @@ //! Bootstrap-related utilities -pub(crate) mod bootstore; +pub(crate) mod bootstore_setup; pub mod client; pub mod config; pub mod early_networking; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index 714bd1e406..e87c990175 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -12,7 +12,7 @@ //! these tasks are supposed to run forever, and they can shutdown if their //! handles are dropped. -use crate::bootstrap::bootstore::{ +use crate::bootstrap::bootstore_setup::{ new_bootstore_config, poll_ddmd_for_bootstore_peer_update, }; use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 410dcc225d..667264b4b7 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -170,6 +170,7 @@ pub struct FakeStorageManager { resource_updates: watch::Sender, } +#[cfg(feature = "testing")] impl FakeStorageManager { pub fn new() -> (Self, StorageHandle) { let (tx, rx) = mpsc::channel(QUEUE_SIZE); From bbddcb4df496c35eef59747b4b036217c9ebf359 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Tue, 24 Oct 2023 19:50:41 +0000 Subject: [PATCH 38/66] cargo check --all-targets works --- Cargo.lock | 1 + installinator/Cargo.toml | 1 + installinator/src/hardware.rs | 6 ++++-- installinator/src/write.rs | 5 +++-- sled-agent/src/bootstrap/secret_retriever.rs | 2 +- sled-storage/src/disk.rs | 7 +++++++ 6 files changed, 17 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9cc8995cb6..380bcf8bf7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3496,6 +3496,7 @@ dependencies = [ "serde", "sha2", "sled-hardware", + "sled-storage", "slog", "slog-async", "slog-envlogger", diff --git a/installinator/Cargo.toml b/installinator/Cargo.toml index c0e7625e6e..cda32a60d3 100644 --- a/installinator/Cargo.toml +++ b/installinator/Cargo.toml @@ -30,6 +30,7 @@ reqwest.workspace = true serde.workspace = true sha2.workspace = true sled-hardware.workspace = true +sled-storage.workspace = true slog.workspace = true slog-async.workspace = true slog-envlogger.workspace = true diff --git a/installinator/src/hardware.rs b/installinator/src/hardware.rs index ffa0b74739..b037384cbe 100644 --- a/installinator/src/hardware.rs +++ b/installinator/src/hardware.rs @@ -6,10 +6,11 @@ use anyhow::anyhow; use anyhow::ensure; use anyhow::Context; use anyhow::Result; -use sled_hardware::Disk; use sled_hardware::DiskVariant; use sled_hardware::HardwareManager; use sled_hardware::SledMode; +use sled_storage::disk::Disk; +use sled_storage::disk::RawDisk; use slog::info; use slog::Logger; @@ -28,7 +29,8 @@ impl Hardware { anyhow!("failed to create HardwareManager: {err}") })?; - let disks = hardware.disks(); + let disks: Vec = + hardware.disks().into_iter().map(|disk| disk.into()).collect(); info!( log, "found gimlet hardware"; diff --git a/installinator/src/write.rs b/installinator/src/write.rs index 6c0c1f63c7..22dd2adbf6 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -122,8 +122,9 @@ impl WriteDestination { ); let zpool_name = disk.zpool_name().clone(); - let control_plane_dir = zpool_name - .dataset_mountpoint(sled_hardware::INSTALL_DATASET); + let control_plane_dir = zpool_name.dataset_mountpoint( + sled_storage::dataset::INSTALL_DATASET, + ); match drives.entry(slot) { Entry::Vacant(entry) => { diff --git a/sled-agent/src/bootstrap/secret_retriever.rs b/sled-agent/src/bootstrap/secret_retriever.rs index d6b542378d..5cae06310c 100644 --- a/sled-agent/src/bootstrap/secret_retriever.rs +++ b/sled-agent/src/bootstrap/secret_retriever.rs @@ -92,7 +92,7 @@ impl LrtqOrHardcodedSecretRetriever { /// /// The local retriever only returns keys for epoch 0 #[derive(Debug)] -pub struct HardcodedSecretRetriever {} +struct HardcodedSecretRetriever {} #[async_trait] impl SecretRetriever for HardcodedSecretRetriever { diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index f5a0e60c8f..f5209def77 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -116,6 +116,13 @@ impl RawDisk { pub fn is_real(&self) -> bool { !self.is_synthetic() } + + pub fn devfs_path(&self) -> &Utf8PathBuf { + match self { + Self::Real(disk) => disk.devfs_path(), + Self::Synthetic(_) => unreachable!(), + } + } } /// A physical [`PooledDisk`] or a [`SyntheticDisk`] that contains or is backed From b44defa8a9915a031dd6f3b89db56627a5c1fe2c Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Tue, 24 Oct 2023 22:56:46 +0000 Subject: [PATCH 39/66] feed hikari --- Cargo.lock | 1 + sled-agent/src/bootstrap/server.rs | 10 ---------- sled-storage/Cargo.toml | 1 + 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 524db338d6..85eb29e1a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8210,6 +8210,7 @@ dependencies = [ "nexus-client 0.1.0", "omicron-common 0.1.0", "omicron-test-utils", + "omicron-workspace-hack", "rand 0.8.5", "schemars", "serde", diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 17d988e749..635b33893d 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -25,7 +25,6 @@ use crate::config::ConfigError; use crate::long_running_tasks::LongRunningTaskHandles; use crate::server::Server as SledAgentServer; use crate::services::ServiceManager; -use crate::sled_agent::SledAgent; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; @@ -312,15 +311,6 @@ enum SledAgentState { ServerStarted(SledAgentServer), } -impl SledAgentState { - fn sled_agent(&self) -> Option<&SledAgent> { - match self { - SledAgentState::Bootstrapping => None, - SledAgentState::ServerStarted(server) => Some(server.sled_agent()), - } - } -} - #[derive(thiserror::Error, Debug)] pub enum SledAgentServerStartError { #[error("Failed to start sled-agent server: {0}")] diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index efb6afd6bc..38863e160b 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -27,6 +27,7 @@ slog.workspace = true thiserror.workspace = true tokio.workspace = true uuid.workspace = true +omicron-workspace-hack.workspace = true [dev-dependencies] illumos-utils = { workspace = true, features = ["tmp_keypath"] } From d60994cb76daebfbe18e74994c0a9085b7be1e6f Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 25 Oct 2023 19:50:06 +0000 Subject: [PATCH 40/66] tests pass --- illumos-utils/src/lib.rs | 27 ++++++++++++++++++++++++++- sled-agent/src/services.rs | 3 ++- sled-storage/src/manager.rs | 8 ++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index 345f097ae2..e50b35849a 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -4,6 +4,8 @@ //! Wrappers around illumos-specific commands. +use std::sync::atomic::{AtomicBool, Ordering}; + use cfg_if::cfg_if; pub mod addrobj; @@ -93,7 +95,7 @@ mod inner { // Helper function for starting the process and checking the // exit code result. - pub fn execute( + pub fn execute_helper( command: &mut std::process::Command, ) -> Result { let output = command.output().map_err(|err| { @@ -108,6 +110,29 @@ mod inner { } } +// Due to feature unification, the `testing` feature is enabled when some tests +// don't actually want to use it. We allow them to opt out of the use of the +// free function here. We also explicitly opt-in where mocks are used. +// +// We can remove all this when we get rid of the mocks. +pub static USE_MOCKS: AtomicBool = AtomicBool::new(false); + +pub fn execute( + command: &mut std::process::Command, +) -> Result { + cfg_if! { + if #[cfg(any(test, feature = "testing"))] { + if USE_MOCKS.load(Ordering::SeqCst) { + mock_inner::execute_helper(command) + } else { + inner::execute_helper(command) + } + } else { + inner::execute_helper(command) + } + } +} + cfg_if! 
{ if #[cfg(any(test, feature = "testing"))] { pub use mock_inner::*; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index b9a8cdada0..c692cca309 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -3037,6 +3037,7 @@ mod test { // Returns the expectations for a new service to be created. fn expect_new_service() -> Vec> { + illumos_utils::USE_MOCKS.store(true, Ordering::SeqCst); // Create a VNIC let create_vnic_ctx = MockDladm::create_vnic_context(); create_vnic_ctx.expect().return_once( @@ -3079,7 +3080,7 @@ mod test { let wait_ctx = svc::wait_for_service_context(); wait_ctx.expect().return_once(|_, _| Ok(())); - let execute_ctx = illumos_utils::execute_context(); + let execute_ctx = illumos_utils::execute_helper_context(); execute_ctx.expect().times(..).returning(|_| { Ok(std::process::Output { status: std::process::ExitStatus::from_raw(0), diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index fd0e607348..46fb34a3bf 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -606,6 +606,7 @@ mod tests { #[tokio::test] async fn add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let logctx = test_setup_log( "add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued", ); @@ -630,6 +631,7 @@ mod tests { #[tokio::test] async fn ensure_u2_gets_added_to_resources() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let logctx = test_setup_log("ensure_u2_gets_added_to_resources"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); @@ -651,6 +653,7 @@ mod tests { #[tokio::test] async fn wait_for_bootdisk() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let logctx = test_setup_log("wait_for_bootdisk"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); @@ -677,6 +680,7 @@ mod tests { #[tokio::test] async fn queued_disks_get_added_as_resources() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let logctx = test_setup_log("queued_disks_get_added_as_resources"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); @@ -714,6 +718,7 @@ mod tests { /// This allows us to control timing precisely. 
#[tokio::test] async fn queued_disks_get_requeued_on_secret_retriever_error() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let logctx = test_setup_log( "queued_disks_get_requeued_on_secret_retriever_error", ); @@ -766,6 +771,7 @@ mod tests { #[tokio::test] async fn delete_disk_triggers_notification() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let logctx = test_setup_log("delete_disk_triggers_notification"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); @@ -806,6 +812,7 @@ mod tests { #[tokio::test] async fn ensure_using_exactly_these_disks() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let logctx = test_setup_log("ensure_using_exactly_these_disks"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); @@ -922,6 +929,7 @@ mod tests { #[tokio::test] async fn upsert_filesystem() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let logctx = test_setup_log("upsert_filesystem"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); From 564f44bbd1e01c91df20f14697eef240f84744cd Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 25 Oct 2023 19:54:12 +0000 Subject: [PATCH 41/66] addendum --- illumos-utils/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index e50b35849a..c882f258c9 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -114,6 +114,10 @@ mod inner { // don't actually want to use it. We allow them to opt out of the use of the // free function here. We also explicitly opt-in where mocks are used. // +// Note that this only works if the tests that use mocks and those that don't +// are run sequentially. However, this is how we do things in CI with nextest, +// so there is no problem currently. +// // We can remove all this when we get rid of the mocks. pub static USE_MOCKS: AtomicBool = AtomicBool::new(false); From 396fddad47d4d4f456d77cdf30b3a06391a80bd9 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 25 Oct 2023 21:11:49 +0000 Subject: [PATCH 42/66] clippy clean --- sled-agent/src/bootstrap/bootstore_setup.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sled-agent/src/bootstrap/bootstore_setup.rs b/sled-agent/src/bootstrap/bootstore_setup.rs index 3c7e860b4a..9eb0a87c03 100644 --- a/sled-agent/src/bootstrap/bootstore_setup.rs +++ b/sled-agent/src/bootstrap/bootstore_setup.rs @@ -5,6 +5,8 @@ //! Helpers for configuring and starting the bootstore during bootstrap agent //! startup. +#![allow(clippy::result_large_err)] + use super::config::BOOTSTORE_PORT; use super::server::StartError; use bootstore::schemes::v0 as bootstore; From 56a614f11b66b660da05e34d7f6eefe72f2d0adf Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 25 Oct 2023 18:10:51 -0400 Subject: [PATCH 43/66] fix sim builds --- sled-hardware/src/non_illumos/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index 6e36330df0..d8372dd8aa 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -2,7 +2,9 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use crate::disk::{DiskError, DiskPaths, DiskVariant, Partition, UnparsedDisk}; +use crate::disk::{ + DiskPaths, DiskVariant, Partition, PooledDiskError, UnparsedDisk, +}; use crate::{Baseboard, SledMode}; use slog::Logger; use std::collections::HashSet; @@ -16,6 +18,7 @@ use tokio::sync::broadcast; /// /// If you're actually trying to run the Sled Agent on non-illumos platforms, /// use the simulated sled agent, which does not attempt to abstract hardware. +#[derive(Clone)] pub struct HardwareManager {} impl HardwareManager { @@ -56,7 +59,7 @@ pub fn ensure_partition_layout( _log: &Logger, _paths: &DiskPaths, _variant: DiskVariant, -) -> Result, DiskError> { +) -> Result, PooledDiskError> { unimplemented!("Accessing hardware unsupported on non-illumos"); } From 5e74730aba439e34c1c44ffbf2b5fa2d470154d8 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 25 Oct 2023 22:13:05 +0000 Subject: [PATCH 44/66] fix doc build --- illumos-utils/src/lib.rs | 1 + nexus/src/app/background/common.rs | 4 ++-- sled-agent/src/services.rs | 2 +- sled-agent/src/storage_monitor.rs | 2 +- sled-hardware/src/disk.rs | 6 +++--- sled-storage/src/resources.rs | 4 ++-- wicketd/src/artifacts/extracted_artifacts.rs | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index c882f258c9..3b696d178b 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -4,6 +4,7 @@ //! Wrappers around illumos-specific commands. +#[allow(unused)] use std::sync::atomic::{AtomicBool, Ordering}; use cfg_if::cfg_if; diff --git a/nexus/src/app/background/common.rs b/nexus/src/app/background/common.rs index 3fcf0483a5..b5291cf93c 100644 --- a/nexus/src/app/background/common.rs +++ b/nexus/src/app/background/common.rs @@ -177,7 +177,7 @@ pub struct Driver { /// /// This is returned by [`Driver::register()`] to identify the corresponding /// background task. It's then accepted by functions like -/// [`Driver::activate()`] and [`Driver::status()`] to identify the task. +/// [`Driver::activate()`] and [`Driver::task_status()`] to identify the task. #[derive(Clone, Debug, Ord, PartialOrd, PartialEq, Eq)] pub struct TaskHandle(String); @@ -277,7 +277,7 @@ impl Driver { /// Enumerate all registered background tasks /// /// This is aimed at callers that want to get the status of all background - /// tasks. You'd call [`Driver::status()`] with each of the items produced + /// tasks. You'd call [`Driver::task_status()`] with each of the items produced /// by the iterator. pub fn tasks(&self) -> impl Iterator { self.tasks.keys() diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index c692cca309..384a745285 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -5,7 +5,7 @@ //! Sled-local service management. //! //! For controlling zone-based storage services, refer to -//! [sled_storage:manager::StorageManager]. +//! [sled_storage::manager::StorageManager]. //! //! For controlling virtual machine instances, refer to //! [crate::instance_manager::InstanceManager]. diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index da9f6b4897..1a98a54f25 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! A task that listens for storage events from [`sled_storage::StorageMonitor`] +//! 
A task that listens for storage events from [`sled_storage::manager::StorageManager`] //! and dispatches them to other parts of the bootstrap agent and sled agent //! code. diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index aeaca9dc31..44658658be 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -118,9 +118,9 @@ impl DiskPaths { /// A disk which has been observed by monitoring hardware. /// /// No guarantees are made about the partitions which exist within this disk. -/// This exists as a distinct entity from [Disk] because it may be desirable to -/// monitor for hardware in one context, and conform disks to partition layouts -/// in a different context. +/// This exists as a distinct entity from `Disk` in `sled-storage` because it +/// may be desirable to monitor for hardware in one context, and conform disks +/// to partition layouts in a different context. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct UnparsedDisk { paths: DiskPaths, diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 51fd3ae222..07c46e1265 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -23,9 +23,9 @@ const ZONE_BUNDLE_DIRECTORY: &str = "zone"; /// Storage related resources: disks and zpools /// -/// This state is internal to the [`crate::StorageManager`] task. Clones +/// This state is internal to the [`crate::manager::StorageManager`] task. Clones /// of this state can be retrieved by requests to the `StorageManager` task -/// from the [`crate::StorageManagerHandle`]. This state is not `Sync`, and +/// from the [`crate::manager::StorageHandle`]. This state is not `Sync`, and /// as such does not require any mutexes. However, we do expect to share it /// relatively frequently, and we want copies of it to be as cheaply made /// as possible. So any large state is stored inside `Arc`s. On the other diff --git a/wicketd/src/artifacts/extracted_artifacts.rs b/wicketd/src/artifacts/extracted_artifacts.rs index 352d8ad3d5..b796201936 100644 --- a/wicketd/src/artifacts/extracted_artifacts.rs +++ b/wicketd/src/artifacts/extracted_artifacts.rs @@ -169,7 +169,7 @@ impl ExtractedArtifacts { /// /// As the returned file is written to, the data will be hashed; once /// writing is complete, call [`ExtractedArtifacts::store_tempfile()`] to - /// persist the temporary file into an [`ExtractedArtifactDataHandle()`]. + /// persist the temporary file into an [`ExtractedArtifactDataHandle`]. pub(super) fn new_tempfile( &self, ) -> Result { From fa35e2839b853800994c5bd3f6a2f120ba5203eb Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 26 Oct 2023 00:30:43 +0000 Subject: [PATCH 45/66] deadlock fix + logging --- sled-agent/src/bootstrap/pre_server.rs | 27 ---------------------- sled-agent/src/long_running_tasks.rs | 32 +++++++++++++++++++++++++- sled-agent/src/services.rs | 1 + sled-storage/src/manager.rs | 1 + 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 8a25023adb..7af9bbbf68 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -34,8 +34,6 @@ use omicron_common::FileKv; use sled_hardware::underlay; use sled_hardware::DendriteAsic; use sled_hardware::SledMode; -use sled_storage::disk::SyntheticDisk; -use sled_storage::manager::StorageHandle; use slog::Drain; use slog::Logger; use std::net::IpAddr; @@ -111,13 +109,6 @@ impl BootstrapAgentStartup { &base_log, sled_mode, startup_networking.global_zone_bootstrap_ip, - ) - .await; - - // Add some synthetic disks if necessary. - upsert_synthetic_zpools_if_needed( - &log, - &long_running_task_handles.storage_manager, &config, ) .await; @@ -275,24 +266,6 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { .map_err(StartError::EnsureZfsRamdiskDataset) } -async fn upsert_synthetic_zpools_if_needed( - log: &Logger, - storage_manager: &StorageHandle, - config: &Config, -) { - if let Some(pools) = &config.zpools { - for pool in pools { - info!( - log, - "Upserting synthetic zpool to Storage Manager: {}", - pool.to_string() - ); - let disk = SyntheticDisk::new(pool.clone()).into(); - storage_manager.upsert_disk(disk).await; - } - } -} - // Combine the `sled_mode` config with the build-time switch type to determine // the actual sled mode. fn sled_mode_from_config(config: &Config) -> Result { diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index e87c990175..cf74f54e7e 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -16,12 +16,14 @@ use crate::bootstrap::bootstore_setup::{ new_bootstore_config, poll_ddmd_for_bootstore_peer_update, }; use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; +use crate::config::Config; use crate::hardware_monitor::{HardwareMonitor, HardwareMonitorHandle}; use crate::storage_monitor::{StorageMonitor, StorageMonitorHandle}; use crate::zone_bundle::{CleanupContext, ZoneBundler}; use bootstore::schemes::v0 as bootstore; use key_manager::{KeyManager, StorageKeyRequester}; use sled_hardware::{HardwareManager, SledMode}; +use sled_storage::disk::SyntheticDisk; use sled_storage::manager::{StorageHandle, StorageManager}; use slog::{info, Logger}; use std::net::Ipv6Addr; @@ -64,6 +66,7 @@ pub async fn spawn_all_longrunning_tasks( log: &Logger, sled_mode: SledMode, global_zone_bootstrap_ip: Ipv6Addr, + config: &Config, ) -> LongRunningTaskHandles { let storage_key_requester = spawn_key_manager(log); let mut storage_manager = @@ -78,9 +81,14 @@ pub async fn spawn_all_longrunning_tasks( let hardware_monitor = spawn_hardware_monitor(log, &hardware_manager, &storage_manager); + // Add some synthetic disks if necessary. 
+ upsert_synthetic_zpools_if_needed(&log, &storage_manager, &config).await; + // Wait for the boot disk so that we can work with any ledgers, // such as those needed by the bootstore and sled-agent - let _ = storage_manager.wait_for_boot_disk().await; + info!(log, "Waiting for boot disk"); + let (disk_id, _) = storage_manager.wait_for_boot_disk().await; + info!(log, "Found boot disk {:?}", disk_id); let bootstore = spawn_bootstore_tasks( log, @@ -158,6 +166,7 @@ fn spawn_hardware_monitor( hardware_manager: &HardwareManager, storage_handle: &StorageHandle, ) -> HardwareMonitorHandle { + info!(log, "Starting HardwareMonitor"); let (mut monitor, handle) = HardwareMonitor::new(log, hardware_manager, storage_handle); tokio::spawn(async move { @@ -181,10 +190,12 @@ async fn spawn_bootstore_tasks( .unwrap(); // Create and spawn the bootstore + info!(log, "Starting Bootstore"); let (mut node, node_handle) = bootstore::Node::new(config, log).await; tokio::spawn(async move { node.run().await }); // Spawn a task for polling DDMD and updating bootstore with peer addresses + info!(log, "Starting Bootstore DDMD poller"); let log = log.new(o!("component" => "bootstore_ddmd_poller")); let node_handle2 = node_handle.clone(); tokio::spawn(async move { @@ -199,6 +210,25 @@ fn spawn_zone_bundler_tasks( log: &Logger, storage_handle: &mut StorageHandle, ) -> ZoneBundler { + info!(log, "Starting ZoneBundler related tasks"); let log = log.new(o!("component" => "ZoneBundler")); ZoneBundler::new(log, storage_handle.clone(), CleanupContext::default()) } + +async fn upsert_synthetic_zpools_if_needed( + log: &Logger, + storage_manager: &StorageHandle, + config: &Config, +) { + if let Some(pools) = &config.zpools { + for pool in pools { + info!( + log, + "Upserting synthetic zpool to Storage Manager: {}", + pool.to_string() + ); + let disk = SyntheticDisk::new(pool.clone()).into(); + storage_manager.upsert_disk(disk).await; + } + } +} diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 384a745285..33859465cb 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -422,6 +422,7 @@ impl ServiceManager { zone_bundler: ZoneBundler, ) -> Self { let log = log.new(o!("component" => "ServiceManager")); + info!(log, "Creating ServiceManager"); Self { inner: Arc::new(ServiceManagerInner { log: log.clone(), diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 46fb34a3bf..c58316ef1b 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -335,6 +335,7 @@ impl StorageManager { // // Return true if updates should be sent to watchers, false otherwise async fn add_queued_disks(&mut self) -> bool { + info!(self.log, "Attempting to add queued disks"); self.state = StorageManagerState::Normal; let mut send_updates = false; From 999714952f6dc0cd0bb5d818088403e50e4d0bbe Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 26 Oct 2023 01:45:44 +0000 Subject: [PATCH 46/66] synthetic disk related fixes --- sled-storage/src/manager.rs | 5 ++++- sled-storage/src/resources.rs | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index c58316ef1b..19cd86df64 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -39,6 +39,7 @@ struct NewFilesystemRequest { responder: oneshot::Sender>, } +#[derive(Debug)] enum StorageRequest { AddDisk(RawDisk), RemoveDisk(RawDisk), @@ -286,7 +287,9 @@ impl StorageManager { /// This is useful for testing/debugging pub async fn step(&mut self) -> Result<(), Error> { // The sending side should never disappear - let should_send_updates = match self.rx.recv().await.unwrap() { + let req = self.rx.recv().await.unwrap(); + info!(self.log, "Received {:?}", req); + let should_send_updates = match req { StorageRequest::AddDisk(raw_disk) => { self.add_disk(raw_disk).await? } diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 07c46e1265..f3444ac798 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -83,9 +83,33 @@ impl StorageResources { true } + /// Delete a disk and its zpool + /// + /// Return true, if data was changed, false otherwise + /// + /// Note: We never allow removal of synthetic disks as they are only added + /// once. + #[cfg(not(test))] + pub(crate) fn remove_disk(&mut self, id: &DiskIdentity) -> bool { + if let Some((disk, _)) = self.disks.get(id) { + if disk.is_synthetic() { + return false; + } + } else { + return false; + } + // Safe to unwrap as we just checked the key existed above + Arc::make_mut(&mut self.disks).remove(id).unwrap(); + true + } + /// Delete a real disk and its zpool /// /// Return true, if data was changed, false otherwise + /// + /// Note: For testing purposes of this crate, we allow synthetic disks to + /// be deleted. + #[cfg(test)] pub(crate) fn remove_disk(&mut self, id: &DiskIdentity) -> bool { if !self.disks.contains_key(id) { return false; From 82e25053ceba10ced74ac04b3b16b5c1b8de79bd Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 26 Oct 2023 18:51:40 +0000 Subject: [PATCH 47/66] Fix M2 expected datasets --- sled-storage/src/dataset.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index 4f39a20bb0..164141e875 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -63,7 +63,7 @@ static U2_EXPECTED_DATASETS: [ExpectedDataset; U2_EXPECTED_DATASET_COUNT] = [ .compression(DUMP_DATASET_COMPRESSION), ]; -const M2_EXPECTED_DATASET_COUNT: usize = 5; +const M2_EXPECTED_DATASET_COUNT: usize = 6; static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ // Stores software images. // @@ -71,6 +71,10 @@ static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ ExpectedDataset::new(INSTALL_DATASET), // Stores crash dumps. ExpectedDataset::new(CRASH_DATASET), + // Backing store for OS data that should be persisted across reboots. + // Its children are selectively overlay mounted onto parts of the ramdisk + // root. + ExpectedDataset::new(M2_BACKING_DATASET), // Stores cluter configuration information. // // Should be duplicated to both M.2s. From a622fec45eb56d7371d36120b34472f66d951031 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 26 Oct 2023 22:18:22 +0000 Subject: [PATCH 48/66] actually poll nexus storage notifications --- sled-agent/src/storage_monitor.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index 1a98a54f25..a8fd899f2d 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -11,6 +11,7 @@ use crate::nexus::NexusClientWithResolver; use derive_more::From; use futures::stream::FuturesOrdered; use futures::FutureExt; +use futures::StreamExt; use nexus_client::types::PhysicalDiskDeleteRequest; use nexus_client::types::PhysicalDiskPutRequest; use nexus_client::types::ZpoolPutRequest; @@ -116,6 +117,11 @@ impl StorageMonitor { pub async fn run(&mut self) { loop { tokio::select! { + _ = self.nexus_notifications.next(), + if !self.nexus_notifications.is_empty() => + { + debug!(self.log, "Processing nexus notification"); + } resources = self.storage_manager.wait_for_changes() => { info!( self.log, From 6de271b3f9fb075ca62240e6303944cb75d240d9 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 27 Oct 2023 07:02:17 +0000 Subject: [PATCH 49/66] better logging for nexus requests --- sled-agent/src/storage_monitor.rs | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index a8fd899f2d..696335636e 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -116,11 +116,12 @@ impl StorageMonitor { /// This should be spawned into a tokio task pub async fn run(&mut self) { loop { + info!(self.log, "looping again"); tokio::select! { - _ = self.nexus_notifications.next(), + res = self.nexus_notifications.next(), if !self.nexus_notifications.is_empty() => { - debug!(self.log, "Processing nexus notification"); + info!(self.log, "Nexus notification complete: {:?}", res); } resources = self.storage_manager.wait_for_changes() => { info!( @@ -134,7 +135,7 @@ impl StorageMonitor { info!( self.log, "Received storage monitor message"; - "msg" => ?msg + "monitor_msg" => ?msg ); self.handle_monitor_msg(msg).await; } @@ -227,7 +228,8 @@ impl StorageMonitor { })?; } } - Ok(()) + let msg = format!("{:?}", disk); + Ok(msg) } }; @@ -235,11 +237,14 @@ impl StorageMonitor { // This notification is often invoked before Nexus has started // running, so avoid flagging any errors as concerning until some // time has passed. 
- let log_post_failure = move |_, call_count, total_duration| { + let log_post_failure = move |err, call_count, total_duration| { if call_count == 0 { - info!(log, "failed to notify nexus about {disk2:?}"); + info!(log, "failed to notify nexus about {disk2:?}"; + "err" => ?err + ); } else if total_duration > std::time::Duration::from_secs(30) { warn!(log, "failed to notify nexus about {disk2:?}"; + "err" => ?err, "total duration" => ?total_duration); } }; @@ -275,18 +280,21 @@ impl StorageMonitor { .map_err(|e| { backoff::BackoffError::transient(e.to_string()) })?; - Ok(()) + let msg = format!("{:?}", zpool_request); + Ok(msg) } }; let log = self.log.clone(); let name = pool.name.clone(); let disk = pool.parent.clone(); - let log_post_failure = move |_, call_count, total_duration| { + let log_post_failure = move |err, call_count, total_duration| { if call_count == 0 { - info!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"); + info!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; + "err" => ?err); } else if total_duration > std::time::Duration::from_secs(30) { warn!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; + "err" => ?err, "total duration" => ?total_duration); } }; @@ -303,7 +311,7 @@ impl StorageMonitor { // The type of a future which is used to send a notification to Nexus. type NotifyFut = - Pin> + Send>>; + Pin> + Send>>; struct NexusUpdates { disk_puts: Vec, From adedecc5adfdefd38f7d3a16d40f3cc1fc32b599 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 27 Oct 2023 08:15:33 +0000 Subject: [PATCH 50/66] actually add zpools to nexus --- sled-agent/src/storage_monitor.rs | 42 ++++++++++++++++++------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index 696335636e..0b4475ce74 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -17,6 +17,7 @@ use nexus_client::types::PhysicalDiskPutRequest; use nexus_client::types::ZpoolPutRequest; use omicron_common::api::external::ByteCount; use omicron_common::backoff; +use omicron_common::disk::DiskIdentity; use sled_storage::manager::StorageHandle; use sled_storage::pool::Pool; use sled_storage::resources::StorageResources; @@ -329,6 +330,27 @@ fn compute_resource_diffs( let mut disk_deletes = vec![]; let mut zpool_puts = vec![]; + let mut put_pool = |disk_id: &DiskIdentity, updated_pool: &Pool| { + match ByteCount::try_from(updated_pool.info.size()) { + Ok(size) => zpool_puts.push(( + updated_pool.clone(), + ZpoolPutRequest { + size: size.into(), + disk_model: disk_id.model.clone(), + disk_serial: disk_id.serial.clone(), + disk_vendor: disk_id.vendor.clone(), + }, + )), + Err(err) => { + error!( + log, + "Error parsing pool size"; + "name" => updated_pool.name.to_string(), + "err" => ?err); + } + } + }; + // Diff the existing resources with the update to see what has changed // This loop finds disks and pools that were modified or deleted for (disk_id, (disk, pool)) in current.disks.iter() { @@ -344,22 +366,7 @@ fn compute_resource_diffs( }); } if pool != updated_pool { - match ByteCount::try_from(pool.info.size()) { - Ok(size) => zpool_puts.push(( - pool.clone(), - ZpoolPutRequest { - size: size.into(), - disk_model: disk_id.model.clone(), - disk_serial: disk_id.serial.clone(), - disk_vendor: disk_id.vendor.clone(), - }, - )), - Err(err) => error!( - log, - "Error parsing pool size"; - "name" => pool.name.to_string(), - "err" => ?err), 
- } + put_pool(disk_id, updated_pool); } } None => disk_deletes.push(PhysicalDiskDeleteRequest { @@ -373,7 +380,7 @@ fn compute_resource_diffs( // Diff the existing resources with the update to see what has changed // This loop finds new disks and pools - for (disk_id, (updated_disk, _)) in updated.disks.iter() { + for (disk_id, (updated_disk, updated_pool)) in updated.disks.iter() { if !current.disks.contains_key(disk_id) { disk_puts.push(PhysicalDiskPutRequest { sled_id: *sled_id, @@ -382,6 +389,7 @@ fn compute_resource_diffs( vendor: disk_id.vendor.clone(), variant: updated_disk.variant().into(), }); + put_pool(disk_id, updated_pool); } } From c8db7c88dec4030f29bea0520dea43e81187c8dd Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 27 Oct 2023 23:14:17 +0000 Subject: [PATCH 51/66] remove unnecessary prints --- sled-agent/src/storage_monitor.rs | 1 - sled-agent/src/zone_bundle.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index 0b4475ce74..71e61e84c2 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -117,7 +117,6 @@ impl StorageMonitor { /// This should be spawned into a tokio task pub async fn run(&mut self) { loop { - info!(self.log, "looping again"); tokio::select! { res = self.nexus_notifications.next(), if !self.nexus_notifications.is_empty() => diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index d2615ffce6..91604b7099 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -257,7 +257,6 @@ impl Inner { async fn bundle_directories(&self) -> Vec { let resources = self.storage_handle.get_latest_resources().await; let expected = resources.all_zone_bundle_directories(); - println!("dirs = {:?}", expected); let mut out = Vec::with_capacity(expected.len()); for each in expected.into_iter() { if tokio::fs::create_dir_all(&each).await.is_ok() { From a7a1971e99ef8d58823c3bcf921c2c65694d9739 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Wed, 8 Nov 2023 20:38:57 +0000 Subject: [PATCH 52/66] Some review fixes --- Cargo.lock | 4 +-- clients/nexus-client/Cargo.toml | 1 + clients/nexus-client/src/lib.rs | 14 ++++++++++ clients/sled-agent-client/Cargo.toml | 1 + clients/sled-agent-client/src/lib.rs | 25 +++++++++++++++++ illumos-utils/src/zfs.rs | 4 +-- sled-storage/Cargo.toml | 6 ---- sled-storage/src/dataset.rs | 41 ---------------------------- sled-storage/src/error.rs | 3 -- sled-storage/src/keyfile.rs | 3 -- 10 files changed, 44 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 85eb29e1a1..c62148cdd4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4341,6 +4341,7 @@ dependencies = [ "serde", "serde_json", "sled-hardware", + "sled-storage", "slog", "uuid", ] @@ -8164,6 +8165,7 @@ dependencies = [ "regress", "reqwest", "serde", + "sled-storage", "slog", "uuid", ] @@ -8207,7 +8209,6 @@ dependencies = [ "glob", "illumos-utils", "key-manager", - "nexus-client 0.1.0", "omicron-common 0.1.0", "omicron-test-utils", "omicron-workspace-hack", @@ -8215,7 +8216,6 @@ dependencies = [ "schemars", "serde", "serde_json", - "sled-agent-client", "sled-hardware", "slog", "thiserror", diff --git a/clients/nexus-client/Cargo.toml b/clients/nexus-client/Cargo.toml index 1085cf1ec9..239cb77789 100644 --- a/clients/nexus-client/Cargo.toml +++ b/clients/nexus-client/Cargo.toml @@ -11,6 +11,7 @@ ipnetwork.workspace = true omicron-common.workspace = true omicron-passwords.workspace = true sled-hardware.workspace = true +sled-storage.workspace = true progenitor.workspace = true regress.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 71a555476b..9f81492d10 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -407,3 +407,17 @@ impl From for types::Baseboard { } } } + +impl From for types::DatasetKind { + fn from(k: sled_storage::dataset::DatasetKind) -> Self { + use sled_storage::dataset::DatasetKind::*; + match k { + CockroachDb => Self::Cockroach, + Crucible => Self::Crucible, + Clickhouse => Self::Clickhouse, + ClickhouseKeeper => Self::ClickhouseKeeper, + ExternalDns => Self::ExternalDns, + InternalDns => Self::InternalDns, + } + } +} diff --git a/clients/sled-agent-client/Cargo.toml b/clients/sled-agent-client/Cargo.toml index b2ed07caba..e2cc737e70 100644 --- a/clients/sled-agent-client/Cargo.toml +++ b/clients/sled-agent-client/Cargo.toml @@ -14,5 +14,6 @@ regress.workspace = true reqwest = { workspace = true, features = [ "json", "rustls-tls", "stream" ] } serde.workspace = true slog.workspace = true +sled-storage.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 0df21d894e..30b554a021 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -6,6 +6,7 @@ use async_trait::async_trait; use std::convert::TryFrom; +use std::str::FromStr; use uuid::Uuid; progenitor::generate_api!( @@ -528,3 +529,27 @@ impl TestInterfaces for Client { .expect("disk_finish_transition() failed unexpectedly"); } } + +impl From for types::DatasetKind { + fn from(k: sled_storage::dataset::DatasetKind) -> Self { + use sled_storage::dataset::DatasetKind::*; + match k { + CockroachDb => Self::CockroachDb, + Crucible => Self::Crucible, + Clickhouse => Self::Clickhouse, + ClickhouseKeeper => Self::ClickhouseKeeper, + ExternalDns => 
Self::ExternalDns, + InternalDns => Self::InternalDns, + } + } +} + +impl From for types::DatasetName { + fn from(n: sled_storage::dataset::DatasetName) -> Self { + Self { + pool_name: types::ZpoolName::from_str(&n.pool().to_string()) + .unwrap(), + kind: n.dataset().clone().into(), + } + } +} diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index 0d8f468705..e9554100af 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -184,9 +184,7 @@ impl From<&DiskIdentity> for Keypath { fn build_keypath(id: &DiskIdentity, root: &str) -> Keypath { let filename = format!("{}-{}-{}-zfs-aes-256-gcm.key", id.vendor, id.serial, id.model); - let mut path = Utf8PathBuf::new(); - path.push(root); - path.push(filename); + let path: Utf8PathBuf = [root, &filename].iter().collect(); Keypath(path) } diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index 38863e160b..82ab206a8e 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -11,17 +11,11 @@ derive_more.workspace = true glob.workspace = true illumos-utils.workspace = true key-manager.workspace = true -# Needed strictly for parameter type conversion -# We could put this in the nexus-client instead -nexus-client.workspace = true omicron-common.workspace = true rand.workspace = true schemars = { workspace = true, features = [ "chrono", "uuid1" ] } serde.workspace = true serde_json.workspace = true -# Needed strictly for parameter type conversion -# We could put this in the sled-agent-client instead -sled-agent-client.workspace = true sled-hardware.workspace = true slog.workspace = true thiserror.workspace = true diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index 164141e875..503ccb053a 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -19,7 +19,6 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::DiskVariant; use slog::{info, Logger}; -use std::str::FromStr; use std::sync::OnceLock; pub const INSTALL_DATASET: &'static str = "install"; @@ -139,34 +138,6 @@ pub enum DatasetKind { InternalDns, } -impl From for sled_agent_client::types::DatasetKind { - fn from(k: DatasetKind) -> Self { - use DatasetKind::*; - match k { - CockroachDb => Self::CockroachDb, - Crucible => Self::Crucible, - Clickhouse => Self::Clickhouse, - ClickhouseKeeper => Self::ClickhouseKeeper, - ExternalDns => Self::ExternalDns, - InternalDns => Self::InternalDns, - } - } -} - -impl From for nexus_client::types::DatasetKind { - fn from(k: DatasetKind) -> Self { - use DatasetKind::*; - match k { - CockroachDb => Self::Cockroach, - Crucible => Self::Crucible, - Clickhouse => Self::Clickhouse, - ClickhouseKeeper => Self::ClickhouseKeeper, - ExternalDns => Self::ExternalDns, - InternalDns => Self::InternalDns, - } - } -} - impl std::fmt::Display for DatasetKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use DatasetKind::*; @@ -210,18 +181,6 @@ impl DatasetName { } } -impl From for sled_agent_client::types::DatasetName { - fn from(n: DatasetName) -> Self { - Self { - pool_name: sled_agent_client::types::ZpoolName::from_str( - &n.pool().to_string(), - ) - .unwrap(), - kind: n.dataset().clone().into(), - } - } -} - #[derive(Debug, thiserror::Error)] pub enum DatasetError { #[error("Cannot open {path} due to {error}")] diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs index 70d7fe7c1e..b9f97ee428 100644 --- a/sled-storage/src/error.rs +++ b/sled-storage/src/error.rs @@ -78,7 +78,4 @@ pub enum Error { 
#[error("Zpool Not Found: {0}")] ZpoolNotFound(String), - - #[error("Underlay not yet initialized")] - UnderlayNotInitialized, } diff --git a/sled-storage/src/keyfile.rs b/sled-storage/src/keyfile.rs index fcdbf8b3bf..105092c99e 100644 --- a/sled-storage/src/keyfile.rs +++ b/sled-storage/src/keyfile.rs @@ -26,10 +26,7 @@ impl KeyFile { key: &[u8; 32], log: &Logger, ) -> std::io::Result { - // TODO: fix this to not truncate // We want to overwrite any existing contents. - // If we truncate we may leave dirty pages around - // containing secrets. let mut file = tokio::fs::OpenOptions::new() .create(true) .write(true) From 7d457cee41e5c428e8d1be9ca87945a78436c1bf Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 8 Nov 2023 23:08:27 +0000 Subject: [PATCH 53/66] Fix subtle bugs wrt watch channels --- sled-storage/src/manager.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 19cd86df64..078b9a19f8 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -115,21 +115,21 @@ impl StorageHandle { /// Wait for a boot disk to be initialized pub async fn wait_for_boot_disk(&mut self) -> (DiskIdentity, ZpoolName) { loop { - // We panic if the sender is dropped, as this means - // the StorageManager has gone away, which it should not do. - self.resource_updates.changed().await.unwrap(); - // Limit any RWLock related cancellation issues by immediately cloning - let resources = self.resource_updates.borrow().clone(); + let resources = self.resource_updates.borrow_and_update(); if let Some((disk_id, zpool_name)) = resources.boot_disk() { return (disk_id, zpool_name); } + drop(resources); + // We panic if the sender is dropped, as this means + // the StorageManager has gone away, which it should not do. + self.resource_updates.changed().await.unwrap(); } } /// Wait for any storage resource changes pub async fn wait_for_changes(&mut self) -> StorageResources { self.resource_updates.changed().await.unwrap(); - self.resource_updates.borrow().clone() + self.resource_updates.borrow_and_update().clone() } /// Retrieve the latest value of `StorageResources` from the From c895657e1d29e440f5de20d59689dbb870763ece Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 9 Nov 2023 00:25:11 +0000 Subject: [PATCH 54/66] some more review fixes --- sled-storage/src/manager.rs | 86 ++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 24 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 078b9a19f8..b60114a519 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -23,6 +23,31 @@ use uuid::Uuid; // The size of the mpsc bounded channel used to communicate // between the `StorageHandle` and `StorageManager`. +// +// How did we choose this bound, and why? +// +// Picking a bound can be tricky, but in general, you want the channel to act +// unbounded, such that sends never fail. This makes the channels reliable, +// such that we never drop messages inside the process, and the caller doesn't +// have to choose what to do when overloaded. This simplifies things drastically +// for developers. However, you also don't want to make the channel actually +// unbounded, because that can lead to run-away memory growth and pathological +// behaviors, such that requests get slower over time until the system crashes. 
+// +// Our team's chosen solution, and used elsewhere in the codebase, is to +// choose a large enough bound such that we should never hit it in practice +// unless we are truly overloaded. If we hit the bound it means that beyond that +// requests will start to build up and we will eventually topple over. So when +// we hit this bound, we just go ahead and panic. +// +// Picking a channel bound is hard to do empirically, but practically, if +// requests are mostly mutating task local state, a bound of 1024 or even 8192 +// should be plenty. Tasks that must perform longer running ops can spawn helper +// tasks as necessary or include their own handles for replies rather than +// synchronously waiting. Memory for the queue can be kept small with boxing of +// large messages. +// +// Here we start relatively small so that we can evaluate our choice over time. const QUEUE_SIZE: usize = 256; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -186,22 +211,22 @@ impl FakeStorageManager { /// Run the main receive loop of the `StorageManager` /// /// This should be spawned into a tokio task - pub async fn run(&mut self) { + pub async fn run(mut self) { loop { - // The sending side should never disappear - match self.rx.recv().await.unwrap() { - StorageRequest::AddDisk(raw_disk) => { + match self.rx.recv().await { + Some(StorageRequest::AddDisk(raw_disk)) => { if self.add_disk(raw_disk) { self.resource_updates .send_replace(self.resources.clone()); } } - StorageRequest::GetLatestResources(tx) => { + Some(StorageRequest::GetLatestResources(tx)) => { let _ = tx.send(self.resources.clone()); } - _ => { + Some(_) => { unreachable!(); } + None => break, } } } @@ -260,15 +285,23 @@ impl StorageManager { /// Run the main receive loop of the `StorageManager` /// /// This should be spawned into a tokio task - pub async fn run(&mut self) { + pub async fn run(mut self) { loop { const QUEUED_DISK_RETRY_TIMEOUT: Duration = Duration::from_secs(10); let mut interval = interval(QUEUED_DISK_RETRY_TIMEOUT); interval.set_missed_tick_behavior(MissedTickBehavior::Delay); tokio::select! { res = self.step() => { - if let Err(e) = res { - warn!(self.log, "{e}"); + match res { + Some(Ok(())) => (), + Some(Err(e)) => warn!(self.log, "{e}"), + None => { + info!( + self.log, + "Shutting down StorageManager task: no handles." + ); + return; + } } } _ = interval.tick(), @@ -285,13 +318,20 @@ impl StorageManager { /// Process the next event /// /// This is useful for testing/debugging - pub async fn step(&mut self) -> Result<(), Error> { - // The sending side should never disappear - let req = self.rx.recv().await.unwrap(); + /// + /// Return `None` if the sender side has disappeared and the task should + /// shutdown. + pub async fn step(&mut self) -> Option> { + let Some(req) = self.rx.recv().await else { + return None; + }; info!(self.log, "Received {:?}", req); let should_send_updates = match req { StorageRequest::AddDisk(raw_disk) => { - self.add_disk(raw_disk).await? 
+ match self.add_disk(raw_disk).await { + Ok(is_new) => is_new, + Err(e) => return Some(Err(e)), + } } StorageRequest::RemoveDisk(raw_disk) => { self.remove_disk(raw_disk).await @@ -328,7 +368,7 @@ impl StorageManager { let _ = self.resource_updates.send_replace(self.resources.clone()); } - Ok(()) + Some(Ok(())) } // Loop through all queued disks inserting them into [`StorageResources`] @@ -624,7 +664,7 @@ mod tests { assert!(manager.resources.all_u2_zpools().is_empty()); assert_eq!(manager.queued_u2_drives, HashSet::from([raw_disk.clone()])); - // Check other non-normal stages and enusre disk gets queued + // Check other non-normal stages and ensure disk gets queued manager.queued_u2_drives.clear(); manager.state = StorageManagerState::QueuingDisks; manager.add_u2_disk(raw_disk.clone()).await.unwrap(); @@ -661,7 +701,7 @@ mod tests { let logctx = test_setup_log("wait_for_bootdisk"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (mut manager, mut handle) = + let (manager, mut handle) = StorageManager::new(&logctx.log, key_requester); // Spawn the key_manager so that it will respond to requests for encryption keys tokio::spawn(async move { key_manager.run().await }); @@ -688,8 +728,7 @@ mod tests { let logctx = test_setup_log("queued_disks_get_added_as_resources"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (mut manager, handle) = - StorageManager::new(&logctx.log, key_requester); + let (manager, handle) = StorageManager::new(&logctx.log, key_requester); // Spawn the key_manager so that it will respond to requests for encryption keys tokio::spawn(async move { key_manager.run().await }); @@ -743,7 +782,7 @@ mod tests { let dir = tempdir().unwrap(); let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); handle.upsert_disk(disk).await; - manager.step().await.unwrap(); + manager.step().await.unwrap().unwrap(); // We can't wait for a reply through the handle as the storage manager task // isn't actually running. We just check the resources directly. 
@@ -756,7 +795,7 @@ mod tests { // Now inform the storage manager that the key manager is ready // The queued disk should not be added due to the error handle.key_manager_ready().await; - manager.step().await.unwrap(); + manager.step().await.unwrap().unwrap(); assert!(manager.resources.all_u2_zpools().is_empty()); // Manually simulating a timer tick to add queued disks should also @@ -779,7 +818,7 @@ mod tests { let logctx = test_setup_log("delete_disk_triggers_notification"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (mut manager, mut handle) = + let (manager, mut handle) = StorageManager::new(&logctx.log, key_requester); // Spawn the key_manager so that it will respond to requests for encryption keys @@ -820,7 +859,7 @@ mod tests { let logctx = test_setup_log("ensure_using_exactly_these_disks"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (mut manager, mut handle) = + let (manager, mut handle) = StorageManager::new(&logctx.log, key_requester); // Spawn the key_manager so that it will respond to requests for encryption keys @@ -937,8 +976,7 @@ mod tests { let logctx = test_setup_log("upsert_filesystem"); let (mut key_manager, key_requester) = KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (mut manager, handle) = - StorageManager::new(&logctx.log, key_requester); + let (manager, handle) = StorageManager::new(&logctx.log, key_requester); // Spawn the key_manager so that it will respond to requests for encryption keys tokio::spawn(async move { key_manager.run().await }); From ed7f059d0b251954e026b5ef9839c10e0a3820f2 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 9 Nov 2023 03:37:43 +0000 Subject: [PATCH 55/66] Use oneshot channels for HardwareMonitor setup --- sled-agent/src/bootstrap/pre_server.rs | 20 +++-- sled-agent/src/bootstrap/server.rs | 30 ++++--- sled-agent/src/hardware_monitor.rs | 111 +++++++++---------------- sled-agent/src/long_running_tasks.rs | 46 +++++----- sled-agent/src/services.rs | 2 +- sled-agent/src/zone_bundle.rs | 2 +- sled-storage/src/manager.rs | 2 +- 7 files changed, 100 insertions(+), 113 deletions(-) diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 7af9bbbf68..ff87437001 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -17,6 +17,7 @@ use crate::long_running_tasks::{ spawn_all_longrunning_tasks, LongRunningTaskHandles, }; use crate::services::ServiceManager; +use crate::sled_agent::SledAgent; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; @@ -38,6 +39,7 @@ use slog::Drain; use slog::Logger; use std::net::IpAddr; use std::net::Ipv6Addr; +use tokio::sync::oneshot; pub(super) struct BootstrapAgentStartup { pub(super) config: Config, @@ -47,6 +49,7 @@ pub(super) struct BootstrapAgentStartup { pub(super) startup_log: Logger, pub(super) service_manager: ServiceManager, pub(super) long_running_task_handles: LongRunningTaskHandles, + pub(super) sled_agent_started_tx: oneshot::Sender, } impl BootstrapAgentStartup { @@ -105,7 +108,11 @@ impl BootstrapAgentStartup { // Spawn all important long running tasks that live for the lifetime of // the process and are used by both the bootstrap agent and sled agent - let long_running_task_handles = spawn_all_longrunning_tasks( + let ( + long_running_task_handles, + sled_agent_started_tx, + 
service_manager_ready_tx, + ) = spawn_all_longrunning_tasks( &base_log, sled_mode, startup_networking.global_zone_bootstrap_ip, @@ -128,10 +135,12 @@ impl BootstrapAgentStartup { long_running_task_handles.zone_bundler.clone(), ); - long_running_task_handles - .hardware_monitor - .service_manager_ready(service_manager.clone()) - .await; + // Inform the hardware monitor that the service manager is ready + // This is a onetime operation, and so we use a oneshot channel + service_manager_ready_tx + .send(service_manager.clone()) + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); Ok(Self { config, @@ -141,6 +150,7 @@ impl BootstrapAgentStartup { startup_log: log, service_manager, long_running_task_handles, + sled_agent_started_tx, }) } } diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 635b33893d..90653873c4 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -25,6 +25,7 @@ use crate::config::ConfigError; use crate::long_running_tasks::LongRunningTaskHandles; use crate::server::Server as SledAgentServer; use crate::services::ServiceManager; +use crate::sled_agent::SledAgent; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; @@ -175,6 +176,7 @@ impl Server { startup_log, service_manager, long_running_task_handles, + sled_agent_started_tx, } = BootstrapAgentStartup::run(config).await?; // Do we have a StartSledAgentRequest stored in the ledger? @@ -251,10 +253,10 @@ impl Server { // switch zone, if we're a scrimlet, to give it our underlay network // information. let sled_agent = sled_agent_server.sled_agent(); - long_running_task_handles - .hardware_monitor - .sled_agent_started(sled_agent.clone()) - .await; + sled_agent_started_tx + .send(sled_agent.clone()) + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); // For cold boot specifically, we now need to load the services // we're responsible for, while continuing to handle hardware @@ -263,7 +265,7 @@ impl Server { sled_agent.cold_boot_load_services().await; SledAgentState::ServerStarted(sled_agent_server) } else { - SledAgentState::Bootstrapping + SledAgentState::Bootstrapping(Some(sled_agent_started_tx)) }; // Spawn our inner task that handles any future hardware updates and any @@ -306,7 +308,7 @@ impl Server { // bootstrap server). enum SledAgentState { // We're still in the bootstrapping phase, waiting for a sled-agent request. - Bootstrapping, + Bootstrapping(Option>), // ... or the sled agent server is running. ServerStarted(SledAgentServer), } @@ -548,8 +550,11 @@ impl Inner { response_tx: oneshot::Sender>, log: &Logger, ) { - match &self.state { - SledAgentState::Bootstrapping => { + match &mut self.state { + SledAgentState::Bootstrapping(sled_agent_started_tx) => { + // Extract from an option to satisfy the borrow checker + let sled_agent_started_tx = + sled_agent_started_tx.take().unwrap(); let response = match start_sled_agent( &self.config, &request, @@ -565,11 +570,10 @@ impl Inner { // We've created sled-agent; we need to (possibly) // reconfigure the switch zone, if we're a scrimlet, to // give it our underlay network information. 
- self.long_running_task_handles - .hardware_monitor - .sled_agent_started(server.sled_agent().clone()) - .await; - + sled_agent_started_tx + .send(server.sled_agent().clone()) + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); self.state = SledAgentState::ServerStarted(server); Ok(SledAgentResponse { id: request.id }) } diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs index f3402cb6bd..698d2d4608 100644 --- a/sled-agent/src/hardware_monitor.rs +++ b/sled-agent/src/hardware_monitor.rs @@ -12,30 +12,8 @@ use sled_hardware::{Baseboard, HardwareManager, HardwareUpdate}; use sled_storage::disk::RawDisk; use sled_storage::manager::StorageHandle; use slog::Logger; -use std::fmt::Debug; -use tokio::sync::broadcast; use tokio::sync::broadcast::error::RecvError; -use tokio::sync::mpsc; - -const QUEUE_SIZE: usize = 10; - -pub enum HardwareMonitorMsg { - SledAgentStarted(SledAgent), - ServiceManagerCreated(ServiceManager), -} - -impl Debug for HardwareMonitorMsg { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - HardwareMonitorMsg::SledAgentStarted(_) => { - f.debug_struct("SledAgentStarted").finish() - } - HardwareMonitorMsg::ServiceManagerCreated(_) => { - f.debug_struct("ServiceManagerCreated").finish() - } - } - } -} +use tokio::sync::{broadcast, oneshot}; // A thin wrapper around the the [`ServiceManager`] that caches the state // whether or not the tofino is loaded if the [`ServiceManager`] doesn't exist @@ -62,36 +40,26 @@ impl TofinoManager { *self = Self::Ready(service_manager); tofino_loaded } -} - -#[derive(Clone)] -pub struct HardwareMonitorHandle { - tx: mpsc::Sender, -} - -impl HardwareMonitorHandle { - pub async fn service_manager_ready(&self, service_manager: ServiceManager) { - self.tx - .send(HardwareMonitorMsg::ServiceManagerCreated(service_manager)) - .await - .unwrap(); - } - pub async fn sled_agent_started(&self, sled_agent: SledAgent) { - self.tx - .send(HardwareMonitorMsg::SledAgentStarted(sled_agent)) - .await - .unwrap(); + pub fn is_ready(&self) -> bool { + match self { + TofinoManager::Ready(_) => true, + _ => false, + } } } +// A monitor for hardware events pub struct HardwareMonitor { log: Logger, baseboard: Baseboard, - // Receive messages from the [`HardwareMonitorHandle`] - handle_rx: mpsc::Receiver, + // Receive a onetime notification that the SledAgent has started + sled_agent_started_rx: oneshot::Receiver, + + // Receive a onetime notification that the ServiceManager is ready + service_manager_ready_rx: oneshot::Receiver, // Receive messages from the [`HardwareManager`] hardware_rx: broadcast::Receiver, @@ -123,9 +91,15 @@ impl HardwareMonitor { log: &Logger, hardware_manager: &HardwareManager, storage_manager: &StorageHandle, - ) -> (HardwareMonitor, HardwareMonitorHandle) { + ) -> ( + HardwareMonitor, + oneshot::Sender, + oneshot::Sender, + ) { + let (sled_agent_started_tx, sled_agent_started_rx) = oneshot::channel(); + let (service_manager_ready_tx, service_manager_ready_rx) = + oneshot::channel(); let baseboard = hardware_manager.baseboard(); - let (handle_tx, handle_rx) = mpsc::channel(QUEUE_SIZE); let hardware_rx = hardware_manager.monitor(); let log = log.new(o!("component" => "HardwareMonitor")); let tofino_manager = TofinoManager::new(); @@ -133,14 +107,16 @@ impl HardwareMonitor { HardwareMonitor { log, baseboard, - handle_rx, + sled_agent_started_rx, + service_manager_ready_rx, hardware_rx, hardware_manager: hardware_manager.clone(), storage_manager: 
storage_manager.clone(), sled_agent: None, tofino_manager, }, - HardwareMonitorHandle { tx: handle_tx }, + sled_agent_started_tx, + service_manager_ready_tx, ) } @@ -155,13 +131,21 @@ impl HardwareMonitor { loop { tokio::select! { - Some(msg) = self.handle_rx.recv() => { - info!( - self.log, - "Received hardware monitor message"; - "msg" => ?msg - ); - self.handle_monitor_msg(msg).await; + Ok(sled_agent) = &mut self.sled_agent_started_rx, + if self.sled_agent.is_none() => + { + info!(self.log, "Sled Agent Started"); + self.sled_agent = Some(sled_agent); + self.check_latest_hardware_snapshot().await; + } + Ok(service_manager) = &mut self.service_manager_ready_rx, + if !self.tofino_manager.is_ready() => + { + let tofino_loaded = + self.tofino_manager.become_ready(service_manager); + if tofino_loaded { + self.activate_switch().await; + } } update = self.hardware_rx.recv() => { info!( @@ -175,23 +159,6 @@ impl HardwareMonitor { } } - // Handle a message from the [`HardwareMonitorHandle`] - async fn handle_monitor_msg(&mut self, msg: HardwareMonitorMsg) { - match msg { - HardwareMonitorMsg::SledAgentStarted(sled_agent) => { - self.sled_agent = Some(sled_agent); - self.check_latest_hardware_snapshot().await; - } - HardwareMonitorMsg::ServiceManagerCreated(service_manager) => { - let tofino_loaded = - self.tofino_manager.become_ready(service_manager); - if tofino_loaded { - self.activate_switch().await; - } - } - } - } - // Handle an update from the [`HardwareMonitor`] async fn handle_hardware_update( &mut self, diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index cf74f54e7e..9411e30c2d 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -17,7 +17,9 @@ use crate::bootstrap::bootstore_setup::{ }; use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; use crate::config::Config; -use crate::hardware_monitor::{HardwareMonitor, HardwareMonitorHandle}; +use crate::hardware_monitor::HardwareMonitor; +use crate::services::ServiceManager; +use crate::sled_agent::SledAgent; use crate::storage_monitor::{StorageMonitor, StorageMonitorHandle}; use crate::zone_bundle::{CleanupContext, ZoneBundler}; use bootstore::schemes::v0 as bootstore; @@ -27,6 +29,7 @@ use sled_storage::disk::SyntheticDisk; use sled_storage::manager::{StorageHandle, StorageManager}; use slog::{info, Logger}; use std::net::Ipv6Addr; +use tokio::sync::oneshot; /// A mechanism for interacting with all long running tasks that can be shared /// between the bootstrap-agent and sled-agent code. 
@@ -50,10 +53,6 @@ pub struct LongRunningTaskHandles { /// A mechanism for interacting with the hardware device tree pub hardware_manager: HardwareManager, - /// A mechanism for interacting with the task that monitors for hardware - /// updates from the [`HardwareManager`] - pub hardware_monitor: HardwareMonitorHandle, - // A handle for interacting with the bootstore pub bootstore: bootstore::NodeHandle, @@ -67,7 +66,11 @@ pub async fn spawn_all_longrunning_tasks( sled_mode: SledMode, global_zone_bootstrap_ip: Ipv6Addr, config: &Config, -) -> LongRunningTaskHandles { +) -> ( + LongRunningTaskHandles, + oneshot::Sender, + oneshot::Sender, +) { let storage_key_requester = spawn_key_manager(log); let mut storage_manager = spawn_storage_manager(log, storage_key_requester.clone()); @@ -78,7 +81,7 @@ pub async fn spawn_all_longrunning_tasks( let hardware_manager = spawn_hardware_manager(log, sled_mode); // Start monitoring for hardware changes - let hardware_monitor = + let (sled_agent_started_tx, service_manager_ready_tx) = spawn_hardware_monitor(log, &hardware_manager, &storage_manager); // Add some synthetic disks if necessary. @@ -100,15 +103,18 @@ pub async fn spawn_all_longrunning_tasks( let zone_bundler = spawn_zone_bundler_tasks(log, &mut storage_manager); - LongRunningTaskHandles { - storage_key_requester, - storage_manager, - storage_monitor, - hardware_manager, - hardware_monitor, - bootstore, - zone_bundler, - } + ( + LongRunningTaskHandles { + storage_key_requester, + storage_manager, + storage_monitor, + hardware_manager, + bootstore, + zone_bundler, + }, + sled_agent_started_tx, + service_manager_ready_tx, + ) } fn spawn_key_manager(log: &Logger) -> StorageKeyRequester { @@ -125,7 +131,7 @@ fn spawn_storage_manager( key_requester: StorageKeyRequester, ) -> StorageHandle { info!(log, "Starting StorageManager"); - let (mut manager, handle) = StorageManager::new(log, key_requester); + let (manager, handle) = StorageManager::new(log, key_requester); tokio::spawn(async move { manager.run().await; }); @@ -165,14 +171,14 @@ fn spawn_hardware_monitor( log: &Logger, hardware_manager: &HardwareManager, storage_handle: &StorageHandle, -) -> HardwareMonitorHandle { +) -> (oneshot::Sender, oneshot::Sender) { info!(log, "Starting HardwareMonitor"); - let (mut monitor, handle) = + let (mut monitor, sled_agent_started_tx, service_manager_ready_tx) = HardwareMonitor::new(log, hardware_manager, storage_handle); tokio::spawn(async move { monitor.run().await; }); - handle + (sled_agent_started_tx, service_manager_ready_tx) } async fn spawn_bootstore_tasks( diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 33859465cb..3d99ee9558 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -3204,7 +3204,7 @@ mod test { } async fn setup_storage() -> StorageHandle { - let (mut manager, handle) = FakeStorageManager::new(); + let (manager, handle) = FakeStorageManager::new(); // Spawn the storage manager as done by sled-agent tokio::spawn(async move { diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 91604b7099..b17baf533f 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -2222,7 +2222,7 @@ mod illumos_tests { } async fn setup_storage() -> StorageHandle { - let (mut manager, handle) = FakeStorageManager::new(); + let (manager, handle) = FakeStorageManager::new(); // Spawn the storage manager as done by sled-agent tokio::spawn(async move { diff --git a/sled-storage/src/manager.rs 
b/sled-storage/src/manager.rs index b60114a519..8b121bc467 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -392,7 +392,7 @@ impl StorageManager { // We hit a transient error in a prior iteration. saved.insert(disk); } else { - // Try ot add the disk. If there was a transient error the disk will + // Try to add the disk. If there was a transient error the disk will // have been requeued. If there was a permanent error, it will have been // dropped. If there is an another unexpected error, we will handle it and // requeue ourselves. From c0e3c710ac59c7e1f7384d14facd586c1f6a4067 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 9 Nov 2023 17:42:06 +0000 Subject: [PATCH 56/66] Use oneshot for UnderlayAccess --- sled-agent/src/bootstrap/pre_server.rs | 4 ++ sled-agent/src/bootstrap/server.rs | 25 ++++++++--- sled-agent/src/long_running_tasks.rs | 19 ++++---- sled-agent/src/server.rs | 4 ++ sled-agent/src/services.rs | 2 +- sled-agent/src/sled_agent.rs | 10 +++-- sled-agent/src/storage_monitor.rs | 60 +++++++------------------- 7 files changed, 58 insertions(+), 66 deletions(-) diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index ff87437001..61a8b09edf 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -18,6 +18,7 @@ use crate::long_running_tasks::{ }; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; +use crate::storage_monitor::UnderlayAccess; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; @@ -50,6 +51,7 @@ pub(super) struct BootstrapAgentStartup { pub(super) service_manager: ServiceManager, pub(super) long_running_task_handles: LongRunningTaskHandles, pub(super) sled_agent_started_tx: oneshot::Sender, + pub(super) underlay_available_tx: oneshot::Sender, } impl BootstrapAgentStartup { @@ -112,6 +114,7 @@ impl BootstrapAgentStartup { long_running_task_handles, sled_agent_started_tx, service_manager_ready_tx, + underlay_available_tx, ) = spawn_all_longrunning_tasks( &base_log, sled_mode, @@ -151,6 +154,7 @@ impl BootstrapAgentStartup { service_manager, long_running_task_handles, sled_agent_started_tx, + underlay_available_tx, }) } } diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 90653873c4..0a055d13cc 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -26,6 +26,7 @@ use crate::long_running_tasks::LongRunningTaskHandles; use crate::server::Server as SledAgentServer; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; +use crate::storage_monitor::UnderlayAccess; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; @@ -177,6 +178,7 @@ impl Server { service_manager, long_running_task_handles, sled_agent_started_tx, + underlay_available_tx, } = BootstrapAgentStartup::run(config).await?; // Do we have a StartSledAgentRequest stored in the ledger? @@ -242,6 +244,7 @@ impl Server { &config, &sled_request.request, long_running_task_handles.clone(), + underlay_available_tx, service_manager.clone(), &ddm_admin_localhost_client, &base_log, @@ -249,9 +252,7 @@ impl Server { ) .await?; - // We've created sled-agent; we need to (possibly) reconfigure the - // switch zone, if we're a scrimlet, to give it our underlay network - // information. 
+ // Give the HardwareMonitory access to the `SledAgent` let sled_agent = sled_agent_server.sled_agent(); sled_agent_started_tx .send(sled_agent.clone()) @@ -265,7 +266,10 @@ impl Server { sled_agent.cold_boot_load_services().await; SledAgentState::ServerStarted(sled_agent_server) } else { - SledAgentState::Bootstrapping(Some(sled_agent_started_tx)) + SledAgentState::Bootstrapping( + Some(sled_agent_started_tx), + Some(underlay_available_tx), + ) }; // Spawn our inner task that handles any future hardware updates and any @@ -308,7 +312,10 @@ impl Server { // bootstrap server). enum SledAgentState { // We're still in the bootstrapping phase, waiting for a sled-agent request. - Bootstrapping(Option>), + Bootstrapping( + Option>, + Option>, + ), // ... or the sled agent server is running. ServerStarted(SledAgentServer), } @@ -345,6 +352,7 @@ async fn start_sled_agent( config: &SledConfig, request: &StartSledAgentRequest, long_running_task_handles: LongRunningTaskHandles, + underlay_available_tx: oneshot::Sender, service_manager: ServiceManager, ddmd_client: &DdmAdminClient, base_log: &Logger, @@ -392,6 +400,7 @@ async fn start_sled_agent( request.clone(), long_running_task_handles.clone(), service_manager, + underlay_available_tx, ) .await .map_err(SledAgentServerStartError::FailedStartingServer)?; @@ -551,7 +560,10 @@ impl Inner { log: &Logger, ) { match &mut self.state { - SledAgentState::Bootstrapping(sled_agent_started_tx) => { + SledAgentState::Bootstrapping( + sled_agent_started_tx, + underlay_available_tx, + ) => { // Extract from an option to satisfy the borrow checker let sled_agent_started_tx = sled_agent_started_tx.take().unwrap(); @@ -559,6 +571,7 @@ impl Inner { &self.config, &request, self.long_running_task_handles.clone(), + underlay_available_tx.take().unwrap(), self.service_manager.clone(), &self.ddm_admin_localhost_client, &self.base_log, diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index 9411e30c2d..e6736f6ea6 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -20,7 +20,7 @@ use crate::config::Config; use crate::hardware_monitor::HardwareMonitor; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use crate::storage_monitor::{StorageMonitor, StorageMonitorHandle}; +use crate::storage_monitor::{StorageMonitor, UnderlayAccess}; use crate::zone_bundle::{CleanupContext, ZoneBundler}; use bootstore::schemes::v0 as bootstore; use key_manager::{KeyManager, StorageKeyRequester}; @@ -45,11 +45,6 @@ pub struct LongRunningTaskHandles { /// for establishing zpools on disks and managing their datasets. pub storage_manager: StorageHandle, - /// A task which monitors for updates from the `StorageManager` and takes - /// actions based on those updates, such as informing Nexus and setting - /// up dump locations. - pub storage_monitor: StorageMonitorHandle, - /// A mechanism for interacting with the hardware device tree pub hardware_manager: HardwareManager, @@ -70,12 +65,14 @@ pub async fn spawn_all_longrunning_tasks( LongRunningTaskHandles, oneshot::Sender, oneshot::Sender, + oneshot::Sender, ) { let storage_key_requester = spawn_key_manager(log); let mut storage_manager = spawn_storage_manager(log, storage_key_requester.clone()); - let storage_monitor = spawn_storage_monitor(log, storage_manager.clone()); + let underlay_available_tx = + spawn_storage_monitor(log, storage_manager.clone()); // TODO: Does this need to run inside tokio::task::spawn_blocking? 
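The TODO above is resolved two patches later (PATCH 57) by moving the synchronous constructor onto tokio's blocking pool. A minimal sketch of that spawn_blocking pattern, using a stand-in scan_hardware function rather than the real HardwareManager::new:

    use std::time::Duration;

    // Stand-in for a constructor that does blocking work (device scans, etc.).
    fn scan_hardware() -> Vec<String> {
        std::thread::sleep(Duration::from_millis(50));
        vec!["disk0".to_string(), "disk1".to_string()]
    }

    #[tokio::main]
    async fn main() {
        // Run the blocking call on the dedicated blocking thread pool so it
        // cannot stall the async worker threads, then await the JoinHandle.
        let devices = tokio::task::spawn_blocking(scan_hardware)
            .await
            .expect("blocking task panicked");
        println!("found {} devices", devices.len());
    }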
let hardware_manager = spawn_hardware_manager(log, sled_mode); @@ -107,13 +104,13 @@ pub async fn spawn_all_longrunning_tasks( LongRunningTaskHandles { storage_key_requester, storage_manager, - storage_monitor, hardware_manager, bootstore, zone_bundler, }, sled_agent_started_tx, service_manager_ready_tx, + underlay_available_tx, ) } @@ -141,14 +138,14 @@ fn spawn_storage_manager( fn spawn_storage_monitor( log: &Logger, storage_handle: StorageHandle, -) -> StorageMonitorHandle { +) -> oneshot::Sender { info!(log, "Starting StorageMonitor"); - let (mut storage_monitor, handle) = + let (storage_monitor, underlay_available_tx) = StorageMonitor::new(log, storage_handle); tokio::spawn(async move { storage_monitor.run().await; }); - handle + underlay_available_tx } fn spawn_hardware_manager( diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index c9828e7542..903c8dabaa 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -11,10 +11,12 @@ use crate::bootstrap::params::StartSledAgentRequest; use crate::long_running_tasks::LongRunningTaskHandles; use crate::nexus::NexusClientWithResolver; use crate::services::ServiceManager; +use crate::storage_monitor::UnderlayAccess; use internal_dns::resolver::Resolver; use slog::Logger; use std::net::SocketAddr; use std::sync::Arc; +use tokio::sync::oneshot; use uuid::Uuid; /// Packages up a [`SledAgent`], running the sled agent API under a Dropshot @@ -40,6 +42,7 @@ impl Server { request: StartSledAgentRequest, long_running_tasks_handles: LongRunningTaskHandles, services: ServiceManager, + underlay_available_tx: oneshot::Sender, ) -> Result { info!(log, "setting up sled agent server"); @@ -62,6 +65,7 @@ impl Server { request, services, long_running_tasks_handles, + underlay_available_tx, ) .await .map_err(|e| e.to_string())?; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 3d99ee9558..90466370fc 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -474,10 +474,10 @@ impl ServiceManager { } async fn all_service_ledgers(&self) -> Vec { - let resources = self.inner.storage.get_latest_resources().await; if let Some(dir) = self.inner.ledger_directory_override.get() { return vec![dir.join(SERVICES_LEDGER_FILENAME)]; } + let resources = self.inner.storage.get_latest_resources().await; resources .all_m2_mountpoints(CONFIG_DATASET) .into_iter() diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index fe6ff0a1c2..1f08177d21 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -57,6 +57,7 @@ use slog::Logger; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::sync::Arc; +use tokio::sync::oneshot; use uuid::Uuid; #[cfg(not(test))] @@ -271,6 +272,7 @@ impl SledAgent { request: StartSledAgentRequest, services: ServiceManager, long_running_task_handles: LongRunningTaskHandles, + underlay_available_tx: oneshot::Sender, ) -> Result { // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. @@ -347,13 +349,13 @@ impl SledAgent { // Inform the `StorageMonitor` that the underlay is available so that // it can try to contact nexus. 
- long_running_task_handles - .storage_monitor - .underlay_available(UnderlayAccess { + underlay_available_tx + .send(UnderlayAccess { nexus_client: nexus_client.clone(), sled_id: request.id, }) - .await; + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); let instances = InstanceManager::new( parent_log.clone(), diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index 71e61e84c2..3500803164 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -24,23 +24,15 @@ use sled_storage::resources::StorageResources; use slog::Logger; use std::fmt::Debug; use std::pin::Pin; -use tokio::sync::mpsc; +use tokio::sync::oneshot; use uuid::Uuid; -const QUEUE_SIZE: usize = 10; - #[derive(From, Clone, Debug)] enum NexusDiskRequest { Put(PhysicalDiskPutRequest), Delete(PhysicalDiskDeleteRequest), } -/// A message sent from the `StorageMonitorHandle` to the `StorageMonitor`. -#[derive(Debug)] -pub enum StorageMonitorMsg { - UnderlayAvailable(UnderlayAccess), -} - /// Describes the access to the underlay used by the StorageManager. #[derive(Clone)] pub struct UnderlayAccess { @@ -56,25 +48,12 @@ impl Debug for UnderlayAccess { } } -/// A mechanism for interacting with the StorageMonitor -#[derive(Clone)] -pub struct StorageMonitorHandle { - tx: mpsc::Sender, -} - -impl StorageMonitorHandle { - pub async fn underlay_available(&self, underlay_access: UnderlayAccess) { - self.tx - .send(StorageMonitorMsg::UnderlayAvailable(underlay_access)) - .await - .unwrap(); - } -} - pub struct StorageMonitor { log: Logger, storage_manager: StorageHandle, - handle_rx: mpsc::Receiver, + + // Receive a onetime notification that the underlay is available + underlay_available_rx: oneshot::Receiver, // A cached copy of the `StorageResources` from the last update storage_resources: StorageResources, @@ -93,8 +72,8 @@ impl StorageMonitor { pub fn new( log: &Logger, storage_manager: StorageHandle, - ) -> (StorageMonitor, StorageMonitorHandle) { - let (handle_tx, handle_rx) = mpsc::channel(QUEUE_SIZE); + ) -> (StorageMonitor, oneshot::Sender) { + let (underlay_available_tx, underlay_available_rx) = oneshot::channel(); let storage_resources = StorageResources::default(); let dump_setup = DumpSetup::new(&log); let log = log.new(o!("component" => "StorageMonitor")); @@ -102,20 +81,20 @@ impl StorageMonitor { StorageMonitor { log, storage_manager, - handle_rx, + underlay_available_rx, storage_resources, underlay: None, nexus_notifications: FuturesOrdered::new(), dump_setup, }, - StorageMonitorHandle { tx: handle_tx }, + underlay_available_tx, ) } /// Run the main receive loop of the `StorageMonitor` /// /// This should be spawned into a tokio task - pub async fn run(&mut self) { + pub async fn run(mut self) { loop { tokio::select! 
{ res = self.nexus_notifications.next(), @@ -131,28 +110,21 @@ impl StorageMonitor { ); self.handle_resource_update(resources).await; } - Some(msg) = self.handle_rx.recv() => { + Ok(underlay) = &mut self.underlay_available_rx, + if self.underlay.is_none() => + { + let sled_id = underlay.sled_id; info!( self.log, - "Received storage monitor message"; - "monitor_msg" => ?msg + "Underlay Available"; "sled_id" => %sled_id ); - self.handle_monitor_msg(msg).await; + self.underlay = Some(underlay); + self.notify_nexus_about_existing_resources(sled_id).await; } } } } - async fn handle_monitor_msg(&mut self, msg: StorageMonitorMsg) { - match msg { - StorageMonitorMsg::UnderlayAvailable(underlay) => { - let sled_id = underlay.sled_id; - self.underlay = Some(underlay); - self.notify_nexus_about_existing_resources(sled_id).await; - } - } - } - /// When the underlay becomes available, we need to notify nexus about any /// discovered disks and pools, since we don't attempt to notify until there /// is an underlay available. From a7848f91ffbfd1208b6a0baf69c17b84915509ba Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 9 Nov 2023 18:17:16 +0000 Subject: [PATCH 57/66] more review fixes --- sled-agent/src/long_running_tasks.rs | 12 ++++++++---- sled-agent/src/storage_monitor.rs | 7 ++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index e6736f6ea6..f4a665c098 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -74,8 +74,7 @@ pub async fn spawn_all_longrunning_tasks( let underlay_available_tx = spawn_storage_monitor(log, storage_manager.clone()); - // TODO: Does this need to run inside tokio::task::spawn_blocking? - let hardware_manager = spawn_hardware_manager(log, sled_mode); + let hardware_manager = spawn_hardware_manager(log, sled_mode).await; // Start monitoring for hardware changes let (sled_agent_started_tx, service_manager_ready_tx) = @@ -148,7 +147,7 @@ fn spawn_storage_monitor( underlay_available_tx } -fn spawn_hardware_manager( +async fn spawn_hardware_manager( log: &Logger, sled_mode: SledMode, ) -> HardwareManager { @@ -161,7 +160,12 @@ fn spawn_hardware_manager( // There are pros and cons to both methods, but the reason to mention it here is that // the handle in this case is the `HardwareManager` itself. info!(log, "Starting HardwareManager"; "sled_mode" => ?sled_mode); - HardwareManager::new(log, sled_mode).unwrap() + let log = log.clone(); + tokio::task::spawn_blocking(move || { + HardwareManager::new(&log, sled_mode).unwrap() + }) + .await + .unwrap() } fn spawn_hardware_monitor( diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index 3500803164..4f4af3b59e 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -100,7 +100,12 @@ impl StorageMonitor { res = self.nexus_notifications.next(), if !self.nexus_notifications.is_empty() => { - info!(self.log, "Nexus notification complete: {:?}", res); + match res { + Some(Ok(s)) => { + info!(self.log, "Nexus notification complete: {s}"); + } + e => error!(self.log, "Nexus notification error: {e:?}") + } } resources = self.storage_manager.wait_for_changes() => { info!( From 3998708ef6d3ec50c84c37edbaff0663f178ef23 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 9 Nov 2023 20:18:41 +0000 Subject: [PATCH 58/66] more review fixes --- sled-agent/src/bootstrap/server.rs | 1 + sled-storage/src/keyfile.rs | 23 ++++++++++- sled-storage/src/manager.rs | 62 +++++++++++++++++++----------- 3 files changed, 62 insertions(+), 24 deletions(-) diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 0a055d13cc..68d7aedf02 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -348,6 +348,7 @@ impl From for StartError { } } +#[allow(clippy::too_many_arguments)] async fn start_sled_agent( config: &SledConfig, request: &StartSledAgentRequest, diff --git a/sled-storage/src/keyfile.rs b/sled-storage/src/keyfile.rs index 105092c99e..48e5d9a528 100644 --- a/sled-storage/src/keyfile.rs +++ b/sled-storage/src/keyfile.rs @@ -5,7 +5,7 @@ //! Key file support for ZFS dataset encryption use illumos_utils::zfs::Keypath; -use slog::{info, Logger}; +use slog::{error, info, Logger}; use tokio::fs::{remove_file, File}; use tokio::io::{AsyncSeekExt, AsyncWriteExt, SeekFrom}; @@ -18,6 +18,7 @@ pub struct KeyFile { path: Keypath, file: File, log: Logger, + zero_and_unlink_called: bool, } impl KeyFile { @@ -34,7 +35,12 @@ impl KeyFile { .await?; file.write_all(key).await?; info!(log, "Created keyfile {}", path); - Ok(KeyFile { path, file, log: log.clone() }) + Ok(KeyFile { + path, + file, + log: log.clone(), + zero_and_unlink_called: false, + }) } /// These keyfiles live on a tmpfs and we zero the file so the data doesn't @@ -43,6 +49,7 @@ impl KeyFile { /// It'd be nice to `impl Drop for `KeyFile` and then call `zero` /// from within the drop handler, but async `Drop` isn't supported. pub async fn zero_and_unlink(&mut self) -> std::io::Result<()> { + self.zero_and_unlink_called = true; let zeroes = [0u8; 32]; let _ = self.file.seek(SeekFrom::Start(0)).await?; self.file.write_all(&zeroes).await?; @@ -55,3 +62,15 @@ impl KeyFile { &self.path } } + +impl Drop for KeyFile { + fn drop(&mut self) { + if !self.zero_and_unlink_called { + error!( + self.log, + "Failed to call zero_and_unlink for keyfile"; + "path" => %self.path + ); + } + } +} diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 8b121bc467..e69034e396 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -208,7 +208,7 @@ impl FakeStorageManager { ) } - /// Run the main receive loop of the `StorageManager` + /// Run the main receive loop of the `FakeStorageManager` /// /// This should be spawned into a tokio task pub async fn run(mut self) { @@ -253,11 +253,14 @@ impl FakeStorageManager { pub struct StorageManager { log: Logger, state: StorageManagerState, + // Used to find the capacity of the channel for tracking purposes + tx: mpsc::Sender, rx: mpsc::Receiver, resources: StorageResources, queued_u2_drives: HashSet, key_requester: StorageKeyRequester, resource_updates: watch::Sender, + last_logged_capacity: usize, } impl StorageManager { @@ -272,11 +275,13 @@ impl StorageManager { StorageManager { log: log.new(o!("component" => "StorageManager")), state: StorageManagerState::WaitingForKeyManager, + tx: tx.clone(), rx, resources, queued_u2_drives: HashSet::new(), key_requester, resource_updates: update_tx, + last_logged_capacity: 0, }, StorageHandle { tx, resource_updates: update_rx }, ) @@ -292,16 +297,8 @@ impl StorageManager { interval.set_missed_tick_behavior(MissedTickBehavior::Delay); tokio::select! 
{ res = self.step() => { - match res { - Some(Ok(())) => (), - Some(Err(e)) => warn!(self.log, "{e}"), - None => { - info!( - self.log, - "Shutting down StorageManager task: no handles." - ); - return; - } + if let Err(e) = res { + warn!(self.log, "{e}"); } } _ = interval.tick(), @@ -321,17 +318,38 @@ impl StorageManager { /// /// Return `None` if the sender side has disappeared and the task should /// shutdown. - pub async fn step(&mut self) -> Option> { - let Some(req) = self.rx.recv().await else { - return None; - }; + pub async fn step(&mut self) -> Result<(), Error> { + const CAPACITY_LOG_THRESHOLD: usize = 10; + // We check the capacity and log it every time it changes by at least 10 + // entries in either direction. + let current = self.tx.capacity(); + if self.last_logged_capacity.saturating_sub(current) + >= CAPACITY_LOG_THRESHOLD + { + info!( + self.log, + "Channel capacity decreased"; + "previous" => ?self.last_logged_capacity, + "current" => ?current + ); + self.last_logged_capacity = current; + } else if current.saturating_sub(self.last_logged_capacity) + >= CAPACITY_LOG_THRESHOLD + { + info!( + self.log, + "Channel capacity increased"; + "previous" => ?self.last_logged_capacity, + "current" => ?current + ); + self.last_logged_capacity = current; + } + // The sending side never disappears because we hold a copy + let req = self.rx.recv().await.unwrap(); info!(self.log, "Received {:?}", req); let should_send_updates = match req { StorageRequest::AddDisk(raw_disk) => { - match self.add_disk(raw_disk).await { - Ok(is_new) => is_new, - Err(e) => return Some(Err(e)), - } + self.add_disk(raw_disk).await? } StorageRequest::RemoveDisk(raw_disk) => { self.remove_disk(raw_disk).await @@ -368,7 +386,7 @@ impl StorageManager { let _ = self.resource_updates.send_replace(self.resources.clone()); } - Some(Ok(())) + Ok(()) } // Loop through all queued disks inserting them into [`StorageResources`] @@ -782,7 +800,7 @@ mod tests { let dir = tempdir().unwrap(); let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); handle.upsert_disk(disk).await; - manager.step().await.unwrap().unwrap(); + manager.step().await.unwrap(); // We can't wait for a reply through the handle as the storage manager task // isn't actually running. We just check the resources directly. @@ -795,7 +813,7 @@ mod tests { // Now inform the storage manager that the key manager is ready // The queued disk should not be added due to the error handle.key_manager_ready().await; - manager.step().await.unwrap().unwrap(); + manager.step().await.unwrap(); assert!(manager.resources.all_u2_zpools().is_empty()); // Manually simulating a timer tick to add queued disks should also From 9b5816afde41362b24d53e276df568af3532bf2f Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 9 Nov 2023 21:02:35 +0000 Subject: [PATCH 59/66] Some more review updates --- sled-agent/src/dump_setup.rs | 2 +- sled-agent/src/storage_monitor.rs | 10 +++++----- sled-storage/src/manager.rs | 22 +++++++++++++++------- sled-storage/src/resources.rs | 21 ++++++++++++++------- 4 files changed, 35 insertions(+), 20 deletions(-) diff --git a/sled-agent/src/dump_setup.rs b/sled-agent/src/dump_setup.rs index 50bbda44b4..e675e6e12d 100644 --- a/sled-agent/src/dump_setup.rs +++ b/sled-agent/src/dump_setup.rs @@ -100,7 +100,7 @@ const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300); impl DumpSetup { pub(crate) async fn update_dumpdev_setup( &self, - disks: &Arc>, + disks: &BTreeMap, ) { let log = &self.log; let mut m2_dump_slices = Vec::new(); diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index 4f4af3b59e..3d49f44815 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -170,7 +170,7 @@ impl StorageMonitor { self.add_zpool_notify(pool, put).await; } } - self.dump_setup.update_dumpdev_setup(&updated_resources.disks).await; + self.dump_setup.update_dumpdev_setup(&updated_resources.disks()).await; // Save the updated `StorageResources` self.storage_resources = updated_resources; @@ -329,8 +329,8 @@ fn compute_resource_diffs( // Diff the existing resources with the update to see what has changed // This loop finds disks and pools that were modified or deleted - for (disk_id, (disk, pool)) in current.disks.iter() { - match updated.disks.get(disk_id) { + for (disk_id, (disk, pool)) in current.disks().iter() { + match updated.disks().get(disk_id) { Some((updated_disk, updated_pool)) => { if disk != updated_disk { disk_puts.push(PhysicalDiskPutRequest { @@ -356,8 +356,8 @@ fn compute_resource_diffs( // Diff the existing resources with the update to see what has changed // This loop finds new disks and pools - for (disk_id, (updated_disk, updated_pool)) in updated.disks.iter() { - if !current.disks.contains_key(disk_id) { + for (disk_id, (updated_disk, updated_pool)) in updated.disks().iter() { + if !current.disks().contains_key(disk_id) { disk_puts.push(PhysicalDiskPutRequest { sled_id: *sled_id, model: disk_id.model.clone(), diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index e69034e396..7b48610e15 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -351,9 +351,7 @@ impl StorageManager { StorageRequest::AddDisk(raw_disk) => { self.add_disk(raw_disk).await? } - StorageRequest::RemoveDisk(raw_disk) => { - self.remove_disk(raw_disk).await - } + StorageRequest::RemoveDisk(raw_disk) => self.remove_disk(raw_disk), StorageRequest::DisksChanged(raw_disks) => { self.ensure_using_exactly_these_disks(raw_disks).await } @@ -433,7 +431,17 @@ impl StorageManager { send_updates } - // Add a disk to `StorageResources` if it is new and return Ok(true) if so + // Add a disk to `StorageResources` if it is new, + // updated, or its pool has been updated as determined by + // [`$crate::resources::StorageResources::insert_disk`] and we decide not to + // queue the disk for later addition. If the disk was inserted to resources + // return `Ok(true)`. + // + // In case the disk is queued, it wasn't inserted into `StorageResources` + // for another reason, or we have already consumed and logged an error + // return `Ok(false). + // + // In all other cases return an Error. 
async fn add_disk(&mut self, raw_disk: RawDisk) -> Result { match raw_disk.variant() { DiskVariant::U2 => self.add_u2_disk(raw_disk).await, @@ -486,7 +494,7 @@ impl StorageManager { } // Delete a real disk and return `true` if the disk was actually removed - async fn remove_disk(&mut self, raw_disk: RawDisk) -> bool { + fn remove_disk(&mut self, raw_disk: RawDisk) -> bool { // If the disk is a U.2, we want to first delete it from any queued disks let _ = self.queued_u2_drives.remove(&raw_disk); self.resources.remove_disk(raw_disk.identity()) @@ -512,7 +520,7 @@ impl StorageManager { // Find all existing disks not in the current set let to_remove: Vec = self .resources - .disks + .disks() .keys() .filter_map(|id| { if !all_ids.contains(id) { @@ -555,7 +563,7 @@ impl StorageManager { info!(self.log, "add_dataset: {:?}", request); if !self .resources - .disks + .disks() .values() .any(|(_, pool)| &pool.name == request.dataset_name.pool()) { diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index f3444ac798..93f7f0793c 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -34,21 +34,28 @@ const ZONE_BUNDLE_DIRECTORY: &str = "zone"; /// inside the `StorageManager` task if there are any outstanding copies. /// Therefore, we only pay the cost to update infrequently, and no locks are /// required by callers when operating on cloned data. The only contention here -/// is for the refrence counters of the internal Arcs when `StorageResources` +/// is for the reference counters of the internal Arcs when `StorageResources` /// gets cloned or dropped. #[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct StorageResources { // All disks, real and synthetic, being managed by this sled - pub disks: Arc>, + disks: Arc>, } impl StorageResources { + /// Return a reference to the current snapshot of disks + pub fn disks(&self) -> &BTreeMap { + &self.disks + } + /// Insert a disk and its zpool /// - /// Return true if data was changed, false otherwise - /// - /// This really should not be used outside this crate, except for testing - pub fn insert_disk(&mut self, disk: Disk) -> Result { + /// If the disk passed in is new or modified, or its pool size or pool name + /// changed, then insert the changed values and return `true`. Otherwise, + /// do not insert anything and return false. For instance, if only the pool + /// health changes, because it is not one of the checked values, we will not + /// insert the update and will return `false`. + pub(crate) fn insert_disk(&mut self, disk: Disk) -> Result { let disk_id = disk.identity().clone(); let zpool_name = disk.zpool_name().clone(); let zpool = Pool::new(zpool_name, disk_id.clone())?; @@ -130,6 +137,7 @@ impl StorageResources { } None } + /// Returns all M.2 zpools pub fn all_m2_zpools(&self) -> Vec { self.all_zpools(DiskVariant::M2) @@ -159,7 +167,6 @@ impl StorageResources { pub fn get_all_zpools(&self) -> Vec<(ZpoolName, DiskVariant)> { self.disks .values() - .cloned() .map(|(disk, _)| (disk.zpool_name().clone(), disk.variant())) .collect() } From 34fe6cce485b8c4765c2e2f410a1b15779eed0d0 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 9 Nov 2023 21:32:08 +0000 Subject: [PATCH 60/66] Review fixes --- sled-storage/src/manager.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 7b48610e15..243f154037 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -53,7 +53,7 @@ const QUEUE_SIZE: usize = 256; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum StorageManagerState { WaitingForKeyManager, - QueuingDisks, + QueueingDisks, Normal, } @@ -72,7 +72,7 @@ enum StorageRequest { NewFilesystem(NewFilesystemRequest), KeyManagerReady, /// This will always grab the latest state after any new updates, as it - /// serializes through the `StorageManager` task. + /// serializes through the `StorageManager` task after all prior requests. /// This serialization is particularly useful for tests. GetLatestResources(oneshot::Sender), @@ -302,7 +302,7 @@ impl StorageManager { } } _ = interval.tick(), - if self.state == StorageManagerState::QueuingDisks => + if self.state == StorageManagerState::QueueingDisks => { if self.add_queued_disks().await { let _ = self.resource_updates.send_replace(self.resources.clone()); @@ -404,7 +404,7 @@ impl StorageManager { let queued = std::mem::take(&mut self.queued_u2_drives); let mut iter = queued.into_iter(); while let Some(disk) = iter.next() { - if self.state == StorageManagerState::QueuingDisks { + if self.state == StorageManagerState::QueueingDisks { // We hit a transient error in a prior iteration. saved.insert(disk); } else { @@ -467,7 +467,7 @@ impl StorageManager { "disk_id" => ?raw_disk.identity() ); self.queued_u2_drives.insert(raw_disk); - self.state = StorageManagerState::QueuingDisks; + self.state = StorageManagerState::QueueingDisks; Ok(false) } Err(err) => { @@ -692,7 +692,7 @@ mod tests { // Check other non-normal stages and ensure disk gets queued manager.queued_u2_drives.clear(); - manager.state = StorageManagerState::QueuingDisks; + manager.state = StorageManagerState::QueueingDisks; manager.add_u2_disk(raw_disk.clone()).await.unwrap(); assert!(manager.resources.all_u2_zpools().is_empty()); assert_eq!(manager.queued_u2_drives, HashSet::from([raw_disk])); From 7884213840a27055b19cba46083d99f3ba42fdc0 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 9 Nov 2023 21:39:30 +0000 Subject: [PATCH 61/66] USE_MOCKS only exists during testing now --- illumos-utils/src/lib.rs | 3 ++- sled-storage/Cargo.toml | 2 +- sled-storage/src/manager.rs | 10 +++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index 3b696d178b..1faa4c5c37 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -112,7 +112,7 @@ mod inner { } // Due to feature unification, the `testing` feature is enabled when some tests -// don't actually want to use it. We allow them to opt out of the use of the +// don't actually want to use it. We allow them to opt out of the use of the // free function here. We also explicitly opt-in where mocks are used. // // Note that this only works if the tests that use mocks and those that don't @@ -120,6 +120,7 @@ mod inner { // so there is no problem currently. // // We can remove all this when we get rid of the mocks. 
+#[cfg(any(test, feature = "testing"))] pub static USE_MOCKS: AtomicBool = AtomicBool::new(false); pub fn execute( diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index 82ab206a8e..cb3a790631 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -24,7 +24,7 @@ uuid.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] -illumos-utils = { workspace = true, features = ["tmp_keypath"] } +illumos-utils = { workspace = true, features = ["tmp_keypath", "testing"] } omicron-test-utils.workspace = true camino-tempfile.workspace = true diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 243f154037..f7f9238a56 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -922,7 +922,7 @@ mod tests { let resources = handle.wait_for_changes().await; let expected: HashSet<_> = disks.iter().take(3).map(|d| d.identity()).collect(); - let actual: HashSet<_> = resources.disks.keys().collect(); + let actual: HashSet<_> = resources.disks().keys().collect(); assert_eq!(expected, actual); // Add first three disks after the initial one. The returned resources @@ -935,7 +935,7 @@ mod tests { let resources = handle.wait_for_changes().await; let expected: HashSet<_> = disks.iter().skip(1).take(3).map(|d| d.identity()).collect(); - let actual: HashSet<_> = resources.disks.keys().collect(); + let actual: HashSet<_> = resources.disks().keys().collect(); assert_eq!(expected, actual); // Ensure the same set of disks and make sure no change occurs @@ -958,7 +958,7 @@ mod tests { let resources = handle.wait_for_changes().await; let expected: HashSet<_> = disks.iter().skip(4).take(5).map(|d| d.identity()).collect(); - let actual: HashSet<_> = resources.disks.keys().collect(); + let actual: HashSet<_> = resources.disks().keys().collect(); assert_eq!(expected, actual); // Finally, change the zpool backing of the 5th disk to be that of the 10th @@ -980,10 +980,10 @@ mod tests { let resources = handle.wait_for_changes().await; // Ensure the one modified disk changed as we expected - assert_eq!(5, resources.disks.len()); + assert_eq!(5, resources.disks().len()); for raw_disk in expected { let (disk, pool) = - resources.disks.get(raw_disk.identity()).unwrap(); + resources.disks().get(raw_disk.identity()).unwrap(); assert_eq!(disk.zpool_name(), raw_disk.zpool_name()); assert_eq!(&pool.name, disk.zpool_name()); assert_eq!(raw_disk.identity(), &pool.parent); From 49f26561fef0aba95571dccfc4ea52e023d57be2 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 9 Nov 2023 22:31:41 +0000 Subject: [PATCH 62/66] remove unnecessary ref --- sled-agent/src/storage_monitor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index 3d49f44815..f552fdfd86 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -170,7 +170,7 @@ impl StorageMonitor { self.add_zpool_notify(pool, put).await; } } - self.dump_setup.update_dumpdev_setup(&updated_resources.disks()).await; + self.dump_setup.update_dumpdev_setup(updated_resources.disks()).await; // Save the updated `StorageResources` self.storage_resources = updated_resources; From 35e252bde0ddf79f98a44687e812ff64f1587452 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 9 Nov 2023 23:33:40 +0000 Subject: [PATCH 63/66] remove autogenerated file --- nexus/preprocessed_configs/config.xml | 41 --------------------------- 1 file changed, 41 deletions(-) delete mode 100644 nexus/preprocessed_configs/config.xml diff --git a/nexus/preprocessed_configs/config.xml b/nexus/preprocessed_configs/config.xml deleted file mode 100644 index 9b13f12aea..0000000000 --- a/nexus/preprocessed_configs/config.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - - trace - true - - - 8123 - 9000 - 9004 - - ./ - - true - - - - - - - ::/0 - - - default - default - 1 - - - - - - - - - - - \ No newline at end of file From 11f1b3695b4954e94ce40b9680e8e863b735c56f Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 10 Nov 2023 06:15:49 +0000 Subject: [PATCH 64/66] More review cleanup --- sled-storage/src/manager.rs | 113 ++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 42 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index f7f9238a56..d5df71c0b2 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -57,6 +57,31 @@ pub enum StorageManagerState { Normal, } +enum AddDiskResult { + DiskInserted, + DiskAlreadyInserted, + DiskQueued, +} + +impl AddDiskResult { + fn disk_inserted(&self) -> bool { + match self { + AddDiskResult::DiskInserted => true, + _ => false, + } + } +} + +impl From for AddDiskResult { + fn from(value: bool) -> Self { + if value { + AddDiskResult::DiskInserted + } else { + AddDiskResult::DiskAlreadyInserted + } + } +} + #[derive(Debug)] struct NewFilesystemRequest { dataset_id: Uuid, @@ -281,7 +306,7 @@ impl StorageManager { queued_u2_drives: HashSet::new(), key_requester, resource_updates: update_tx, - last_logged_capacity: 0, + last_logged_capacity: QUEUE_SIZE, }, StorageHandle { tx, resource_updates: update_rx }, ) @@ -349,7 +374,7 @@ impl StorageManager { info!(self.log, "Received {:?}", req); let should_send_updates = match req { StorageRequest::AddDisk(raw_disk) => { - self.add_disk(raw_disk).await? + self.add_disk(raw_disk).await?.disk_inserted() } StorageRequest::RemoveDisk(raw_disk) => self.remove_disk(raw_disk), StorageRequest::DisksChanged(raw_disks) => { @@ -394,55 +419,53 @@ impl StorageManager { // // Return true if updates should be sent to watchers, false otherwise async fn add_queued_disks(&mut self) -> bool { - info!(self.log, "Attempting to add queued disks"); + info!( + self.log, + "Attempting to add queued disks"; + "num_disks" => %self.queued_u2_drives.len() + ); self.state = StorageManagerState::Normal; let mut send_updates = false; // Disks that should be requeued. - let mut saved = HashSet::new(); - let queued = std::mem::take(&mut self.queued_u2_drives); - let mut iter = queued.into_iter(); - while let Some(disk) = iter.next() { + let queued = self.queued_u2_drives.clone(); + let mut to_dequeue = HashSet::new(); + for disk in queued.iter() { if self.state == StorageManagerState::QueueingDisks { // We hit a transient error in a prior iteration. - saved.insert(disk); + break; } else { - // Try to add the disk. If there was a transient error the disk will - // have been requeued. If there was a permanent error, it will have been - // dropped. If there is an another unexpected error, we will handle it and - // requeue ourselves. 
match self.add_u2_disk(disk.clone()).await { - Err(err) => { - warn!( - self.log, - "Potentially transient error: {err}: requeuing disk"; - "disk_id" => ?disk.identity() - ); - saved.insert(disk); + Err(_) => { + // This is an unrecoverable error, so we don't queue the + // disk again. + to_dequeue.insert(disk); + } + Ok(AddDiskResult::DiskInserted) => { + send_updates = true; + to_dequeue.insert(disk); } - Ok(true) => send_updates = true, - Ok(false) => (), + Ok(AddDiskResult::DiskAlreadyInserted) => { + to_dequeue.insert(disk); + } + Ok(AddDiskResult::DiskQueued) => (), } } } - // Merge any requeued disks from transient errors with saved disks here - self.queued_u2_drives.extend(saved); + // Dequeue any inserted disks + self.queued_u2_drives.retain(|k| !to_dequeue.contains(k)); send_updates } // Add a disk to `StorageResources` if it is new, // updated, or its pool has been updated as determined by // [`$crate::resources::StorageResources::insert_disk`] and we decide not to - // queue the disk for later addition. If the disk was inserted to resources - // return `Ok(true)`. - // - // In case the disk is queued, it wasn't inserted into `StorageResources` - // for another reason, or we have already consumed and logged an error - // return `Ok(false). - // - // In all other cases return an Error. - async fn add_disk(&mut self, raw_disk: RawDisk) -> Result { + // queue the disk for later addition. + async fn add_disk( + &mut self, + raw_disk: RawDisk, + ) -> Result { match raw_disk.variant() { DiskVariant::U2 => self.add_u2_disk(raw_disk).await, DiskVariant::M2 => self.add_m2_disk(raw_disk).await, @@ -450,16 +473,19 @@ impl StorageManager { } // Add a U.2 disk to [`StorageResources`] or queue it to be added later - async fn add_u2_disk(&mut self, raw_disk: RawDisk) -> Result { + async fn add_u2_disk( + &mut self, + raw_disk: RawDisk, + ) -> Result { if self.state != StorageManagerState::Normal { self.queued_u2_drives.insert(raw_disk); - return Ok(false); + return Ok(AddDiskResult::DiskQueued); } match Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) .await { - Ok(disk) => self.resources.insert_disk(disk), + Ok(disk) => self.resources.insert_disk(disk).map(Into::into), Err(err @ DiskError::Dataset(DatasetError::KeyManager(_))) => { warn!( self.log, @@ -468,7 +494,7 @@ impl StorageManager { ); self.queued_u2_drives.insert(raw_disk); self.state = StorageManagerState::QueueingDisks; - Ok(false) + Ok(AddDiskResult::DiskQueued) } Err(err) => { error!( @@ -476,7 +502,7 @@ impl StorageManager { "Persistent error: {err}: not queueing disk"; "disk_id" => ?raw_disk.identity() ); - Ok(false) + Err(err.into()) } } } @@ -486,11 +512,14 @@ impl StorageManager { // // We never queue M.2 drives, as they don't rely on [`KeyManager`] based // encryption - async fn add_m2_disk(&mut self, raw_disk: RawDisk) -> Result { + async fn add_m2_disk( + &mut self, + raw_disk: RawDisk, + ) -> Result { let disk = Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) .await?; - self.resources.insert_disk(disk) + self.resources.insert_disk(disk).map(Into::into) } // Delete a real disk and return `true` if the disk was actually removed @@ -500,7 +529,7 @@ impl StorageManager { self.resources.remove_disk(raw_disk.identity()) } - // Find all disks to remove that are not in raw_disks and remove them Then + // Find all disks to remove that are not in raw_disks and remove them. Then // take the remaining disks and try to add them all. 
`StorageResources` will // inform us if anything changed, and if so we return true, otherwise we // return false. @@ -540,8 +569,8 @@ impl StorageManager { for raw_disk in raw_disks { let disk_id = raw_disk.identity().clone(); match self.add_disk(raw_disk).await { - Ok(true) => should_update = true, - Ok(false) => (), + Ok(AddDiskResult::DiskInserted) => should_update = true, + Ok(_) => (), Err(err) => { warn!( self.log, From cfc3ef7c732afdfbf476ac4a62b64471c697bfbb Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 14 Nov 2023 18:36:28 +0000 Subject: [PATCH 65/66] review fixes --- sled-agent/src/bootstrap/server.rs | 20 +++++-- sled-storage/src/manager.rs | 38 ++------------ sled-storage/src/resources.rs | 84 +++++++++++++++++------------- 3 files changed, 70 insertions(+), 72 deletions(-) diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 604baea55b..f4948de83b 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -565,14 +565,23 @@ impl Inner { underlay_available_tx, ) => { let request_id = request.body.id; - // Extract from an option to satisfy the borrow checker + + // Extract from options to satisfy the borrow checker. + // It is not possible for `start_sled_agent` to be cancelled + // or fail in a safe, restartable manner. Therefore, for now, + // we explicitly unwrap here, and panic on error below. + // + // See https://github.com/oxidecomputer/omicron/issues/4494 let sled_agent_started_tx = sled_agent_started_tx.take().unwrap(); + let underlay_available_tx = + underlay_available_tx.take().unwrap(); + let response = match start_sled_agent( &self.config, request, self.long_running_task_handles.clone(), - underlay_available_tx.take().unwrap(), + underlay_available_tx, self.service_manager.clone(), &self.ddm_admin_localhost_client, &self.base_log, @@ -591,7 +600,12 @@ impl Inner { self.state = SledAgentState::ServerStarted(server); Ok(SledAgentResponse { id: request_id }) } - Err(err) => Err(format!("{err:#}")), + Err(err) => { + // This error is unrecoverable, and if returned we'd + // end up in maintenance mode anyway. 
+ error!(log, "Failed to start sled agent: {err:#}"); + panic!("Failed to start sled agent"); + } }; _ = response_tx.send(response); } diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index d5df71c0b2..50b1c44148 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -9,7 +9,7 @@ use std::collections::HashSet; use crate::dataset::{DatasetError, DatasetName}; use crate::disk::{Disk, DiskError, RawDisk}; use crate::error::Error; -use crate::resources::StorageResources; +use crate::resources::{AddDiskResult, StorageResources}; use camino::Utf8PathBuf; use illumos_utils::zfs::{Mountpoint, Zfs}; use illumos_utils::zpool::ZpoolName; @@ -57,31 +57,6 @@ pub enum StorageManagerState { Normal, } -enum AddDiskResult { - DiskInserted, - DiskAlreadyInserted, - DiskQueued, -} - -impl AddDiskResult { - fn disk_inserted(&self) -> bool { - match self { - AddDiskResult::DiskInserted => true, - _ => false, - } - } -} - -impl From for AddDiskResult { - fn from(value: bool) -> Self { - if value { - AddDiskResult::DiskInserted - } else { - AddDiskResult::DiskAlreadyInserted - } - } -} - #[derive(Debug)] struct NewFilesystemRequest { dataset_id: Uuid, @@ -240,7 +215,7 @@ impl FakeStorageManager { loop { match self.rx.recv().await { Some(StorageRequest::AddDisk(raw_disk)) => { - if self.add_disk(raw_disk) { + if self.add_disk(raw_disk).disk_inserted() { self.resource_updates .send_replace(self.resources.clone()); } @@ -257,7 +232,7 @@ impl FakeStorageManager { } // Add a disk to `StorageResources` if it is new and return true if so - fn add_disk(&mut self, raw_disk: RawDisk) -> bool { + fn add_disk(&mut self, raw_disk: RawDisk) -> AddDiskResult { let disk = match raw_disk { RawDisk::Real(_) => { panic!( @@ -340,9 +315,6 @@ impl StorageManager { /// Process the next event /// /// This is useful for testing/debugging - /// - /// Return `None` if the sender side has disappeared and the task should - /// shutdown. pub async fn step(&mut self) -> Result<(), Error> { const CAPACITY_LOG_THRESHOLD: usize = 10; // We check the capacity and log it every time it changes by at least 10 @@ -485,7 +457,7 @@ impl StorageManager { match Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) .await { - Ok(disk) => self.resources.insert_disk(disk).map(Into::into), + Ok(disk) => self.resources.insert_disk(disk), Err(err @ DiskError::Dataset(DatasetError::KeyManager(_))) => { warn!( self.log, @@ -519,7 +491,7 @@ impl StorageManager { let disk = Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) .await?; - self.resources.insert_disk(disk).map(Into::into) + self.resources.insert_disk(disk) } // Delete a real disk and return `true` if the disk was actually removed diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 93f7f0793c..c1f460dc92 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -9,6 +9,7 @@ use crate::disk::Disk; use crate::error::Error; use crate::pool::Pool; use camino::Utf8PathBuf; +use cfg_if::cfg_if; use illumos_utils::zpool::ZpoolName; use omicron_common::disk::DiskIdentity; use sled_hardware::DiskVariant; @@ -21,6 +22,21 @@ const BUNDLE_DIRECTORY: &str = "bundle"; // The directory for zone bundles. 
const ZONE_BUNDLE_DIRECTORY: &str = "zone"; +pub enum AddDiskResult { + DiskInserted, + DiskAlreadyInserted, + DiskQueued, +} + +impl AddDiskResult { + pub fn disk_inserted(&self) -> bool { + match self { + AddDiskResult::DiskInserted => true, + _ => false, + } + } +} + /// Storage related resources: disks and zpools /// /// This state is internal to the [`crate::manager::StorageManager`] task. Clones @@ -50,12 +66,16 @@ impl StorageResources { /// Insert a disk and its zpool /// - /// If the disk passed in is new or modified, or its pool size or pool name - /// changed, then insert the changed values and return `true`. Otherwise, - /// do not insert anything and return false. For instance, if only the pool - /// health changes, because it is not one of the checked values, we will not - /// insert the update and will return `false`. - pub(crate) fn insert_disk(&mut self, disk: Disk) -> Result { + /// If the disk passed in is new or modified, or its pool size or pool + /// name changed, then insert the changed values and return `DiskInserted`. + /// Otherwise, do not insert anything and return `DiskAlreadyInserted`. + /// For instance, if only the pool health changes, because it is not one + /// of the checked values, we will not insert the update and will return + /// `DiskAlreadyInserted`. + pub(crate) fn insert_disk( + &mut self, + disk: Disk, + ) -> Result { let disk_id = disk.identity().clone(); let zpool_name = disk.zpool_name().clone(); let zpool = Pool::new(zpool_name, disk_id.clone())?; @@ -64,63 +84,55 @@ impl StorageResources { && stored_pool.info.size() == zpool.info.size() && stored_pool.name == zpool.name { - return Ok(false); + return Ok(AddDiskResult::DiskAlreadyInserted); } } // Either the disk or zpool changed Arc::make_mut(&mut self.disks).insert(disk_id, (disk, zpool)); - Ok(true) + Ok(AddDiskResult::DiskInserted) } /// Insert a disk while creating a fake pool /// This is a workaround for current mock based testing strategies /// in the sled-agent. - /// - /// Return true if data was changed, false otherwise #[cfg(feature = "testing")] - pub fn insert_fake_disk(&mut self, disk: Disk) -> bool { + pub fn insert_fake_disk(&mut self, disk: Disk) -> AddDiskResult { let disk_id = disk.identity().clone(); let zpool_name = disk.zpool_name().clone(); let zpool = Pool::new_with_fake_info(zpool_name, disk_id.clone()); if self.disks.contains_key(&disk_id) { - return false; + return AddDiskResult::DiskAlreadyInserted; } // Either the disk or zpool changed Arc::make_mut(&mut self.disks).insert(disk_id, (disk, zpool)); - true + AddDiskResult::DiskInserted } /// Delete a disk and its zpool /// /// Return true, if data was changed, false otherwise /// - /// Note: We never allow removal of synthetic disks as they are only added - /// once. - #[cfg(not(test))] + /// Note: We never allow removal of synthetic disks in production as they + /// are only added once. pub(crate) fn remove_disk(&mut self, id: &DiskIdentity) -> bool { - if let Some((disk, _)) = self.disks.get(id) { - if disk.is_synthetic() { - return false; - } - } else { + let Some((disk, _)) = self.disks.get(id) else { return false; + }; + + cfg_if! { + if #[cfg(test)] { + // For testing purposes, we allow synthetic disks to be deleted. + // Silence an unused variable warning. + _ = disk; + } else { + // In production, we disallow removal of synthetic disks as they + // are only added once. 
+                if disk.is_synthetic() {
+                    return false;
+                }
+            }
+        }
-        // Safe to unwrap as we just checked the key existed above
-        Arc::make_mut(&mut self.disks).remove(id).unwrap();
-        true
-    }
-    /// Delete a real disk and its zpool
-    ///
-    /// Return true, if data was changed, false otherwise
-    ///
-    /// Note: For testing purposes of this crate, we allow synthetic disks to
-    /// be deleted.
-    #[cfg(test)]
-    pub(crate) fn remove_disk(&mut self, id: &DiskIdentity) -> bool {
-        if !self.disks.contains_key(id) {
-            return false;
-        }
         // Safe to unwrap as we just checked the key existed above
         Arc::make_mut(&mut self.disks).remove(id).unwrap();
         true

From 6b7845c825c5be415f9d4f36167419e614729cae Mon Sep 17 00:00:00 2001
From: "Andrew J. Stone"
Date: Tue, 14 Nov 2023 18:48:27 +0000
Subject: [PATCH 66/66] fix nit

---
 sled-storage/src/dataset.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs
index 503ccb053a..a2878af7f6 100644
--- a/sled-storage/src/dataset.rs
+++ b/sled-storage/src/dataset.rs
@@ -324,7 +324,10 @@ pub(crate) async fn ensure_zpool_has_datasets(
                 Zfs::destroy_dataset(name).or_else(|err| {
                     // If we can't find the dataset, that's fine -- it might
                     // not have been formatted yet.
-                    if let DestroyDatasetErrorVariant::NotFound = err.err {
+                    if matches!(
+                        err.err,
+                        DestroyDatasetErrorVariant::NotFound
+                    ) {
                         Ok(())
                     } else {
                         Err(err)
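The remove_disk rewrite in PATCH 65 above collapses the duplicated #[cfg(test)] / #[cfg(not(test))] functions into one body with the cfg-if crate. A minimal sketch of that macro, assuming cfg-if is declared as a dependency; the function and its policy are illustrative:

    // `cfg_if!` expands to ordinary #[cfg(...)]-gated statements, so only one
    // arm is ever compiled into the function.
    fn removal_allowed(synthetic: bool) -> bool {
        cfg_if::cfg_if! {
            if #[cfg(test)] {
                // Tests may remove synthetic disks; silence the unused binding.
                let _ = synthetic;
                return true;
            } else {
                // Outside of tests, a synthetic disk is never removed.
                return !synthetic;
            }
        }
    }

    fn main() {
        // In a normal (non-test) build only the production arm exists.
        println!("removal allowed: {}", removal_allowed(true));
    }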