diff --git a/Cargo.lock b/Cargo.lock index 29122b8903..bbb08791e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3480,6 +3480,7 @@ dependencies = [ "reqwest", "sha2", "sled-hardware", + "sled-storage", "slog", "slog-async", "slog-envlogger", @@ -4293,6 +4294,8 @@ dependencies = [ "schemars", "serde", "serde_json", + "sled-hardware", + "sled-storage", "slog", "uuid", ] @@ -5297,6 +5300,7 @@ dependencies = [ "sha3", "sled-agent-client", "sled-hardware", + "sled-storage", "slog", "slog-async", "slog-dtrace", @@ -8167,6 +8171,7 @@ dependencies = [ "regress", "reqwest", "serde", + "sled-storage", "slog", "uuid", ] @@ -8181,11 +8186,9 @@ dependencies = [ "futures", "illumos-devinfo", "illumos-utils", - "key-manager", "libc", "libefi-illumos", "macaddr", - "nexus-client 0.1.0", "omicron-common 0.1.0", "omicron-test-utils", "omicron-workspace-hack", @@ -8200,6 +8203,32 @@ dependencies = [ "uuid", ] +[[package]] +name = "sled-storage" +version = "0.1.0" +dependencies = [ + "async-trait", + "camino", + "camino-tempfile", + "cfg-if 1.0.0", + "derive_more", + "glob", + "illumos-utils", + "key-manager", + "omicron-common 0.1.0", + "omicron-test-utils", + "omicron-workspace-hack", + "rand 0.8.5", + "schemars", + "serde", + "serde_json", + "sled-hardware", + "slog", + "thiserror", + "tokio", + "uuid", +] + [[package]] name = "slog" version = "2.7.0" diff --git a/Cargo.toml b/Cargo.toml index 0e13946533..dfc6fe9c76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,6 +57,7 @@ members = [ "rpaths", "sled-agent", "sled-hardware", + "sled-storage", "sp-sim", "test-utils", "tufaceous-lib", @@ -122,6 +123,7 @@ default-members = [ "rpaths", "sled-agent", "sled-hardware", + "sled-storage", "sp-sim", "test-utils", "tufaceous-lib", @@ -329,6 +331,7 @@ similar-asserts = "1.5.0" sled = "0.34" sled-agent-client = { path = "clients/sled-agent-client" } sled-hardware = { path = "sled-hardware" } +sled-storage = { path = "sled-storage" } slog = { version = "2.7", features = [ "dynamic-keys", "max_level_trace", "release_max_level_debug" ] } slog-async = "2.8" slog-dtrace = "0.2" diff --git a/clients/nexus-client/Cargo.toml b/clients/nexus-client/Cargo.toml index 2734142f9f..239cb77789 100644 --- a/clients/nexus-client/Cargo.toml +++ b/clients/nexus-client/Cargo.toml @@ -10,6 +10,8 @@ futures.workspace = true ipnetwork.workspace = true omicron-common.workspace = true omicron-passwords.workspace = true +sled-hardware.workspace = true +sled-storage.workspace = true progenitor.workspace = true regress.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 23ceb114fc..9f81492d10 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -388,3 +388,36 @@ impl From } } } + +impl From for types::PhysicalDiskKind { + fn from(value: sled_hardware::DiskVariant) -> Self { + match value { + sled_hardware::DiskVariant::U2 => types::PhysicalDiskKind::U2, + sled_hardware::DiskVariant::M2 => types::PhysicalDiskKind::M2, + } + } +} + +impl From for types::Baseboard { + fn from(b: sled_hardware::Baseboard) -> types::Baseboard { + types::Baseboard { + serial_number: b.identifier().to_string(), + part_number: b.model().to_string(), + revision: b.revision(), + } + } +} + +impl From for types::DatasetKind { + fn from(k: sled_storage::dataset::DatasetKind) -> Self { + use sled_storage::dataset::DatasetKind::*; + match k { + CockroachDb => Self::Cockroach, + Crucible => Self::Crucible, + Clickhouse => 
Self::Clickhouse, + ClickhouseKeeper => Self::ClickhouseKeeper, + ExternalDns => Self::ExternalDns, + InternalDns => Self::InternalDns, + } + } +} diff --git a/clients/sled-agent-client/Cargo.toml b/clients/sled-agent-client/Cargo.toml index b2ed07caba..e2cc737e70 100644 --- a/clients/sled-agent-client/Cargo.toml +++ b/clients/sled-agent-client/Cargo.toml @@ -14,5 +14,6 @@ regress.workspace = true reqwest = { workspace = true, features = [ "json", "rustls-tls", "stream" ] } serde.workspace = true slog.workspace = true +sled-storage.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 0df21d894e..30b554a021 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -6,6 +6,7 @@ use async_trait::async_trait; use std::convert::TryFrom; +use std::str::FromStr; use uuid::Uuid; progenitor::generate_api!( @@ -528,3 +529,27 @@ impl TestInterfaces for Client { .expect("disk_finish_transition() failed unexpectedly"); } } + +impl From for types::DatasetKind { + fn from(k: sled_storage::dataset::DatasetKind) -> Self { + use sled_storage::dataset::DatasetKind::*; + match k { + CockroachDb => Self::CockroachDb, + Crucible => Self::Crucible, + Clickhouse => Self::Clickhouse, + ClickhouseKeeper => Self::ClickhouseKeeper, + ExternalDns => Self::ExternalDns, + InternalDns => Self::InternalDns, + } + } +} + +impl From for types::DatasetName { + fn from(n: sled_storage::dataset::DatasetName) -> Self { + Self { + pool_name: types::ZpoolName::from_str(&n.pool().to_string()) + .unwrap(), + kind: n.dataset().clone().into(), + } + } +} diff --git a/common/src/disk.rs b/common/src/disk.rs index 3ea8091326..3ae9c31e01 100644 --- a/common/src/disk.rs +++ b/common/src/disk.rs @@ -5,7 +5,7 @@ //! Disk related types shared among crates /// Uniquely identifies a disk. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] pub struct DiskIdentity { pub vendor: String, pub serial: String, diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index a291a15e78..497454e047 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -44,3 +44,6 @@ toml.workspace = true [features] # Enable to generate MockZones testing = ["mockall"] +# Useful for tests that want real functionality and ability to run without +# pfexec +tmp_keypath = [] diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index 345f097ae2..1faa4c5c37 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -4,6 +4,9 @@ //! Wrappers around illumos-specific commands. +#[allow(unused)] +use std::sync::atomic::{AtomicBool, Ordering}; + use cfg_if::cfg_if; pub mod addrobj; @@ -93,7 +96,7 @@ mod inner { // Helper function for starting the process and checking the // exit code result. - pub fn execute( + pub fn execute_helper( command: &mut std::process::Command, ) -> Result { let output = command.output().map_err(|err| { @@ -108,6 +111,34 @@ mod inner { } } +// Due to feature unification, the `testing` feature is enabled when some tests +// don't actually want to use it. We allow them to opt out of the use of the +// free function here. We also explicitly opt-in where mocks are used. +// +// Note that this only works if the tests that use mocks and those that don't +// are run sequentially. However, this is how we do things in CI with nextest, +// so there is no problem currently. 
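As a concrete illustration of the opt-out/opt-in described here: a test built with the `testing` feature that wants mocked command execution flips the flag (declared just below) around the code under test, while a test that wants real behavior simply leaves it at its default of `false`. A minimal sketch, not part of this change; the test name is invented:

    #[test]
    fn uses_mocked_zpool_commands() {
        use std::sync::atomic::Ordering;

        // Opt in to the mocked `execute` for the duration of this test.
        illumos_utils::USE_MOCKS.store(true, Ordering::SeqCst);

        // ... exercise code that shells out via `illumos_utils::execute` ...

        // Reset so later, sequentially-run tests get the real implementation.
        illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst);
    }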
+// +// We can remove all this when we get rid of the mocks. +#[cfg(any(test, feature = "testing"))] +pub static USE_MOCKS: AtomicBool = AtomicBool::new(false); + +pub fn execute( + command: &mut std::process::Command, +) -> Result { + cfg_if! { + if #[cfg(any(test, feature = "testing"))] { + if USE_MOCKS.load(Ordering::SeqCst) { + mock_inner::execute_helper(command) + } else { + inner::execute_helper(command) + } + } else { + inner::execute_helper(command) + } + } +} + cfg_if! { if #[cfg(any(test, feature = "testing"))] { pub use mock_inner::*; diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index a6af997619..e9554100af 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -20,7 +20,16 @@ pub const ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT: &str = "/zone"; pub const ZONE_ZFS_RAMDISK_DATASET: &str = "rpool/zone"; pub const ZFS: &str = "/usr/sbin/zfs"; + +/// This path is intentionally on a `tmpfs` to prevent copy-on-write behavior +/// and to ensure it goes away on power off. +/// +/// We want minimize the time the key files are in memory, and so we rederive +/// the keys and recreate the files on demand when creating and mounting +/// encrypted filesystems. We then zero them and unlink them. pub const KEYPATH_ROOT: &str = "/var/run/oxide/"; +// Use /tmp so we don't have to worry about running tests with pfexec +pub const TEST_KEYPATH_ROOT: &str = "/tmp"; /// Error returned by [`Zfs::list_datasets`]. #[derive(thiserror::Error, Debug)] @@ -158,19 +167,27 @@ impl fmt::Display for Keypath { } } +#[cfg(not(feature = "tmp_keypath"))] +impl From<&DiskIdentity> for Keypath { + fn from(id: &DiskIdentity) -> Self { + build_keypath(id, KEYPATH_ROOT) + } +} + +#[cfg(feature = "tmp_keypath")] impl From<&DiskIdentity> for Keypath { fn from(id: &DiskIdentity) -> Self { - let filename = format!( - "{}-{}-{}-zfs-aes-256-gcm.key", - id.vendor, id.serial, id.model - ); - let mut path = Utf8PathBuf::new(); - path.push(KEYPATH_ROOT); - path.push(filename); - Keypath(path) + build_keypath(id, TEST_KEYPATH_ROOT) } } +fn build_keypath(id: &DiskIdentity, root: &str) -> Keypath { + let filename = + format!("{}-{}-{}-zfs-aes-256-gcm.key", id.vendor, id.serial, id.model); + let path: Utf8PathBuf = [root, &filename].iter().collect(); + Keypath(path) +} + #[derive(Debug)] pub struct EncryptionDetails { pub keypath: Keypath, diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index 81ded2655e..f2c395e22b 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -39,6 +39,13 @@ pub struct CreateError { err: Error, } +#[derive(thiserror::Error, Debug)] +#[error("Failed to destroy zpool: {err}")] +pub struct DestroyError { + #[from] + err: Error, +} + #[derive(thiserror::Error, Debug)] #[error("Failed to list zpools: {err}")] pub struct ListError { @@ -89,7 +96,7 @@ impl FromStr for ZpoolHealth { } /// Describes a Zpool. 
-#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct ZpoolInfo { name: String, size: u64, @@ -121,6 +128,17 @@ impl ZpoolInfo { pub fn health(&self) -> ZpoolHealth { self.health } + + #[cfg(any(test, feature = "testing"))] + pub fn new_hardcoded(name: String) -> ZpoolInfo { + ZpoolInfo { + name, + size: 1024 * 1024 * 64, + allocated: 1024, + free: 1024 * 1023 * 64, + health: ZpoolHealth::Online, + } + } } impl FromStr for ZpoolInfo { @@ -167,7 +185,10 @@ pub struct Zpool {} #[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] impl Zpool { - pub fn create(name: ZpoolName, vdev: &Utf8Path) -> Result<(), CreateError> { + pub fn create( + name: &ZpoolName, + vdev: &Utf8Path, + ) -> Result<(), CreateError> { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); @@ -189,7 +210,17 @@ impl Zpool { Ok(()) } - pub fn import(name: ZpoolName) -> Result<(), Error> { + pub fn destroy(name: &ZpoolName) -> Result<(), DestroyError> { + let mut cmd = std::process::Command::new(PFEXEC); + cmd.env_clear(); + cmd.env("LC_ALL", "C.UTF-8"); + cmd.arg(ZPOOL).arg("destroy"); + cmd.arg(&name.to_string()); + execute(&mut cmd).map_err(Error::from)?; + Ok(()) + } + + pub fn import(name: &ZpoolName) -> Result<(), Error> { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); diff --git a/installinator/Cargo.toml b/installinator/Cargo.toml index 5a7c6bd0e5..d489e73ec1 100644 --- a/installinator/Cargo.toml +++ b/installinator/Cargo.toml @@ -27,6 +27,7 @@ omicron-common.workspace = true reqwest.workspace = true sha2.workspace = true sled-hardware.workspace = true +sled-storage.workspace = true slog.workspace = true slog-async.workspace = true slog-envlogger.workspace = true diff --git a/installinator/src/hardware.rs b/installinator/src/hardware.rs index ffa0b74739..b037384cbe 100644 --- a/installinator/src/hardware.rs +++ b/installinator/src/hardware.rs @@ -6,10 +6,11 @@ use anyhow::anyhow; use anyhow::ensure; use anyhow::Context; use anyhow::Result; -use sled_hardware::Disk; use sled_hardware::DiskVariant; use sled_hardware::HardwareManager; use sled_hardware::SledMode; +use sled_storage::disk::Disk; +use sled_storage::disk::RawDisk; use slog::info; use slog::Logger; @@ -28,7 +29,8 @@ impl Hardware { anyhow!("failed to create HardwareManager: {err}") })?; - let disks = hardware.disks(); + let disks: Vec = + hardware.disks().into_iter().map(|disk| disk.into()).collect(); info!( log, "found gimlet hardware"; diff --git a/installinator/src/write.rs b/installinator/src/write.rs index 6c0c1f63c7..22dd2adbf6 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -122,8 +122,9 @@ impl WriteDestination { ); let zpool_name = disk.zpool_name().clone(); - let control_plane_dir = zpool_name - .dataset_mountpoint(sled_hardware::INSTALL_DATASET); + let control_plane_dir = zpool_name.dataset_mountpoint( + sled_storage::dataset::INSTALL_DATASET, + ); match drives.entry(slot) { Entry::Vacant(entry) => { diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 827cb131cb..46148304f8 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -58,6 +58,7 @@ serde_json = {workspace = true, features = ["raw_value"]} sha3.workspace = true sled-agent-client.workspace = true sled-hardware.workspace = true +sled-storage.workspace = true slog.workspace = true slog-async.workspace = true slog-dtrace.workspace = true @@ -94,7 +95,8 @@ slog-async.workspace = true 
slog-term.workspace = true tempfile.workspace = true -illumos-utils = { workspace = true, features = ["testing"] } +illumos-utils = { workspace = true, features = ["testing", "tmp_keypath"] } +sled-storage = { workspace = true, features = ["testing"] } # # Disable doc builds by default for our binaries to work around issue diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs index 6ecb9dac43..2e9ea4c8d9 100644 --- a/sled-agent/src/backing_fs.rs +++ b/sled-agent/src/backing_fs.rs @@ -128,7 +128,7 @@ pub(crate) fn ensure_backing_fs( let dataset = format!( "{}/{}/{}", boot_zpool_name, - sled_hardware::disk::M2_BACKING_DATASET, + sled_storage::dataset::M2_BACKING_DATASET, bfs.name ); let mountpoint = Mountpoint::Path(Utf8PathBuf::from(bfs.mountpoint)); diff --git a/sled-agent/src/bootstrap/bootstore.rs b/sled-agent/src/bootstrap/bootstore_setup.rs similarity index 55% rename from sled-agent/src/bootstrap/bootstore.rs rename to sled-agent/src/bootstrap/bootstore_setup.rs index 17267bef55..9eb0a87c03 100644 --- a/sled-agent/src/bootstrap/bootstore.rs +++ b/sled-agent/src/bootstrap/bootstore_setup.rs @@ -5,124 +5,78 @@ //! Helpers for configuring and starting the bootstore during bootstrap agent //! startup. +#![allow(clippy::result_large_err)] + use super::config::BOOTSTORE_PORT; use super::server::StartError; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use ddm_admin_client::Client as DdmAdminClient; use sled_hardware::underlay::BootstrapInterface; use sled_hardware::Baseboard; +use sled_storage::dataset::CLUSTER_DATASET; +use sled_storage::resources::StorageResources; use slog::Logger; use std::collections::BTreeSet; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::time::Duration; -use tokio::task::JoinHandle; const BOOTSTORE_FSM_STATE_FILE: &str = "bootstore-fsm-state.json"; const BOOTSTORE_NETWORK_CONFIG_FILE: &str = "bootstore-network-config.json"; -pub(super) struct BootstoreHandles { - pub(super) node_handle: bootstore::NodeHandle, - - // These two are never used; we keep them to show ownership of the spawned - // tasks. 
- _node_task_handle: JoinHandle<()>, - _peer_update_task_handle: JoinHandle<()>, -} - -impl BootstoreHandles { - pub(super) async fn spawn( - storage_resources: &StorageResources, - ddm_admin_client: DdmAdminClient, - baseboard: Baseboard, - global_zone_bootstrap_ip: Ipv6Addr, - base_log: &Logger, - ) -> Result { - let config = bootstore::Config { - id: baseboard, - addr: SocketAddrV6::new( - global_zone_bootstrap_ip, - BOOTSTORE_PORT, - 0, - 0, - ), - time_per_tick: Duration::from_millis(250), - learn_timeout: Duration::from_secs(5), - rack_init_timeout: Duration::from_secs(300), - rack_secret_request_timeout: Duration::from_secs(5), - fsm_state_ledger_paths: bootstore_fsm_state_paths( - &storage_resources, - ) - .await?, - network_config_ledger_paths: bootstore_network_config_paths( - &storage_resources, - ) - .await?, - }; - - let (mut node, node_handle) = - bootstore::Node::new(config, base_log).await; - - let join_handle = tokio::spawn(async move { node.run().await }); - - // Spawn a task for polling DDMD and updating bootstore - let peer_update_handle = - tokio::spawn(poll_ddmd_for_bootstore_peer_update( - base_log.new(o!("component" => "bootstore_ddmd_poller")), - node_handle.clone(), - ddm_admin_client, - )); - - Ok(Self { - node_handle, - _node_task_handle: join_handle, - _peer_update_task_handle: peer_update_handle, - }) - } +pub fn new_bootstore_config( + storage_resources: &StorageResources, + baseboard: Baseboard, + global_zone_bootstrap_ip: Ipv6Addr, +) -> Result { + Ok(bootstore::Config { + id: baseboard, + addr: SocketAddrV6::new(global_zone_bootstrap_ip, BOOTSTORE_PORT, 0, 0), + time_per_tick: Duration::from_millis(250), + learn_timeout: Duration::from_secs(5), + rack_init_timeout: Duration::from_secs(300), + rack_secret_request_timeout: Duration::from_secs(5), + fsm_state_ledger_paths: bootstore_fsm_state_paths(&storage_resources)?, + network_config_ledger_paths: bootstore_network_config_paths( + &storage_resources, + )?, + }) } -async fn bootstore_fsm_state_paths( +fn bootstore_fsm_state_paths( storage: &StorageResources, ) -> Result, StartError> { let paths: Vec<_> = storage - .all_m2_mountpoints(sled_hardware::disk::CLUSTER_DATASET) - .await + .all_m2_mountpoints(CLUSTER_DATASET) .into_iter() .map(|p| p.join(BOOTSTORE_FSM_STATE_FILE)) .collect(); if paths.is_empty() { - return Err(StartError::MissingM2Paths( - sled_hardware::disk::CLUSTER_DATASET, - )); + return Err(StartError::MissingM2Paths(CLUSTER_DATASET)); } Ok(paths) } -async fn bootstore_network_config_paths( +fn bootstore_network_config_paths( storage: &StorageResources, ) -> Result, StartError> { let paths: Vec<_> = storage - .all_m2_mountpoints(sled_hardware::disk::CLUSTER_DATASET) - .await + .all_m2_mountpoints(CLUSTER_DATASET) .into_iter() .map(|p| p.join(BOOTSTORE_NETWORK_CONFIG_FILE)) .collect(); if paths.is_empty() { - return Err(StartError::MissingM2Paths( - sled_hardware::disk::CLUSTER_DATASET, - )); + return Err(StartError::MissingM2Paths(CLUSTER_DATASET)); } Ok(paths) } -async fn poll_ddmd_for_bootstore_peer_update( +pub async fn poll_ddmd_for_bootstore_peer_update( log: Logger, bootstore_node_handle: bootstore::NodeHandle, - ddmd_client: DdmAdminClient, ) { let mut current_peers: BTreeSet = BTreeSet::new(); // We're talking to a service's admin interface on localhost and @@ -132,7 +86,7 @@ async fn poll_ddmd_for_bootstore_peer_update( // We also use this timeout in the case of spurious ddmd failures // that require a reconnection from the ddmd_client. 
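With the task-spawning removed from this module, a caller (the new long-running-tasks setup, which is not part of this hunk) is expected to wire these two helpers together itself. A sketch reconstructed from the deleted `BootstoreHandles::spawn` code; the surrounding function name is invented:

    async fn start_bootstore_sketch(
        log: &Logger,
        storage_resources: &StorageResources,
        baseboard: Baseboard,
        global_zone_bootstrap_ip: Ipv6Addr,
    ) -> Result<bootstore::NodeHandle, StartError> {
        let config = new_bootstore_config(
            storage_resources,
            baseboard,
            global_zone_bootstrap_ip,
        )?;
        let (mut node, node_handle) = bootstore::Node::new(config, log).await;
        // Both tasks live for the lifetime of the process.
        tokio::spawn(async move { node.run().await });
        tokio::spawn(poll_ddmd_for_bootstore_peer_update(
            log.new(o!("component" => "bootstore_ddmd_poller")),
            node_handle.clone(),
        ));
        Ok(node_handle)
    }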
const RETRY: tokio::time::Duration = tokio::time::Duration::from_secs(5); - + let ddmd_client = DdmAdminClient::localhost(&log).unwrap(); loop { match ddmd_client .derive_bootstrap_addrs_from_prefixes(&[ @@ -154,7 +108,7 @@ async fn poll_ddmd_for_bootstore_peer_update( log, concat!( "Bootstore comms error: {}. ", - "bootstore::Node task must have paniced", + "bootstore::Node task must have panicked", ), e ); diff --git a/sled-agent/src/bootstrap/http_entrypoints.rs b/sled-agent/src/bootstrap/http_entrypoints.rs index c69bdeb0ce..7c32bf48a5 100644 --- a/sled-agent/src/bootstrap/http_entrypoints.rs +++ b/sled-agent/src/bootstrap/http_entrypoints.rs @@ -12,7 +12,6 @@ use super::BootstrapError; use super::RssAccessError; use crate::bootstrap::params::RackInitializeRequest; use crate::bootstrap::rack_ops::{RackInitId, RackResetId}; -use crate::storage_manager::StorageResources; use crate::updates::ConfigUpdates; use crate::updates::{Component, UpdateManager}; use bootstore::schemes::v0 as bootstore; @@ -25,6 +24,7 @@ use omicron_common::api::external::Error; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::Baseboard; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::Ipv6Addr; use tokio::sync::mpsc::error::TrySendError; @@ -33,7 +33,7 @@ use tokio::sync::{mpsc, oneshot}; pub(crate) struct BootstrapServerContext { pub(crate) base_log: Logger, pub(crate) global_zone_bootstrap_ip: Ipv6Addr, - pub(crate) storage_resources: StorageResources, + pub(crate) storage_manager: StorageHandle, pub(crate) bootstore_node_handle: bootstore::NodeHandle, pub(crate) baseboard: Baseboard, pub(crate) rss_access: RssAccess, @@ -50,7 +50,7 @@ impl BootstrapServerContext { self.rss_access.start_initializing( &self.base_log, self.global_zone_bootstrap_ip, - &self.storage_resources, + &self.storage_manager, &self.bootstore_node_handle, request, ) diff --git a/sled-agent/src/bootstrap/mod.rs b/sled-agent/src/bootstrap/mod.rs index 96e674acf3..590e13c891 100644 --- a/sled-agent/src/bootstrap/mod.rs +++ b/sled-agent/src/bootstrap/mod.rs @@ -4,7 +4,7 @@ //! 
Bootstrap-related utilities -mod bootstore; +pub(crate) mod bootstore_setup; pub mod client; pub mod config; pub mod early_networking; @@ -14,7 +14,7 @@ pub(crate) mod params; mod pre_server; mod rack_ops; pub(crate) mod rss_handle; -mod secret_retriever; +pub mod secret_retriever; pub mod server; mod sprockets_server; mod views; diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 05493f5aa3..02710ff583 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -11,13 +11,15 @@ #![allow(clippy::result_large_err)] use super::maghemite; -use super::secret_retriever::LrtqOrHardcodedSecretRetriever; use super::server::StartError; use crate::config::Config; use crate::config::SidecarRevision; +use crate::long_running_tasks::{ + spawn_all_longrunning_tasks, LongRunningTaskHandles, +}; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use crate::storage_manager::StorageManager; +use crate::storage_monitor::UnderlayAccess; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; @@ -30,115 +32,16 @@ use illumos_utils::zfs; use illumos_utils::zfs::Zfs; use illumos_utils::zone; use illumos_utils::zone::Zones; -use key_manager::KeyManager; -use key_manager::StorageKeyRequester; use omicron_common::address::Ipv6Subnet; use omicron_common::FileKv; use sled_hardware::underlay; use sled_hardware::DendriteAsic; -use sled_hardware::HardwareManager; -use sled_hardware::HardwareUpdate; use sled_hardware::SledMode; use slog::Drain; use slog::Logger; use std::net::IpAddr; use std::net::Ipv6Addr; -use tokio::sync::broadcast; -use tokio::task::JoinHandle; - -pub(super) struct BootstrapManagers { - pub(super) hardware: HardwareManager, - pub(super) storage: StorageManager, - pub(super) service: ServiceManager, -} - -impl BootstrapManagers { - pub(super) async fn handle_hardware_update( - &self, - update: Result, - sled_agent: Option<&SledAgent>, - log: &Logger, - ) { - match update { - Ok(update) => match update { - HardwareUpdate::TofinoLoaded => { - let baseboard = self.hardware.baseboard(); - if let Err(e) = self - .service - .activate_switch( - sled_agent.map(|sa| sa.switch_zone_underlay_info()), - baseboard, - ) - .await - { - warn!(log, "Failed to activate switch: {e}"); - } - } - HardwareUpdate::TofinoUnloaded => { - if let Err(e) = self.service.deactivate_switch().await { - warn!(log, "Failed to deactivate switch: {e}"); - } - } - HardwareUpdate::TofinoDeviceChange => { - if let Some(sled_agent) = sled_agent { - sled_agent.notify_nexus_about_self(log); - } - } - HardwareUpdate::DiskAdded(disk) => { - self.storage.upsert_disk(disk).await; - } - HardwareUpdate::DiskRemoved(disk) => { - self.storage.delete_disk(disk).await; - } - }, - Err(broadcast::error::RecvError::Lagged(count)) => { - warn!(log, "Hardware monitor missed {count} messages"); - self.check_latest_hardware_snapshot(sled_agent, log).await; - } - Err(broadcast::error::RecvError::Closed) => { - // The `HardwareManager` monitoring task is an infinite loop - - // the only way for us to get `Closed` here is if it panicked, - // so we will propagate such a panic. - panic!("Hardware manager monitor task panicked"); - } - } - } - - // Observe the current hardware state manually. - // - // We use this when we're monitoring hardware for the first - // time, and if we miss notifications. 
- pub(super) async fn check_latest_hardware_snapshot( - &self, - sled_agent: Option<&SledAgent>, - log: &Logger, - ) { - let underlay_network = sled_agent.map(|sled_agent| { - sled_agent.notify_nexus_about_self(log); - sled_agent.switch_zone_underlay_info() - }); - info!( - log, "Checking current full hardware snapshot"; - "underlay_network_info" => ?underlay_network, - ); - if self.hardware.is_scrimlet_driver_loaded() { - let baseboard = self.hardware.baseboard(); - if let Err(e) = - self.service.activate_switch(underlay_network, baseboard).await - { - warn!(log, "Failed to activate switch: {e}"); - } - } else { - if let Err(e) = self.service.deactivate_switch().await { - warn!(log, "Failed to deactivate switch: {e}"); - } - } - - self.storage - .ensure_using_exactly_these_disks(self.hardware.disks()) - .await; - } -} +use tokio::sync::oneshot; pub(super) struct BootstrapAgentStartup { pub(super) config: Config, @@ -146,8 +49,10 @@ pub(super) struct BootstrapAgentStartup { pub(super) ddm_admin_localhost_client: DdmAdminClient, pub(super) base_log: Logger, pub(super) startup_log: Logger, - pub(super) managers: BootstrapManagers, - pub(super) key_manager_handle: JoinHandle<()>, + pub(super) service_manager: ServiceManager, + pub(super) long_running_task_handles: LongRunningTaskHandles, + pub(super) sled_agent_started_tx: oneshot::Sender, + pub(super) underlay_available_tx: oneshot::Sender, } impl BootstrapAgentStartup { @@ -201,36 +106,23 @@ impl BootstrapAgentStartup { // This should be a no-op if already enabled. BootstrapNetworking::enable_ipv6_forwarding().await?; - // Spawn the `KeyManager` which is needed by the the StorageManager to - // retrieve encryption keys. - let (storage_key_requester, key_manager_handle) = - spawn_key_manager_task(&base_log); - + // Are we a gimlet or scrimlet? let sled_mode = sled_mode_from_config(&config)?; - // Start monitoring hardware. This is blocking so we use - // `spawn_blocking`; similar to above, we move some things in and (on - // success) it gives them back. - let (base_log, log, hardware_manager) = { - tokio::task::spawn_blocking(move || { - info!( - log, "Starting hardware monitor"; - "sled_mode" => ?sled_mode, - ); - let hardware_manager = - HardwareManager::new(&base_log, sled_mode) - .map_err(StartError::StartHardwareManager)?; - Ok::<_, StartError>((base_log, log, hardware_manager)) - }) - .await - .unwrap()? - }; - - // Create a `StorageManager` and (possibly) synthetic disks. 
- let storage_manager = - StorageManager::new(&base_log, storage_key_requester).await; - upsert_synthetic_zpools_if_needed(&log, &storage_manager, &config) - .await; + // Spawn all important long running tasks that live for the lifetime of + // the process and are used by both the bootstrap agent and sled agent + let ( + long_running_task_handles, + sled_agent_started_tx, + service_manager_ready_tx, + underlay_available_tx, + ) = spawn_all_longrunning_tasks( + &base_log, + sled_mode, + startup_networking.global_zone_bootstrap_ip, + &config, + ) + .await; let global_zone_bootstrap_ip = startup_networking.global_zone_bootstrap_ip; @@ -243,22 +135,27 @@ impl BootstrapAgentStartup { config.skip_timesync, config.sidecar_revision.clone(), config.switch_zone_maghemite_links.clone(), - storage_manager.resources().clone(), - storage_manager.zone_bundler().clone(), + long_running_task_handles.storage_manager.clone(), + long_running_task_handles.zone_bundler.clone(), ); + // Inform the hardware monitor that the service manager is ready + // This is a onetime operation, and so we use a oneshot channel + service_manager_ready_tx + .send(service_manager.clone()) + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); + Ok(Self { config, global_zone_bootstrap_ip, ddm_admin_localhost_client, base_log, startup_log: log, - managers: BootstrapManagers { - hardware: hardware_manager, - storage: storage_manager, - service: service_manager, - }, - key_manager_handle, + service_manager, + long_running_task_handles, + sled_agent_started_tx, + underlay_available_tx, }) } } @@ -359,13 +256,10 @@ fn ensure_zfs_key_directory_exists(log: &Logger) -> Result<(), StartError> { // to create and mount encrypted datasets. info!( log, "Ensuring zfs key directory exists"; - "path" => sled_hardware::disk::KEYPATH_ROOT, + "path" => zfs::KEYPATH_ROOT, ); - std::fs::create_dir_all(sled_hardware::disk::KEYPATH_ROOT).map_err(|err| { - StartError::CreateZfsKeyDirectory { - dir: sled_hardware::disk::KEYPATH_ROOT, - err, - } + std::fs::create_dir_all(zfs::KEYPATH_ROOT).map_err(|err| { + StartError::CreateZfsKeyDirectory { dir: zfs::KEYPATH_ROOT, err } }) } @@ -388,23 +282,6 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { .map_err(StartError::EnsureZfsRamdiskDataset) } -async fn upsert_synthetic_zpools_if_needed( - log: &Logger, - storage_manager: &StorageManager, - config: &Config, -) { - if let Some(pools) = &config.zpools { - for pool in pools { - info!( - log, - "Upserting synthetic zpool to Storage Manager: {}", - pool.to_string() - ); - storage_manager.upsert_synthetic_disk(pool.clone()).await; - } - } -} - // Combine the `sled_mode` config with the build-time switch type to determine // the actual sled mode. 
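The `long_running_tasks` module itself is not included in this hunk. Judging from the call sites in this diff, the handle bundle returned by `spawn_all_longrunning_tasks` looks roughly like the sketch below; the field set is inferred from the usages of `storage_manager`, `zone_bundler`, `bootstore`, and `hardware_manager`, and the real struct may carry more:

    use bootstore::schemes::v0 as bootstore;
    use sled_hardware::HardwareManager;
    use sled_storage::manager::StorageHandle;
    use crate::zone_bundle::ZoneBundler;

    // Inferred shape only -- reconstructed from how this diff uses the handles.
    // Every field is cheaply cloneable, so the bootstrap agent and the sled
    // agent can each hold a copy.
    #[derive(Clone)]
    pub struct LongRunningTaskHandles {
        pub storage_manager: StorageHandle,
        pub zone_bundler: ZoneBundler,
        pub bootstore: bootstore::NodeHandle,
        pub hardware_manager: HardwareManager,
    }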
fn sled_mode_from_config(config: &Config) -> Result { @@ -447,19 +324,6 @@ fn sled_mode_from_config(config: &Config) -> Result { Ok(sled_mode) } -fn spawn_key_manager_task( - log: &Logger, -) -> (StorageKeyRequester, JoinHandle<()>) { - let secret_retriever = LrtqOrHardcodedSecretRetriever::new(); - let (mut key_manager, storage_key_requester) = - KeyManager::new(log, secret_retriever); - - let key_manager_handle = - tokio::spawn(async move { key_manager.run().await }); - - (storage_key_requester, key_manager_handle) -} - #[derive(Debug, Clone)] pub(crate) struct BootstrapNetworking { pub(crate) bootstrap_etherstub: dladm::Etherstub, diff --git a/sled-agent/src/bootstrap/rack_ops.rs b/sled-agent/src/bootstrap/rack_ops.rs index b8721f8332..5cfd0b074a 100644 --- a/sled-agent/src/bootstrap/rack_ops.rs +++ b/sled-agent/src/bootstrap/rack_ops.rs @@ -8,11 +8,11 @@ use crate::bootstrap::http_entrypoints::RackOperationStatus; use crate::bootstrap::params::RackInitializeRequest; use crate::bootstrap::rss_handle::RssHandle; use crate::rack_setup::service::SetupServiceError; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::mem; use std::net::Ipv6Addr; @@ -171,7 +171,7 @@ impl RssAccess { &self, parent_log: &Logger, global_zone_bootstrap_ip: Ipv6Addr, - storage_resources: &StorageResources, + storage_manager: &StorageHandle, bootstore_node_handle: &bootstore::NodeHandle, request: RackInitializeRequest, ) -> Result { @@ -207,14 +207,14 @@ impl RssAccess { mem::drop(status); let parent_log = parent_log.clone(); - let storage_resources = storage_resources.clone(); + let storage_manager = storage_manager.clone(); let bootstore_node_handle = bootstore_node_handle.clone(); let status = Arc::clone(&self.status); tokio::spawn(async move { let result = rack_initialize( &parent_log, global_zone_bootstrap_ip, - storage_resources, + storage_manager, bootstore_node_handle, request, ) @@ -342,7 +342,7 @@ enum RssStatus { async fn rack_initialize( parent_log: &Logger, global_zone_bootstrap_ip: Ipv6Addr, - storage_resources: StorageResources, + storage_manager: StorageHandle, bootstore_node_handle: bootstore::NodeHandle, request: RackInitializeRequest, ) -> Result<(), SetupServiceError> { @@ -350,7 +350,7 @@ async fn rack_initialize( parent_log, request, global_zone_bootstrap_ip, - storage_resources, + storage_manager, bootstore_node_handle, ) .await diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index c82873d91d..5d9c01e7f2 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -9,7 +9,6 @@ use super::params::StartSledAgentRequest; use crate::rack_setup::config::SetupServiceConfig; use crate::rack_setup::service::RackSetupService; use crate::rack_setup::service::SetupServiceError; -use crate::storage_manager::StorageResources; use ::bootstrap_agent_client::Client as BootstrapAgentClient; use bootstore::schemes::v0 as bootstore; use futures::stream::FuturesUnordered; @@ -17,6 +16,7 @@ use futures::StreamExt; use omicron_common::backoff::retry_notify; use omicron_common::backoff::retry_policy_local; use omicron_common::backoff::BackoffError; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::Ipv6Addr; use std::net::SocketAddrV6; @@ -46,7 +46,7 @@ impl RssHandle { log: &Logger, config: SetupServiceConfig, our_bootstrap_address: 
Ipv6Addr, - storage_resources: StorageResources, + storage_manager: StorageHandle, bootstore: bootstore::NodeHandle, ) -> Result<(), SetupServiceError> { let (tx, rx) = rss_channel(our_bootstrap_address); @@ -54,7 +54,7 @@ impl RssHandle { let rss = RackSetupService::new( log.new(o!("component" => "RSS")), config, - storage_resources, + storage_manager, tx, bootstore, ); diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index e0ea85860b..f4948de83b 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -8,12 +8,10 @@ use super::config::BOOTSTRAP_AGENT_HTTP_PORT; use super::http_entrypoints; use super::params::RackInitializeRequest; use super::params::StartSledAgentRequest; -use super::pre_server::BootstrapManagers; use super::rack_ops::RackInitId; use super::views::SledAgentResponse; use super::BootstrapError; use super::RssAccessError; -use crate::bootstrap::bootstore::BootstoreHandles; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; use crate::bootstrap::http_entrypoints::api as http_api; use crate::bootstrap::http_entrypoints::BootstrapServerContext; @@ -24,16 +22,17 @@ use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; use crate::bootstrap::sprockets_server::SprocketsServer; use crate::config::Config as SledConfig; use crate::config::ConfigError; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::server::Server as SledAgentServer; +use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use crate::storage_manager::StorageResources; +use crate::storage_monitor::UnderlayAccess; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; use ddm_admin_client::DdmError; use dropshot::HttpServer; -use futures::Future; use futures::StreamExt; use illumos_utils::dladm; use illumos_utils::zfs; @@ -42,12 +41,12 @@ use illumos_utils::zone::Zones; use omicron_common::ledger; use omicron_common::ledger::Ledger; use sled_hardware::underlay; -use sled_hardware::HardwareUpdate; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::io; use std::net::SocketAddr; use std::net::SocketAddrV6; -use tokio::sync::broadcast; use tokio::sync::mpsc; use tokio::sync::oneshot; use tokio::task::JoinHandle; @@ -175,65 +174,18 @@ impl Server { ddm_admin_localhost_client, base_log, startup_log, - managers, - key_manager_handle, + service_manager, + long_running_task_handles, + sled_agent_started_tx, + underlay_available_tx, } = BootstrapAgentStartup::run(config).await?; - // From this point on we will listen for hardware notifications and - // potentially start the switch zone and be notified of new disks; we - // are responsible for responding to updates from this point on. - let mut hardware_monitor = managers.hardware.monitor(); - let storage_resources = managers.storage.resources(); - - // Check the latest hardware snapshot; we could have missed events - // between the creation of the hardware manager and our subscription of - // its monitor. - managers.check_latest_hardware_snapshot(None, &startup_log).await; - - // Wait for our boot M.2 to show up. - wait_while_handling_hardware_updates( - wait_for_boot_m2(storage_resources, &startup_log), - &mut hardware_monitor, - &managers, - None, // No underlay network yet - &startup_log, - "waiting for boot M.2", - ) - .await; - - // Wait for the bootstore to start. 
- let bootstore_handles = wait_while_handling_hardware_updates( - BootstoreHandles::spawn( - storage_resources, - ddm_admin_localhost_client.clone(), - managers.hardware.baseboard(), - global_zone_bootstrap_ip, - &base_log, - ), - &mut hardware_monitor, - &managers, - None, // No underlay network yet - &startup_log, - "initializing bootstore", - ) - .await?; - // Do we have a StartSledAgentRequest stored in the ledger? - let maybe_ledger = wait_while_handling_hardware_updates( - async { - let paths = sled_config_paths(storage_resources).await?; - let maybe_ledger = - Ledger::::new(&startup_log, paths) - .await; - Ok::<_, StartError>(maybe_ledger) - }, - &mut hardware_monitor, - &managers, - None, // No underlay network yet - &startup_log, - "loading sled-agent request from ledger", - ) - .await?; + let paths = + sled_config_paths(&long_running_task_handles.storage_manager) + .await?; + let maybe_ledger = + Ledger::::new(&startup_log, paths).await; // We don't yet _act_ on the `StartSledAgentRequest` if we have one, but // if we have one we init our `RssAccess` noting that we're already @@ -250,9 +202,9 @@ impl Server { let bootstrap_context = BootstrapServerContext { base_log: base_log.clone(), global_zone_bootstrap_ip, - storage_resources: storage_resources.clone(), - bootstore_node_handle: bootstore_handles.node_handle.clone(), - baseboard: managers.hardware.baseboard(), + storage_manager: long_running_task_handles.storage_manager.clone(), + bootstore_node_handle: long_running_task_handles.bootstore.clone(), + baseboard: long_running_task_handles.hardware_manager.baseboard(), rss_access, updates: config.updates.clone(), sled_reset_tx, @@ -284,55 +236,36 @@ impl Server { // Do we have a persistent sled-agent request that we need to restore? let state = if let Some(ledger) = maybe_ledger { let start_sled_agent_request = ledger.into_inner(); - let sled_agent_server = wait_while_handling_hardware_updates( - start_sled_agent( - &config, - start_sled_agent_request, - &bootstore_handles.node_handle, - &managers, - &ddm_admin_localhost_client, - &base_log, - &startup_log, - ), - &mut hardware_monitor, - &managers, - None, // No underlay network yet + let sled_agent_server = start_sled_agent( + &config, + start_sled_agent_request, + long_running_task_handles.clone(), + underlay_available_tx, + service_manager.clone(), + &ddm_admin_localhost_client, + &base_log, &startup_log, - "restoring sled-agent (cold boot)", ) .await?; + // Give the HardwareMonitory access to the `SledAgent` let sled_agent = sled_agent_server.sled_agent(); - - // We've created sled-agent; we need to (possibly) reconfigure the - // switch zone, if we're a scrimlet, to give it our underlay network - // information. - let underlay_network_info = sled_agent.switch_zone_underlay_info(); - info!( - startup_log, "Sled Agent started; rescanning hardware"; - "underlay_network_info" => ?underlay_network_info, - ); - managers - .check_latest_hardware_snapshot(Some(&sled_agent), &startup_log) - .await; + sled_agent_started_tx + .send(sled_agent.clone()) + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); // For cold boot specifically, we now need to load the services // we're responsible for, while continuing to handle hardware // notifications. This cannot fail: we retry indefinitely until // we're done loading services. 
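The `sled_agent_started_tx` send above is one half of a one-shot handoff: the bootstrap path sends the newly built `SledAgent` exactly once, and the hardware monitor (added later in this diff) waits on the receiving end before it rescans hardware. A self-contained sketch of the pattern, with `String` standing in for the real `SledAgent` handle:

    use tokio::sync::oneshot;

    #[tokio::main]
    async fn main() {
        let (started_tx, started_rx) = oneshot::channel::<String>();

        // The "monitor" side waits once for the handle, then uses it from then on.
        let monitor = tokio::spawn(async move {
            let sled_agent = started_rx.await.expect("sender dropped");
            println!("sled agent started: {sled_agent}");
        });

        // The "bootstrap" side sends exactly once; if the monitor is gone, the
        // real code treats that as unrecoverable and panics.
        started_tx
            .send("sled-agent".to_string())
            .expect("monitor dropped receiver");

        monitor.await.unwrap();
    }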
- wait_while_handling_hardware_updates( - sled_agent.cold_boot_load_services(), - &mut hardware_monitor, - &managers, - Some(&sled_agent), - &startup_log, - "restoring sled-agent services (cold boot)", - ) - .await; - + sled_agent.cold_boot_load_services().await; SledAgentState::ServerStarted(sled_agent_server) } else { - SledAgentState::Bootstrapping + SledAgentState::Bootstrapping( + Some(sled_agent_started_tx), + Some(underlay_available_tx), + ) }; // Spawn our inner task that handles any future hardware updates and any @@ -340,15 +273,13 @@ impl Server { // agent state. let inner = Inner { config, - hardware_monitor, state, sled_init_rx, sled_reset_rx, - managers, ddm_admin_localhost_client, - bootstore_handles, + long_running_task_handles, + service_manager, _sprockets_server_handle: sprockets_server_handle, - _key_manager_handle: key_manager_handle, base_log, }; let inner_task = tokio::spawn(inner.run()); @@ -377,20 +308,14 @@ impl Server { // bootstrap server). enum SledAgentState { // We're still in the bootstrapping phase, waiting for a sled-agent request. - Bootstrapping, + Bootstrapping( + Option>, + Option>, + ), // ... or the sled agent server is running. ServerStarted(SledAgentServer), } -impl SledAgentState { - fn sled_agent(&self) -> Option<&SledAgent> { - match self { - SledAgentState::Bootstrapping => None, - SledAgentState::ServerStarted(server) => Some(server.sled_agent()), - } - } -} - #[derive(thiserror::Error, Debug)] pub enum SledAgentServerStartError { #[error("Failed to start sled-agent server: {0}")] @@ -425,11 +350,13 @@ impl From for StartError { } } +#[allow(clippy::too_many_arguments)] async fn start_sled_agent( config: &SledConfig, request: StartSledAgentRequest, - bootstore: &bootstore::NodeHandle, - managers: &BootstrapManagers, + long_running_task_handles: LongRunningTaskHandles, + underlay_available_tx: oneshot::Sender, + service_manager: ServiceManager, ddmd_client: &DdmAdminClient, base_log: &Logger, log: &Logger, @@ -444,7 +371,10 @@ async fn start_sled_agent( if request.body.use_trust_quorum { info!(log, "KeyManager: using lrtq secret retriever"); let salt = request.hash_rack_id(); - LrtqOrHardcodedSecretRetriever::init_lrtq(salt, bootstore.clone()) + LrtqOrHardcodedSecretRetriever::init_lrtq( + salt, + long_running_task_handles.bootstore.clone(), + ) } else { info!(log, "KeyManager: using hardcoded secret retriever"); LrtqOrHardcodedSecretRetriever::init_hardcoded(); @@ -452,7 +382,7 @@ async fn start_sled_agent( if request.body.use_trust_quorum && request.body.is_lrtq_learner { info!(log, "Initializing sled as learner"); - match bootstore.init_learner().await { + match long_running_task_handles.bootstore.init_learner().await { Err(bootstore::NodeRequestError::Fsm( bootstore::ApiError::AlreadyInitialized, )) => { @@ -464,7 +394,7 @@ async fn start_sled_agent( } // Inform the storage service that the key manager is available - managers.storage.key_manager_ready().await; + long_running_task_handles.storage_manager.key_manager_ready().await; // Start trying to notify ddmd of our sled prefix so it can // advertise it to other sleds. 
@@ -484,9 +414,9 @@ async fn start_sled_agent( config, base_log.clone(), request.clone(), - managers.service.clone(), - managers.storage.clone(), - bootstore.clone(), + long_running_task_handles.clone(), + service_manager, + underlay_available_tx, ) .await .map_err(SledAgentServerStartError::FailedStartingServer)?; @@ -495,7 +425,8 @@ async fn start_sled_agent( // Record this request so the sled agent can be automatically // initialized on the next boot. - let paths = sled_config_paths(managers.storage.resources()).await?; + let paths = + sled_config_paths(&long_running_task_handles.storage_manager).await?; let mut ledger = Ledger::new_with(&log, paths, request); ledger.commit().await?; @@ -534,28 +465,6 @@ fn start_dropshot_server( Ok(http_server) } -/// Wait for at least the M.2 we booted from to show up. -/// -/// TODO-correctness Subsequent steps may assume all M.2s that will ever be -/// present are present once we return from this function; see -/// . -async fn wait_for_boot_m2(storage_resources: &StorageResources, log: &Logger) { - // Wait for at least the M.2 we booted from to show up. - loop { - match storage_resources.boot_disk().await { - Some(disk) => { - info!(log, "Found boot disk M.2: {disk:?}"); - break; - } - None => { - info!(log, "Waiting for boot disk M.2..."); - tokio::time::sleep(core::time::Duration::from_millis(250)) - .await; - } - } - } -} - struct MissingM2Paths(&'static str); impl From for StartError { @@ -571,56 +480,21 @@ impl From for SledAgentServerStartError { } async fn sled_config_paths( - storage: &StorageResources, + storage: &StorageHandle, ) -> Result, MissingM2Paths> { - let paths: Vec<_> = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) - .await + let resources = storage.get_latest_resources().await; + let paths: Vec<_> = resources + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(SLED_AGENT_REQUEST_FILE)) .collect(); if paths.is_empty() { - return Err(MissingM2Paths(sled_hardware::disk::CONFIG_DATASET)); + return Err(MissingM2Paths(CONFIG_DATASET)); } Ok(paths) } -// Helper function to wait for `fut` while handling any updates about hardware. -async fn wait_while_handling_hardware_updates, T>( - fut: F, - hardware_monitor: &mut broadcast::Receiver, - managers: &BootstrapManagers, - sled_agent: Option<&SledAgent>, - log: &Logger, - log_phase: &str, -) -> T { - tokio::pin!(fut); - loop { - tokio::select! { - // Cancel-safe per the docs on `broadcast::Receiver::recv()`. - hardware_update = hardware_monitor.recv() => { - info!( - log, - "Handling hardware update message"; - "phase" => log_phase, - "update" => ?hardware_update, - ); - - managers.handle_hardware_update( - hardware_update, - sled_agent, - log, - ).await; - } - - // Cancel-safe: we're using a `&mut Future`; dropping the - // reference does not cancel the underlying future. - result = &mut fut => return result, - } - } -} - /// Runs the OpenAPI generator, emitting the spec to stdout. 
pub fn run_openapi() -> Result<(), String> { http_api() @@ -634,18 +508,16 @@ pub fn run_openapi() -> Result<(), String> { struct Inner { config: SledConfig, - hardware_monitor: broadcast::Receiver, state: SledAgentState, sled_init_rx: mpsc::Receiver<( StartSledAgentRequest, oneshot::Sender>, )>, sled_reset_rx: mpsc::Receiver>>, - managers: BootstrapManagers, ddm_admin_localhost_client: DdmAdminClient, - bootstore_handles: BootstoreHandles, + long_running_task_handles: LongRunningTaskHandles, + service_manager: ServiceManager, _sprockets_server_handle: JoinHandle<()>, - _key_manager_handle: JoinHandle<()>, base_log: Logger, } @@ -653,14 +525,7 @@ impl Inner { async fn run(mut self) { let log = self.base_log.new(o!("component" => "SledAgentMain")); loop { - // TODO-correctness We pause handling hardware update messages while - // we handle sled init/reset requests - is that okay? tokio::select! { - // Cancel-safe per the docs on `broadcast::Receiver::recv()`. - hardware_update = self.hardware_monitor.recv() => { - self.handle_hardware_update(hardware_update, &log).await; - } - // Cancel-safe per the docs on `mpsc::Receiver::recv()`. Some((request, response_tx)) = self.sled_init_rx.recv() => { self.handle_start_sled_agent_request( @@ -688,41 +553,36 @@ impl Inner { } } - async fn handle_hardware_update( - &self, - hardware_update: Result, - log: &Logger, - ) { - info!( - log, - "Handling hardware update message"; - "phase" => "bootstore-steady-state", - "update" => ?hardware_update, - ); - - self.managers - .handle_hardware_update( - hardware_update, - self.state.sled_agent(), - &log, - ) - .await; - } - async fn handle_start_sled_agent_request( &mut self, request: StartSledAgentRequest, response_tx: oneshot::Sender>, log: &Logger, ) { - let request_id = request.body.id; - match &self.state { - SledAgentState::Bootstrapping => { + match &mut self.state { + SledAgentState::Bootstrapping( + sled_agent_started_tx, + underlay_available_tx, + ) => { + let request_id = request.body.id; + + // Extract from options to satisfy the borrow checker. + // It is not possible for `start_sled_agent` to be cancelled + // or fail in a safe, restartable manner. Therefore, for now, + // we explicitly unwrap here, and panic on error below. + // + // See https://github.com/oxidecomputer/omicron/issues/4494 + let sled_agent_started_tx = + sled_agent_started_tx.take().unwrap(); + let underlay_available_tx = + underlay_available_tx.take().unwrap(); + let response = match start_sled_agent( &self.config, request, - &self.bootstore_handles.node_handle, - &self.managers, + self.long_running_task_handles.clone(), + underlay_available_tx, + self.service_manager.clone(), &self.ddm_admin_localhost_client, &self.base_log, &log, @@ -733,17 +593,19 @@ impl Inner { // We've created sled-agent; we need to (possibly) // reconfigure the switch zone, if we're a scrimlet, to // give it our underlay network information. - self.managers - .check_latest_hardware_snapshot( - Some(server.sled_agent()), - log, - ) - .await; - + sled_agent_started_tx + .send(server.sled_agent().clone()) + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); self.state = SledAgentState::ServerStarted(server); Ok(SledAgentResponse { id: request_id }) } - Err(err) => Err(format!("{err:#}")), + Err(err) => { + // This error is unrecoverable, and if returned we'd + // end up in maintenance mode anyway. 
+ error!(log, "Failed to start sled agent: {err:#}"); + panic!("Failed to start sled agent"); + } }; _ = response_tx.send(response); } @@ -787,11 +649,11 @@ impl Inner { async fn uninstall_sled_local_config(&self) -> Result<(), BootstrapError> { let config_dirs = self - .managers - .storage - .resources() - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + .long_running_task_handles + .storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter(); for dir in config_dirs { diff --git a/sled-agent/src/storage/dump_setup.rs b/sled-agent/src/dump_setup.rs similarity index 93% rename from sled-agent/src/storage/dump_setup.rs rename to sled-agent/src/dump_setup.rs index 9b5edc0a7e..e675e6e12d 100644 --- a/sled-agent/src/storage/dump_setup.rs +++ b/sled-agent/src/dump_setup.rs @@ -1,4 +1,3 @@ -use crate::storage_manager::DiskWrapper; use camino::Utf8PathBuf; use derive_more::{AsRef, Deref, From}; use illumos_utils::dumpadm::DumpAdmError; @@ -6,13 +5,15 @@ use illumos_utils::zone::{AdmError, Zones}; use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; use omicron_common::disk::DiskIdentity; use sled_hardware::DiskVariant; +use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET}; +use sled_storage::disk::Disk; +use sled_storage::pool::Pool; use slog::Logger; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, HashSet}; use std::ffi::OsString; use std::path::{Path, PathBuf}; use std::sync::{Arc, Weak}; use std::time::{Duration, SystemTime, SystemTimeError, UNIX_EPOCH}; -use tokio::sync::MutexGuard; pub struct DumpSetup { worker: Arc>, @@ -70,11 +71,11 @@ trait GetMountpoint: std::ops::Deref { } impl GetMountpoint for DebugZpool { type NewType = DebugDataset; - const MOUNTPOINT: &'static str = sled_hardware::disk::DUMP_DATASET; + const MOUNTPOINT: &'static str = DUMP_DATASET; } impl GetMountpoint for CoreZpool { type NewType = CoreDataset; - const MOUNTPOINT: &'static str = sled_hardware::disk::CRASH_DATASET; + const MOUNTPOINT: &'static str = CRASH_DATASET; } struct DumpSetupWorker { @@ -99,50 +100,51 @@ const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300); impl DumpSetup { pub(crate) async fn update_dumpdev_setup( &self, - disks: &mut MutexGuard<'_, HashMap>, + disks: &BTreeMap, ) { let log = &self.log; let mut m2_dump_slices = Vec::new(); let mut u2_debug_datasets = Vec::new(); let mut m2_core_datasets = Vec::new(); - for (_id, disk_wrapper) in disks.iter() { - match disk_wrapper { - DiskWrapper::Real { disk, .. 
} => match disk.variant() { - DiskVariant::M2 => { - match disk.dump_device_devfs_path(false) { - Ok(path) => { - m2_dump_slices.push(DumpSlicePath(path)) - } - Err(err) => { - warn!(log, "Error getting dump device devfs path: {err:?}"); - } + for (_id, (disk, _)) in disks.iter() { + if disk.is_synthetic() { + // We only setup dump devices on real disks + continue; + } + match disk.variant() { + DiskVariant::M2 => { + match disk.dump_device_devfs_path(false) { + Ok(path) => m2_dump_slices.push(DumpSlicePath(path)), + Err(err) => { + warn!( + log, + "Error getting dump device devfs path: {err:?}" + ); } - let name = disk.zpool_name(); - if let Ok(info) = illumos_utils::zpool::Zpool::get_info( - &name.to_string(), - ) { - if info.health() == ZpoolHealth::Online { - m2_core_datasets.push(CoreZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); - } + } + let name = disk.zpool_name(); + if let Ok(info) = + illumos_utils::zpool::Zpool::get_info(&name.to_string()) + { + if info.health() == ZpoolHealth::Online { + m2_core_datasets.push(CoreZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); } } - DiskVariant::U2 => { - let name = disk.zpool_name(); - if let Ok(info) = illumos_utils::zpool::Zpool::get_info( - &name.to_string(), - ) { - if info.health() == ZpoolHealth::Online { - u2_debug_datasets - .push(DebugZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); - } + } + DiskVariant::U2 => { + let name = disk.zpool_name(); + if let Ok(info) = + illumos_utils::zpool::Zpool::get_info(&name.to_string()) + { + if info.health() == ZpoolHealth::Online { + u2_debug_datasets.push(DebugZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); } } - }, - DiskWrapper::Synthetic { .. } => {} + } } } diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs new file mode 100644 index 0000000000..698d2d4608 --- /dev/null +++ b/sled-agent/src/hardware_monitor.rs @@ -0,0 +1,257 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A task that listens for hardware events from the +//! [`sled_hardware::HardwareManager`] and dispatches them to other parts +//! of the bootstrap agent and sled-agent code. + +use crate::services::ServiceManager; +use crate::sled_agent::SledAgent; +use sled_hardware::{Baseboard, HardwareManager, HardwareUpdate}; +use sled_storage::disk::RawDisk; +use sled_storage::manager::StorageHandle; +use slog::Logger; +use tokio::sync::broadcast::error::RecvError; +use tokio::sync::{broadcast, oneshot}; + +// A thin wrapper around the the [`ServiceManager`] that caches the state +// whether or not the tofino is loaded if the [`ServiceManager`] doesn't exist +// yet. +enum TofinoManager { + Ready(ServiceManager), + NotReady { tofino_loaded: bool }, +} + +impl TofinoManager { + pub fn new() -> TofinoManager { + TofinoManager::NotReady { tofino_loaded: false } + } + + // Must only be called once on the transition from `NotReady` to `Ready`. + // Panics otherwise. 
+ // + // Returns whether the tofino was loaded or not + pub fn become_ready(&mut self, service_manager: ServiceManager) -> bool { + let tofino_loaded = match self { + Self::Ready(_) => panic!("ServiceManager is already available"), + Self::NotReady { tofino_loaded } => *tofino_loaded, + }; + *self = Self::Ready(service_manager); + tofino_loaded + } + + pub fn is_ready(&self) -> bool { + match self { + TofinoManager::Ready(_) => true, + _ => false, + } + } +} + +// A monitor for hardware events +pub struct HardwareMonitor { + log: Logger, + + baseboard: Baseboard, + + // Receive a one-time notification that the SledAgent has started + sled_agent_started_rx: oneshot::Receiver<SledAgent>, + + // Receive a one-time notification that the ServiceManager is ready + service_manager_ready_rx: oneshot::Receiver<ServiceManager>, + + // Receive messages from the [`HardwareManager`] + hardware_rx: broadcast::Receiver<HardwareUpdate>, + + // A reference to the hardware manager + hardware_manager: HardwareManager, + + // A handle to [`sled_storage::manager::StorageManager`] + storage_manager: StorageHandle, + + // A handle to the sled-agent + // + // This will go away once Nexus updates are polled: + // See: + // * https://github.com/oxidecomputer/omicron/issues/1917 + // * https://rfd.shared.oxide.computer/rfd/0433 + sled_agent: Option<SledAgent>, + + // The [`ServiceManager`] is instantiated after we start the [`HardwareMonitor`] + // task. However, it is only used to load and unload the switch zone when the + // state of the tofino changes. We keep track of the tofino state so that we + // can properly load the tofino when the [`ServiceManager`] becomes available. + tofino_manager: TofinoManager, +} + +impl HardwareMonitor { + pub fn new( + log: &Logger, + hardware_manager: &HardwareManager, + storage_manager: &StorageHandle, + ) -> ( + HardwareMonitor, + oneshot::Sender<SledAgent>, + oneshot::Sender<ServiceManager>, + ) { + let (sled_agent_started_tx, sled_agent_started_rx) = oneshot::channel(); + let (service_manager_ready_tx, service_manager_ready_rx) = + oneshot::channel(); + let baseboard = hardware_manager.baseboard(); + let hardware_rx = hardware_manager.monitor(); + let log = log.new(o!("component" => "HardwareMonitor")); + let tofino_manager = TofinoManager::new(); + ( + HardwareMonitor { + log, + baseboard, + sled_agent_started_rx, + service_manager_ready_rx, + hardware_rx, + hardware_manager: hardware_manager.clone(), + storage_manager: storage_manager.clone(), + sled_agent: None, + tofino_manager, + }, + sled_agent_started_tx, + service_manager_ready_tx, + ) + } + + /// Run the main receive loop of the `HardwareMonitor` + /// + /// This should be spawned into a tokio task + pub async fn run(&mut self) { + // Check the latest hardware snapshot; we could have missed events + // between the creation of the hardware manager and our subscription of + // its monitor. + self.check_latest_hardware_snapshot().await; + + loop { + tokio::select!
{ + Ok(sled_agent) = &mut self.sled_agent_started_rx, + if self.sled_agent.is_none() => + { + info!(self.log, "Sled Agent Started"); + self.sled_agent = Some(sled_agent); + self.check_latest_hardware_snapshot().await; + } + Ok(service_manager) = &mut self.service_manager_ready_rx, + if !self.tofino_manager.is_ready() => + { + let tofino_loaded = + self.tofino_manager.become_ready(service_manager); + if tofino_loaded { + self.activate_switch().await; + } + } + update = self.hardware_rx.recv() => { + info!( + self.log, + "Received hardware update message"; + "update" => ?update, + ); + self.handle_hardware_update(update).await; + } + } + } + } + + // Handle an update from the [`HardwareMonitor`] + async fn handle_hardware_update( + &mut self, + update: Result, + ) { + match update { + Ok(update) => match update { + HardwareUpdate::TofinoLoaded => self.activate_switch().await, + HardwareUpdate::TofinoUnloaded => { + self.deactivate_switch().await + } + HardwareUpdate::TofinoDeviceChange => { + if let Some(sled_agent) = &mut self.sled_agent { + sled_agent.notify_nexus_about_self(&self.log); + } + } + HardwareUpdate::DiskAdded(disk) => { + self.storage_manager.upsert_disk(disk.into()).await; + } + HardwareUpdate::DiskRemoved(disk) => { + self.storage_manager.delete_disk(disk.into()).await; + } + }, + Err(broadcast::error::RecvError::Lagged(count)) => { + warn!(self.log, "Hardware monitor missed {count} messages"); + self.check_latest_hardware_snapshot().await; + } + Err(broadcast::error::RecvError::Closed) => { + // The `HardwareManager` monitoring task is an infinite loop - + // the only way for us to get `Closed` here is if it panicked, + // so we will propagate such a panic. + panic!("Hardware manager monitor task panicked"); + } + } + } + + async fn activate_switch(&mut self) { + match &mut self.tofino_manager { + TofinoManager::Ready(service_manager) => { + if let Err(e) = service_manager + .activate_switch( + self.sled_agent + .as_ref() + .map(|sa| sa.switch_zone_underlay_info()), + self.baseboard.clone(), + ) + .await + { + warn!(self.log, "Failed to activate switch: {e}"); + } + } + TofinoManager::NotReady { tofino_loaded } => { + *tofino_loaded = true; + } + } + } + + async fn deactivate_switch(&mut self) { + match &mut self.tofino_manager { + TofinoManager::Ready(service_manager) => { + if let Err(e) = service_manager.deactivate_switch().await { + warn!(self.log, "Failed to deactivate switch: {e}"); + } + } + TofinoManager::NotReady { tofino_loaded } => { + *tofino_loaded = false; + } + } + } + + // Observe the current hardware state manually. + // + // We use this when we're monitoring hardware for the first + // time, and if we miss notifications. 
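+    //
+    // Falling back to this full snapshot after a `RecvError::Lagged` means a
+    // dropped broadcast message can delay a disk or switch change, but never
+    // lose it outright: the disk set is reconciled below via
+    // `ensure_using_exactly_these_disks`.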
+ async fn check_latest_hardware_snapshot(&mut self) { + let underlay_network = self.sled_agent.as_ref().map(|sled_agent| { + sled_agent.notify_nexus_about_self(&self.log); + sled_agent.switch_zone_underlay_info() + }); + info!( + self.log, "Checking current full hardware snapshot"; + "underlay_network_info" => ?underlay_network, + ); + if self.hardware_manager.is_scrimlet_driver_loaded() { + self.activate_switch().await; + } else { + self.deactivate_switch().await; + } + + self.storage_manager + .ensure_using_exactly_these_disks( + self.hardware_manager.disks().into_iter().map(RawDisk::from), + ) + .await; + } +} diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index ab107f9a63..2d0e2c4001 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -364,7 +364,7 @@ async fn zpools_get( rqctx: RequestContext, ) -> Result>, HttpError> { let sa = rqctx.context(); - Ok(HttpResponseOk(sa.zpools_get().await.map_err(|e| Error::from(e))?)) + Ok(HttpResponseOk(sa.zpools_get().await)) } #[endpoint { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 6db3b11740..f030078761 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -17,7 +17,6 @@ use crate::params::{ InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule, }; use crate::profile::*; -use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; @@ -42,7 +41,8 @@ use omicron_common::backoff; use propolis_client::Client as PropolisClient; use rand::prelude::SliceRandom; use rand::SeedableRng; -use sled_hardware::disk::ZONE_DATASET; +use sled_storage::dataset::ZONE_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::IpAddr; use std::net::{SocketAddr, SocketAddrV6}; @@ -225,7 +225,7 @@ struct InstanceInner { nexus_client: NexusClientWithResolver, // Storage resources - storage: StorageResources, + storage: StorageHandle, // Object used to collect zone bundles from this instance when terminated. zone_bundler: ZoneBundler, @@ -899,8 +899,9 @@ impl Instance { let mut rng = rand::rngs::StdRng::from_entropy(); let root = inner .storage - .all_u2_mountpoints(ZONE_DATASET) + .get_latest_resources() .await + .all_u2_mountpoints(ZONE_DATASET) .choose(&mut rng) .ok_or_else(|| Error::U2NotFound)? 
.clone(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 4b430812e1..fa40a876f0 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -12,7 +12,6 @@ use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, InstanceUnregisterResponse, }; -use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use illumos_utils::dladm::Etherstub; @@ -23,6 +22,7 @@ use omicron_common::api::external::ByteCount; use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::VmmRuntimeState; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::net::SocketAddr; @@ -74,7 +74,7 @@ struct InstanceManagerInternal { vnic_allocator: VnicAllocator, port_manager: PortManager, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, } @@ -82,7 +82,7 @@ pub(crate) struct InstanceManagerServices { pub nexus_client: NexusClientWithResolver, pub vnic_allocator: VnicAllocator, pub port_manager: PortManager, - pub storage: StorageResources, + pub storage: StorageHandle, pub zone_bundler: ZoneBundler, } @@ -98,7 +98,7 @@ impl InstanceManager { nexus_client: NexusClientWithResolver, etherstub: Etherstub, port_manager: PortManager, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ) -> Result { Ok(InstanceManager { diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index db89b17b5a..924fd4bd92 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -20,9 +20,12 @@ pub mod common; mod backing_fs; pub mod bootstrap; pub mod config; +pub(crate) mod dump_setup; +pub(crate) mod hardware_monitor; mod http_entrypoints; mod instance; mod instance_manager; +mod long_running_tasks; mod metrics; mod nexus; pub mod params; @@ -32,8 +35,7 @@ pub mod server; mod services; mod sled_agent; mod smf_helper; -pub(crate) mod storage; -mod storage_manager; +mod storage_monitor; mod swap_device; mod updates; mod zone_bundle; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs new file mode 100644 index 0000000000..f4a665c098 --- /dev/null +++ b/sled-agent/src/long_running_tasks.rs @@ -0,0 +1,241 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! This module is responsible for spawning, starting, and managing long running +//! tasks and task driven subsystems. These tasks run for the remainder of the +//! sled-agent process from the moment they begin. Primarily they include the +//! "managers", like `StorageManager`, `InstanceManager`, etc..., and are used +//! by both the bootstrap agent and the sled-agent. +//! +//! We don't bother keeping track of the spawned tasks handles because we know +//! these tasks are supposed to run forever, and they can shutdown if their +//! handles are dropped. 
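The "manager task plus cloneable handle" shape that these subsystems (for example `StorageManager`/`StorageHandle`) follow can be reduced to a small, self-contained sketch. The names below are hypothetical and only illustrate the pattern, not the real APIs:

    use tokio::sync::{mpsc, oneshot};

    // Hypothetical request type; each real manager defines its own.
    enum Request {
        Get { tx: oneshot::Sender<u64> },
        Set(u64),
    }

    // Task side: owns the state and runs for the life of the process.
    struct Manager {
        rx: mpsc::Receiver<Request>,
        value: u64,
    }

    // Handle side: cheap to clone and hand to every interested subsystem.
    #[derive(Clone)]
    struct Handle {
        tx: mpsc::Sender<Request>,
    }

    impl Manager {
        fn new() -> (Manager, Handle) {
            let (tx, rx) = mpsc::channel(32);
            (Manager { rx, value: 0 }, Handle { tx })
        }

        async fn run(mut self) {
            // Exits only when every `Handle` (sender) has been dropped.
            while let Some(req) = self.rx.recv().await {
                match req {
                    Request::Get { tx } => {
                        let _ = tx.send(self.value);
                    }
                    Request::Set(v) => self.value = v,
                }
            }
        }
    }

    fn start() -> Handle {
        let (manager, handle) = Manager::new();
        // Spawn-and-forget: the `JoinHandle` is dropped, the task keeps running.
        tokio::spawn(manager.run());
        handle
    }

Dropping the `tokio::spawn` handle only detaches the task; the receive loop itself ends once the last sender goes away, which is the sense in which these tasks can shut down if their handles are dropped.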
+ +use crate::bootstrap::bootstore_setup::{ + new_bootstore_config, poll_ddmd_for_bootstore_peer_update, +}; +use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; +use crate::config::Config; +use crate::hardware_monitor::HardwareMonitor; +use crate::services::ServiceManager; +use crate::sled_agent::SledAgent; +use crate::storage_monitor::{StorageMonitor, UnderlayAccess}; +use crate::zone_bundle::{CleanupContext, ZoneBundler}; +use bootstore::schemes::v0 as bootstore; +use key_manager::{KeyManager, StorageKeyRequester}; +use sled_hardware::{HardwareManager, SledMode}; +use sled_storage::disk::SyntheticDisk; +use sled_storage::manager::{StorageHandle, StorageManager}; +use slog::{info, Logger}; +use std::net::Ipv6Addr; +use tokio::sync::oneshot; + +/// A mechanism for interacting with all long running tasks that can be shared +/// between the bootstrap-agent and sled-agent code. +#[derive(Clone)] +pub struct LongRunningTaskHandles { + /// A mechanism for retrieving storage keys. This interacts with the + /// [`KeyManager`] task. In the future, there may be other handles for + /// retrieving different types of keys. Separating the handles limits the + /// access for a given key type to the code that holds the handle. + pub storage_key_requester: StorageKeyRequester, + + /// A mechanism for talking to the [`StorageManager`] which is responsible + /// for establishing zpools on disks and managing their datasets. + pub storage_manager: StorageHandle, + + /// A mechanism for interacting with the hardware device tree + pub hardware_manager: HardwareManager, + + // A handle for interacting with the bootstore + pub bootstore: bootstore::NodeHandle, + + // A reference to the object used to manage zone bundles + pub zone_bundler: ZoneBundler, +} + +/// Spawn all long running tasks +pub async fn spawn_all_longrunning_tasks( + log: &Logger, + sled_mode: SledMode, + global_zone_bootstrap_ip: Ipv6Addr, + config: &Config, +) -> ( + LongRunningTaskHandles, + oneshot::Sender, + oneshot::Sender, + oneshot::Sender, +) { + let storage_key_requester = spawn_key_manager(log); + let mut storage_manager = + spawn_storage_manager(log, storage_key_requester.clone()); + + let underlay_available_tx = + spawn_storage_monitor(log, storage_manager.clone()); + + let hardware_manager = spawn_hardware_manager(log, sled_mode).await; + + // Start monitoring for hardware changes + let (sled_agent_started_tx, service_manager_ready_tx) = + spawn_hardware_monitor(log, &hardware_manager, &storage_manager); + + // Add some synthetic disks if necessary. 
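+    // These come from the optional `zpools` list in the sled-agent `Config`
+    // and are upserted into the `StorageManager` just like real disks; see
+    // `upsert_synthetic_zpools_if_needed` below.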
+ upsert_synthetic_zpools_if_needed(&log, &storage_manager, &config).await; + + // Wait for the boot disk so that we can work with any ledgers, + // such as those needed by the bootstore and sled-agent + info!(log, "Waiting for boot disk"); + let (disk_id, _) = storage_manager.wait_for_boot_disk().await; + info!(log, "Found boot disk {:?}", disk_id); + + let bootstore = spawn_bootstore_tasks( + log, + &mut storage_manager, + &hardware_manager, + global_zone_bootstrap_ip, + ) + .await; + + let zone_bundler = spawn_zone_bundler_tasks(log, &mut storage_manager); + + ( + LongRunningTaskHandles { + storage_key_requester, + storage_manager, + hardware_manager, + bootstore, + zone_bundler, + }, + sled_agent_started_tx, + service_manager_ready_tx, + underlay_available_tx, + ) +} + +fn spawn_key_manager(log: &Logger) -> StorageKeyRequester { + info!(log, "Starting KeyManager"); + let secret_retriever = LrtqOrHardcodedSecretRetriever::new(); + let (mut key_manager, storage_key_requester) = + KeyManager::new(log, secret_retriever); + tokio::spawn(async move { key_manager.run().await }); + storage_key_requester +} + +fn spawn_storage_manager( + log: &Logger, + key_requester: StorageKeyRequester, +) -> StorageHandle { + info!(log, "Starting StorageManager"); + let (manager, handle) = StorageManager::new(log, key_requester); + tokio::spawn(async move { + manager.run().await; + }); + handle +} + +fn spawn_storage_monitor( + log: &Logger, + storage_handle: StorageHandle, +) -> oneshot::Sender { + info!(log, "Starting StorageMonitor"); + let (storage_monitor, underlay_available_tx) = + StorageMonitor::new(log, storage_handle); + tokio::spawn(async move { + storage_monitor.run().await; + }); + underlay_available_tx +} + +async fn spawn_hardware_manager( + log: &Logger, + sled_mode: SledMode, +) -> HardwareManager { + // The `HardwareManager` does not use the the "task/handle" pattern + // and spawns its worker task inside `HardwareManager::new`. Instead of returning + // a handle to send messages to that task, the "Inner/Mutex" pattern is used + // which shares data between the task, the manager itself, and the users of the manager + // since the manager can be freely cloned and passed around. + // + // There are pros and cons to both methods, but the reason to mention it here is that + // the handle in this case is the `HardwareManager` itself. 
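For contrast with the task/handle pattern used by the other spawns in this function, here is a minimal sketch of the "Inner/Mutex" approach described in the comment above. The types are hypothetical stand-ins, not the real `HardwareManager` internals:

    use std::sync::{Arc, Mutex};
    use std::thread;
    use std::time::Duration;

    #[derive(Default)]
    struct Inner {
        disks: Vec<String>,
    }

    // The manager itself is the handle: every clone shares `inner` with the
    // worker spawned in `new`.
    #[derive(Clone)]
    struct Manager {
        inner: Arc<Mutex<Inner>>,
    }

    impl Manager {
        fn new() -> Manager {
            let manager =
                Manager { inner: Arc::new(Mutex::new(Inner::default())) };
            let worker = manager.clone();
            // Callers never see this worker's handle; it polls forever.
            thread::spawn(move || loop {
                worker.inner.lock().unwrap().disks.push("scanned-disk".into());
                thread::sleep(Duration::from_secs(60));
            });
            manager
        }

        // Readers take the lock briefly and copy out what they need.
        fn disks(&self) -> Vec<String> {
            self.inner.lock().unwrap().disks.clone()
        }
    }

The trade-off is that callers read shared state under a lock rather than exchanging messages, and the manager, being freely cloneable, doubles as the handle.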
+ info!(log, "Starting HardwareManager"; "sled_mode" => ?sled_mode); + let log = log.clone(); + tokio::task::spawn_blocking(move || { + HardwareManager::new(&log, sled_mode).unwrap() + }) + .await + .unwrap() +} + +fn spawn_hardware_monitor( + log: &Logger, + hardware_manager: &HardwareManager, + storage_handle: &StorageHandle, +) -> (oneshot::Sender, oneshot::Sender) { + info!(log, "Starting HardwareMonitor"); + let (mut monitor, sled_agent_started_tx, service_manager_ready_tx) = + HardwareMonitor::new(log, hardware_manager, storage_handle); + tokio::spawn(async move { + monitor.run().await; + }); + (sled_agent_started_tx, service_manager_ready_tx) +} + +async fn spawn_bootstore_tasks( + log: &Logger, + storage_handle: &mut StorageHandle, + hardware_manager: &HardwareManager, + global_zone_bootstrap_ip: Ipv6Addr, +) -> bootstore::NodeHandle { + let storage_resources = storage_handle.get_latest_resources().await; + let config = new_bootstore_config( + &storage_resources, + hardware_manager.baseboard(), + global_zone_bootstrap_ip, + ) + .unwrap(); + + // Create and spawn the bootstore + info!(log, "Starting Bootstore"); + let (mut node, node_handle) = bootstore::Node::new(config, log).await; + tokio::spawn(async move { node.run().await }); + + // Spawn a task for polling DDMD and updating bootstore with peer addresses + info!(log, "Starting Bootstore DDMD poller"); + let log = log.new(o!("component" => "bootstore_ddmd_poller")); + let node_handle2 = node_handle.clone(); + tokio::spawn(async move { + poll_ddmd_for_bootstore_peer_update(log, node_handle2).await + }); + + node_handle +} + +// `ZoneBundler::new` spawns a periodic cleanup task that runs indefinitely +fn spawn_zone_bundler_tasks( + log: &Logger, + storage_handle: &mut StorageHandle, +) -> ZoneBundler { + info!(log, "Starting ZoneBundler related tasks"); + let log = log.new(o!("component" => "ZoneBundler")); + ZoneBundler::new(log, storage_handle.clone(), CleanupContext::default()) +} + +async fn upsert_synthetic_zpools_if_needed( + log: &Logger, + storage_manager: &StorageHandle, + config: &Config, +) { + if let Some(pools) = &config.zpools { + for pool in pools { + info!( + log, + "Upserting synthetic zpool to Storage Manager: {}", + pool.to_string() + ); + let disk = SyntheticDisk::new(pool.clone()).into(); + storage_manager.upsert_disk(disk).await; + } + } +} diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index cd84c9acd4..67d5f049e7 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -20,6 +20,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::Baseboard; pub use sled_hardware::DendriteAsic; +use sled_storage::dataset::DatasetName; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::time::Duration; @@ -228,64 +229,6 @@ pub struct Zpool { pub disk_type: DiskType, } -/// The type of a dataset, and an auxiliary information necessary -/// to successfully launch a zone managing the associated data. 
-#[derive( - Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, -)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum DatasetKind { - CockroachDb, - Crucible, - Clickhouse, - ClickhouseKeeper, - ExternalDns, - InternalDns, -} - -impl From for sled_agent_client::types::DatasetKind { - fn from(k: DatasetKind) -> Self { - use DatasetKind::*; - match k { - CockroachDb => Self::CockroachDb, - Crucible => Self::Crucible, - Clickhouse => Self::Clickhouse, - ClickhouseKeeper => Self::ClickhouseKeeper, - ExternalDns => Self::ExternalDns, - InternalDns => Self::InternalDns, - } - } -} - -impl From for nexus_client::types::DatasetKind { - fn from(k: DatasetKind) -> Self { - use DatasetKind::*; - match k { - CockroachDb => Self::Cockroach, - Crucible => Self::Crucible, - Clickhouse => Self::Clickhouse, - ClickhouseKeeper => Self::ClickhouseKeeper, - ExternalDns => Self::ExternalDns, - InternalDns => Self::InternalDns, - } - } -} - -impl std::fmt::Display for DatasetKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use DatasetKind::*; - let s = match self { - Crucible => "crucible", - CockroachDb { .. } => "cockroachdb", - Clickhouse => "clickhouse", - ClickhouseKeeper => "clickhouse_keeper", - ExternalDns { .. } => "external_dns", - InternalDns { .. } => "internal_dns", - }; - write!(f, "{}", s) - } -} - /// Describes service-specific parameters. #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, @@ -594,7 +537,7 @@ impl std::fmt::Display for ZoneType { )] pub struct DatasetRequest { pub id: Uuid, - pub name: crate::storage::dataset::DatasetName, + pub name: DatasetName, pub service_address: SocketAddrV6, } diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 8cd815e7fb..980f5b6ebd 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -6,12 +6,10 @@ use crate::bootstrap::params::StartSledAgentRequest; use crate::params::{ - DatasetKind, DatasetRequest, ServiceType, ServiceZoneRequest, - ServiceZoneService, ZoneType, + DatasetRequest, ServiceType, ServiceZoneRequest, ServiceZoneService, + ZoneType, }; use crate::rack_setup::config::SetupServiceConfig as Config; -use crate::storage::dataset::DatasetName; -use crate::storage_manager::StorageResources; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; @@ -36,6 +34,8 @@ use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; +use sled_storage::dataset::{DatasetKind, DatasetName, CONFIG_DATASET}; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::{BTreeSet, HashMap, HashSet}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6}; @@ -125,11 +125,12 @@ const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan.json"; impl Plan { pub async fn load( log: &Logger, - storage: &StorageResources, + storage_manager: &StorageHandle, ) -> Result, PlanError> { - let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SERVICE_PLAN_FILENAME)) .collect(); @@ -237,7 +238,7 @@ impl Plan { pub async fn create( log: &Logger, config: &Config, - storage: &StorageResources, + storage_manager: &StorageHandle, sleds: &HashMap, ) -> 
Result { let mut dns_builder = internal_dns::DnsConfigBuilder::new(); @@ -737,9 +738,10 @@ impl Plan { let plan = Self { services, dns_config }; // Once we've constructed a plan, write it down to durable storage. - let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SERVICE_PLAN_FILENAME)) .collect(); diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index 163b24cd45..07f33893fc 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -9,11 +9,12 @@ use crate::bootstrap::{ config::BOOTSTRAP_AGENT_RACK_INIT_PORT, params::StartSledAgentRequest, }; use crate::rack_setup::config::SetupServiceConfig as Config; -use crate::storage_manager::StorageResources; use camino::Utf8PathBuf; use omicron_common::ledger::{self, Ledger, Ledgerable}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddrV6}; @@ -55,11 +56,12 @@ pub struct Plan { impl Plan { pub async fn load( log: &Logger, - storage: &StorageResources, + storage: &StorageHandle, ) -> Result, PlanError> { let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SLED_PLAN_FILENAME)) .collect(); @@ -78,7 +80,7 @@ impl Plan { pub async fn create( log: &Logger, config: &Config, - storage: &StorageResources, + storage_manager: &StorageHandle, bootstrap_addrs: HashSet, use_trust_quorum: bool, ) -> Result { @@ -123,9 +125,10 @@ impl Plan { let plan = Self { rack_id, sleds, config: config.clone() }; // Once we've constructed a plan, write it down to durable storage. 
- let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SLED_PLAN_FILENAME)) .collect(); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 362d93479d..34d5e06cfe 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -74,7 +74,6 @@ use crate::rack_setup::plan::service::{ use crate::rack_setup::plan::sled::{ Plan as SledPlan, PlanError as SledPlanError, }; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use ddm_admin_client::{Client as DdmAdminClient, DdmError}; @@ -94,6 +93,8 @@ use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; use sled_hardware::underlay::BootstrapInterface; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeSet; use std::collections::{HashMap, HashSet}; @@ -187,7 +188,7 @@ impl RackSetupService { pub(crate) fn new( log: Logger, config: Config, - storage_resources: StorageResources, + storage_manager: StorageHandle, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, ) -> Self { @@ -196,7 +197,7 @@ impl RackSetupService { if let Err(e) = svc .run( &config, - &storage_resources, + &storage_manager, local_bootstrap_agent, bootstore, ) @@ -773,7 +774,7 @@ impl ServiceInner { async fn run( &self, config: &Config, - storage_resources: &StorageResources, + storage_manager: &StorageHandle, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, ) -> Result<(), SetupServiceError> { @@ -784,9 +785,10 @@ impl ServiceInner { config.az_subnet(), )?; - let marker_paths: Vec = storage_resources - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let marker_paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_COMPLETED_FILENAME)) .collect(); @@ -807,7 +809,7 @@ impl ServiceInner { "RSS configuration looks like it has already been applied", ); - let sled_plan = SledPlan::load(&self.log, storage_resources) + let sled_plan = SledPlan::load(&self.log, storage_manager) .await? .expect("Sled plan should exist if completed marker exists"); if &sled_plan.config != config { @@ -815,7 +817,7 @@ impl ServiceInner { "Configuration changed".to_string(), )); } - let service_plan = ServicePlan::load(&self.log, storage_resources) + let service_plan = ServicePlan::load(&self.log, storage_manager) .await? .expect("Service plan should exist if completed marker exists"); @@ -849,7 +851,7 @@ impl ServiceInner { BootstrapAddressDiscovery::OnlyThese { addrs } => addrs.clone(), }; let maybe_sled_plan = - SledPlan::load(&self.log, storage_resources).await?; + SledPlan::load(&self.log, storage_manager).await?; if let Some(plan) = &maybe_sled_plan { let stored_peers: HashSet = plan.sleds.keys().map(|a| *a.ip()).collect(); @@ -881,7 +883,7 @@ impl ServiceInner { SledPlan::create( &self.log, config, - &storage_resources, + &storage_manager, bootstrap_addrs, config.trust_quorum_peers.is_some(), ) @@ -936,14 +938,14 @@ impl ServiceInner { }) .collect(); let service_plan = if let Some(plan) = - ServicePlan::load(&self.log, storage_resources).await? + ServicePlan::load(&self.log, storage_manager).await? 
{ plan } else { ServicePlan::create( &self.log, &config, - &storage_resources, + &storage_manager, &plan.sleds, ) .await? diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 156547627c..903c8dabaa 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -8,14 +8,15 @@ use super::config::Config; use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; use crate::bootstrap::params::StartSledAgentRequest; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::nexus::NexusClientWithResolver; use crate::services::ServiceManager; -use crate::storage_manager::StorageManager; -use bootstore::schemes::v0 as bootstore; +use crate::storage_monitor::UnderlayAccess; use internal_dns::resolver::Resolver; use slog::Logger; use std::net::SocketAddr; use std::sync::Arc; +use tokio::sync::oneshot; use uuid::Uuid; /// Packages up a [`SledAgent`], running the sled agent API under a Dropshot @@ -39,9 +40,9 @@ impl Server { config: &Config, log: Logger, request: StartSledAgentRequest, + long_running_tasks_handles: LongRunningTaskHandles, services: ServiceManager, - storage: StorageManager, - bootstore: bootstore::NodeHandle, + underlay_available_tx: oneshot::Sender, ) -> Result { info!(log, "setting up sled agent server"); @@ -63,8 +64,8 @@ impl Server { nexus_client, request, services, - storage, - bootstore, + long_running_tasks_handles, + underlay_available_tx, ) .await .map_err(|e| e.to_string())?; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index d1d8dbfff0..b87c91768b 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -5,7 +5,7 @@ //! Sled-local service management. //! //! For controlling zone-based storage services, refer to -//! [crate::storage_manager::StorageManager]. +//! [sled_storage::manager::StorageManager]. //! //! For controlling virtual machine instances, refer to //! [crate::instance_manager::InstanceManager]. 
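The storage-related hunks that follow (and the similar ones in the RSS plan code above) all take the same shape: one await to fetch a point-in-time snapshot from the storage task, then synchronous queries against that snapshot. A reduced sketch, with stand-in types that only approximate `StorageHandle` and `StorageResources`:

    use camino::Utf8PathBuf;

    // Stand-in for `StorageResources`: an owned snapshot of the disk layout.
    #[derive(Clone)]
    struct Resources {
        m2_mountpoints: Vec<Utf8PathBuf>,
    }

    impl Resources {
        fn all_m2_mountpoints(&self, dataset: &str) -> Vec<Utf8PathBuf> {
            self.m2_mountpoints.iter().map(|p| p.join(dataset)).collect()
        }
    }

    // Stand-in for `StorageHandle`.
    struct Handle {
        // In the real code this is a channel to the `StorageManager` task.
        latest: Resources,
    }

    impl Handle {
        async fn get_latest_resources(&self) -> Resources {
            self.latest.clone()
        }
    }

    // Mirrors the ledger-path lookups in this diff.
    async fn ledger_paths(storage: &Handle, filename: &str) -> Vec<Utf8PathBuf> {
        storage
            .get_latest_resources()
            .await
            .all_m2_mountpoints("config")
            .into_iter()
            .map(|p| p.join(filename))
            .collect()
    }

Because the snapshot is owned, no lock or channel is held across the path manipulation that follows, and repeated queries within one operation see a consistent view of the disks.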
@@ -38,7 +38,6 @@ use crate::params::{ use crate::profile::*; use crate::smf_helper::Service; use crate::smf_helper::SmfHelper; -use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; @@ -91,12 +90,13 @@ use omicron_common::nexus_config::{ use once_cell::sync::OnceCell; use rand::prelude::SliceRandom; use rand::SeedableRng; -use sled_hardware::disk::ZONE_DATASET; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware::underlay::BOOTSTRAP_PREFIX; use sled_hardware::Baseboard; use sled_hardware::SledMode; +use sled_storage::dataset::{CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET}; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::collections::HashSet; @@ -373,7 +373,7 @@ pub struct ServiceManagerInner { advertised_prefixes: Mutex>>, sled_info: OnceCell, switch_zone_bootstrap_address: Ipv6Addr, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ledger_directory_override: OnceCell, image_directory_override: OnceCell, @@ -418,10 +418,11 @@ impl ServiceManager { skip_timesync: Option, sidecar_revision: SidecarRevision, switch_zone_maghemite_links: Vec, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ) -> Self { let log = log.new(o!("component" => "ServiceManager")); + info!(log, "Creating ServiceManager"); Self { inner: Arc::new(ServiceManagerInner { log: log.clone(), @@ -476,10 +477,9 @@ impl ServiceManager { if let Some(dir) = self.inner.ledger_directory_override.get() { return vec![dir.join(SERVICES_LEDGER_FILENAME)]; } - self.inner - .storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) - .await + let resources = self.inner.storage.get_latest_resources().await; + resources + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(SERVICES_LEDGER_FILENAME)) .collect() @@ -1096,11 +1096,11 @@ impl ServiceManager { // If the boot disk exists, look for the image in the "install" dataset // there too. 
- if let Some((_, boot_zpool)) = self.inner.storage.boot_disk().await { - zone_image_paths.push( - boot_zpool - .dataset_mountpoint(sled_hardware::disk::INSTALL_DATASET), - ); + if let Some((_, boot_zpool)) = + self.inner.storage.get_latest_resources().await.boot_disk() + { + zone_image_paths + .push(boot_zpool.dataset_mountpoint(INSTALL_DATASET)); } let installed_zone = InstalledZone::install( @@ -2252,8 +2252,12 @@ impl ServiceManager { // Create zones that should be running let mut zone_requests = AllZoneRequests::default(); - let all_u2_roots = - self.inner.storage.all_u2_mountpoints(ZONE_DATASET).await; + let all_u2_roots = self + .inner + .storage + .get_latest_resources() + .await + .all_u2_mountpoints(ZONE_DATASET); for zone in zones_to_be_added { // Check if we think the zone should already be running let name = zone.zone_name(); @@ -2979,8 +2983,12 @@ impl ServiceManager { let root = if request.zone_type == ZoneType::Switch { Utf8PathBuf::from(ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT) } else { - let all_u2_roots = - self.inner.storage.all_u2_mountpoints(ZONE_DATASET).await; + let all_u2_roots = self + .inner + .storage + .get_latest_resources() + .await + .all_u2_mountpoints(ZONE_DATASET); let mut rng = rand::rngs::StdRng::from_entropy(); all_u2_roots .choose(&mut rng) @@ -3038,7 +3046,7 @@ impl ServiceManager { mod test { use super::*; use crate::params::{ServiceZoneService, ZoneType}; - use async_trait::async_trait; + use illumos_utils::zpool::ZpoolName; use illumos_utils::{ dladm::{ Etherstub, MockDladm, BOOTSTRAP_ETHERSTUB_NAME, @@ -3047,10 +3055,10 @@ mod test { svc, zone::MockZones, }; - use key_manager::{ - SecretRetriever, SecretRetrieverError, SecretState, VersionedIkm, - }; use omicron_common::address::OXIMETER_PORT; + use sled_storage::disk::{RawDisk, SyntheticDisk}; + + use sled_storage::manager::{FakeStorageManager, StorageHandle}; use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; use uuid::Uuid; @@ -3078,6 +3086,7 @@ mod test { // Returns the expectations for a new service to be created. 
fn expect_new_service() -> Vec> { + illumos_utils::USE_MOCKS.store(true, Ordering::SeqCst); // Create a VNIC let create_vnic_ctx = MockDladm::create_vnic_context(); create_vnic_ctx.expect().return_once( @@ -3120,8 +3129,7 @@ mod test { let wait_ctx = svc::wait_for_service_context(); wait_ctx.expect().return_once(|_, _, _| Ok(())); - // Import the manifest, enable the service - let execute_ctx = illumos_utils::execute_context(); + let execute_ctx = illumos_utils::execute_helper_context(); execute_ctx.expect().times(..).returning(|_| { Ok(std::process::Output { status: std::process::ExitStatus::from_raw(0), @@ -3243,29 +3251,24 @@ mod test { } } - pub struct TestSecretRetriever {} + async fn setup_storage() -> StorageHandle { + let (manager, handle) = FakeStorageManager::new(); - #[async_trait] - impl SecretRetriever for TestSecretRetriever { - async fn get_latest( - &self, - ) -> Result { - let epoch = 0; - let salt = [0u8; 32]; - let secret = [0x1d; 32]; + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); - Ok(VersionedIkm::new(epoch, salt, &secret)) - } + let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); + let internal_disk: RawDisk = + SyntheticDisk::new(internal_zpool_name).into(); + handle.upsert_disk(internal_disk).await; + let external_zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let external_disk: RawDisk = + SyntheticDisk::new(external_zpool_name).into(); + handle.upsert_disk(external_disk).await; - async fn get( - &self, - epoch: u64, - ) -> Result { - if epoch != 0 { - return Err(SecretRetrieverError::NoSuchEpoch(epoch)); - } - Ok(SecretState::Current(self.get_latest().await?)) - } + handle } #[tokio::test] @@ -3276,10 +3279,10 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3290,7 +3293,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources, + storage_handle, zone_bundler, ); test_config.override_paths(&mgr); @@ -3324,10 +3327,10 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3338,7 +3341,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources, + storage_handle, zone_bundler, ); test_config.override_paths(&mgr); @@ -3377,10 +3380,10 @@ mod test { // First, spin up a ServiceManager, create a new service, and tear it // down. 
- let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3391,7 +3394,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle.clone(), zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3424,7 +3427,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle.clone(), zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3460,10 +3463,10 @@ mod test { // First, spin up a ServiceManager, create a new service, and tear it // down. - let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3474,7 +3477,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle.clone(), zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3512,7 +3515,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle, zone_bundler.clone(), ); test_config.override_paths(&mgr); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 906993ad00..aec64a1349 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -11,6 +11,7 @@ use crate::bootstrap::early_networking::{ use crate::bootstrap::params::StartSledAgentRequest; use crate::config::Config; use crate::instance_manager::{InstanceManager, ReservoirMode}; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::metrics::MetricsManager; use crate::nexus::{NexusClientWithResolver, NexusRequestQueue}; use crate::params::{ @@ -20,7 +21,7 @@ use crate::params::{ VpcFirewallRule, ZoneBundleMetadata, Zpool, }; use crate::services::{self, ServiceManager}; -use crate::storage_manager::{self, StorageManager}; +use crate::storage_monitor::UnderlayAccess; use crate::updates::{ConfigUpdates, UpdateManager}; use crate::zone_bundle; use crate::zone_bundle::BundleError; @@ -57,13 +58,13 @@ use omicron_common::backoff::{ retry_policy_internal_service_aggressive, BackoffError, }; use oximeter::types::ProducerRegistry; -use sled_hardware::underlay; -use sled_hardware::HardwareManager; -use sled_hardware::{underlay::BootstrapInterface, Baseboard}; +use sled_hardware::{underlay, Baseboard, HardwareManager}; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::sync::Arc; +use tokio::sync::oneshot; use uuid::Uuid; #[cfg(not(test))] @@ -110,7 +111,7 @@ pub enum Error { Instance(#[from] crate::instance_manager::Error), #[error("Error managing storage: {0}")] - Storage(#[from] crate::storage_manager::Error), + Storage(#[from] sled_storage::error::Error), #[error("Error updating: {0}")] Download(#[from] crate::updates::Error), @@ -227,7 +228,7 @@ struct SledAgentInner { start_request: StartSledAgentRequest, // Component of Sled Agent responsible for storage and dataset management. - storage: StorageManager, + storage: StorageHandle, // Component of Sled Agent responsible for managing Propolis instances. 
instances: InstanceManager, @@ -287,8 +288,8 @@ impl SledAgent { nexus_client: NexusClientWithResolver, request: StartSledAgentRequest, services: ServiceManager, - storage: StorageManager, - bootstore: bootstore::NodeHandle, + long_running_task_handles: LongRunningTaskHandles, + underlay_available_tx: oneshot::Sender, ) -> Result { // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. @@ -301,14 +302,14 @@ impl SledAgent { )); info!(&log, "SledAgent::new(..) starting"); - let boot_disk = storage - .resources() - .boot_disk() + let storage_manager = &long_running_task_handles.storage_manager; + let boot_disk = storage_manager + .get_latest_resources() .await + .boot_disk() .ok_or_else(|| Error::BootDiskNotFound)?; - // Configure a swap device of the configured size before other system - // setup. + // Configure a swap device of the configured size before other system setup. match config.swap_device_size_gb { Some(sz) if sz > 0 => { info!(log, "Requested swap device of size {} GiB", sz); @@ -363,28 +364,23 @@ impl SledAgent { *sled_address.ip(), ); - storage - .setup_underlay_access(storage_manager::UnderlayAccess { + // Inform the `StorageMonitor` that the underlay is available so that + // it can try to contact nexus. + underlay_available_tx + .send(UnderlayAccess { nexus_client: nexus_client.clone(), sled_id: request.body.id, }) - .await?; - - // TODO-correctness The bootstrap agent _also_ has a `HardwareManager`. - // We only use it for reading properties, but it's not `Clone`able - // because it's holding an inner task handle. Could we add a way to get - // a read-only handle to it, and have bootstrap agent give us that - // instead of creating a new full one ourselves? - let hardware = HardwareManager::new(&parent_log, services.sled_mode()) - .map_err(|e| Error::Hardware(e))?; + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); let instances = InstanceManager::new( parent_log.clone(), nexus_client.clone(), etherstub.clone(), port_manager.clone(), - storage.resources().clone(), - storage.zone_bundler().clone(), + storage_manager.clone(), + long_running_task_handles.zone_bundler.clone(), )?; // Configure the VMM reservoir as either a percentage of DRAM or as an @@ -409,7 +405,10 @@ impl SledAgent { } _ => { instances - .set_reservoir_size(&hardware, reservoir_mode) + .set_reservoir_size( + &long_running_task_handles.hardware_manager, + reservoir_mode, + ) .map_err(|e| { error!(log, "Failed to setup VMM reservoir: {e}"); e @@ -431,7 +430,8 @@ impl SledAgent { // until we have this, as we need to know which switches have uplinks to // correctly set up services. let get_network_config = || async { - let serialized_config = bootstore + let serialized_config = long_running_task_handles + .bootstore .get_network_config() .await .map_err(|err| BackoffError::transient(err.to_string()))? 
@@ -477,7 +477,7 @@ impl SledAgent { let mut metrics_manager = MetricsManager::new( request.body.id, request.body.rack_id, - hardware.baseboard(), + long_running_task_handles.hardware_manager.baseboard(), log.new(o!("component" => "MetricsManager")), )?; @@ -514,15 +514,14 @@ impl SledAgent { endpoint, )); - let zone_bundler = storage.zone_bundler().clone(); let sled_agent = SledAgent { inner: Arc::new(SledAgentInner { id: request.body.id, subnet: request.body.subnet, start_request: request, - storage, + storage: long_running_task_handles.storage_manager.clone(), instances, - hardware, + hardware: long_running_task_handles.hardware_manager.clone(), updates, port_manager, services, @@ -536,8 +535,8 @@ impl SledAgent { // request queue? nexus_request_queue: NexusRequestQueue::new(), rack_network_config, - zone_bundler, - bootstore: bootstore.clone(), + zone_bundler: long_running_task_handles.zone_bundler.clone(), + bootstore: long_running_task_handles.bootstore.clone(), metrics_manager, }), log: log.clone(), @@ -558,6 +557,7 @@ impl SledAgent { /// Blocks until all services have started, retrying indefinitely on /// failure. pub(crate) async fn cold_boot_load_services(&self) { + info!(self.log, "Loading cold boot services"); retry_notify( retry_policy_internal_service_aggressive(), || async { @@ -664,12 +664,15 @@ impl SledAgent { if call_count == 0 { info!( log, - "failed to notify nexus about sled agent"; "error" => err, + "failed to notify nexus about sled agent"; + "error" => %err, ); } else if total_duration > std::time::Duration::from_secs(30) { warn!( log, - "failed to notify nexus about sled agent"; "error" => err, "total duration" => ?total_duration, + "failed to notify nexus about sled agent"; + "error" => %err, + "total duration" => ?total_duration, ); } }; @@ -838,9 +841,18 @@ impl SledAgent { } /// Gets the sled's current list of all zpools. - pub async fn zpools_get(&self) -> Result, Error> { - let zpools = self.inner.storage.get_zpools().await?; - Ok(zpools) + pub async fn zpools_get(&self) -> Vec { + self.inner + .storage + .get_latest_resources() + .await + .get_all_zpools() + .into_iter() + .map(|(name, variant)| Zpool { + id: name.id(), + disk_type: variant.into(), + }) + .collect() } /// Returns whether or not the sled believes itself to be a scrimlet @@ -1080,7 +1092,9 @@ pub async fn add_sled_to_initialized_rack( // Get all known bootstrap addresses via DDM let ddm_admin_client = DdmAdminClient::localhost(&log)?; let addrs = ddm_admin_client - .derive_bootstrap_addrs_from_prefixes(&[BootstrapInterface::GlobalZone]) + .derive_bootstrap_addrs_from_prefixes(&[ + underlay::BootstrapInterface::GlobalZone, + ]) .await?; // Create a set of futures to concurrently map the baseboard to bootstrap ip diff --git a/sled-agent/src/storage/dataset.rs b/sled-agent/src/storage/dataset.rs deleted file mode 100644 index 4efc0f320a..0000000000 --- a/sled-agent/src/storage/dataset.rs +++ /dev/null @@ -1,63 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use crate::params::DatasetKind; -use illumos_utils::zpool::ZpoolName; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use std::str::FromStr; - -#[derive( - Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, -)] -pub struct DatasetName { - // A unique identifier for the Zpool on which the dataset is stored. 
- pool_name: ZpoolName, - // A name for the dataset within the Zpool. - kind: DatasetKind, -} - -impl DatasetName { - pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self { - Self { pool_name, kind } - } - - pub fn pool(&self) -> &ZpoolName { - &self.pool_name - } - - pub fn dataset(&self) -> &DatasetKind { - &self.kind - } - - pub fn full(&self) -> String { - format!("{}/{}", self.pool_name, self.kind) - } -} - -impl From for sled_agent_client::types::DatasetName { - fn from(n: DatasetName) -> Self { - Self { - pool_name: sled_agent_client::types::ZpoolName::from_str( - &n.pool().to_string(), - ) - .unwrap(), - kind: n.dataset().clone().into(), - } - } -} - -#[cfg(test)] -mod test { - use super::*; - use uuid::Uuid; - - #[test] - fn serialize_dataset_name() { - let pool = ZpoolName::new_internal(Uuid::new_v4()); - let kind = DatasetKind::Crucible; - let name = DatasetName::new(pool, kind); - toml::to_string(&name).unwrap(); - } -} diff --git a/sled-agent/src/storage/mod.rs b/sled-agent/src/storage/mod.rs deleted file mode 100644 index 74bd59a151..0000000000 --- a/sled-agent/src/storage/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Management of local storage - -pub(crate) mod dataset; -pub(crate) mod dump_setup; diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs deleted file mode 100644 index c31a4dc0bc..0000000000 --- a/sled-agent/src/storage_manager.rs +++ /dev/null @@ -1,1432 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Management of sled-local storage. - -use crate::nexus::NexusClientWithResolver; -use crate::storage::dataset::DatasetName; -use crate::storage::dump_setup::DumpSetup; -use crate::zone_bundle::ZoneBundler; -use camino::Utf8PathBuf; -use derive_more::From; -use futures::stream::FuturesOrdered; -use futures::FutureExt; -use futures::StreamExt; -use illumos_utils::zpool::{ZpoolKind, ZpoolName}; -use illumos_utils::{zfs::Mountpoint, zpool::ZpoolInfo}; -use key_manager::StorageKeyRequester; -use nexus_client::types::PhysicalDiskDeleteRequest; -use nexus_client::types::PhysicalDiskKind; -use nexus_client::types::PhysicalDiskPutRequest; -use nexus_client::types::ZpoolPutRequest; -use omicron_common::api::external::{ByteCount, ByteCountRangeError}; -use omicron_common::backoff; -use omicron_common::disk::DiskIdentity; -use sled_hardware::{Disk, DiskVariant, UnparsedDisk}; -use slog::Logger; -use std::collections::hash_map; -use std::collections::HashMap; -use std::collections::HashSet; -use std::convert::TryFrom; -use std::pin::Pin; -use std::sync::Arc; -use std::sync::OnceLock; -use std::time::Duration; -use tokio::sync::{mpsc, oneshot, Mutex}; -use tokio::task::JoinHandle; -use tokio::time::{interval, MissedTickBehavior}; -use uuid::Uuid; - -use illumos_utils::dumpadm::DumpHdrError; -#[cfg(test)] -use illumos_utils::{zfs::MockZfs as Zfs, zpool::MockZpool as Zpool}; -#[cfg(not(test))] -use illumos_utils::{zfs::Zfs, zpool::Zpool}; - -// A key manager can only become ready once. This occurs during RSS or cold -// boot when the bootstore has detected it has a key share. 
-static KEY_MANAGER_READY: OnceLock<()> = OnceLock::new(); - -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error(transparent)] - DiskError(#[from] sled_hardware::DiskError), - - // TODO: We could add the context of "why are we doint this op", maybe? - #[error(transparent)] - ZfsListDataset(#[from] illumos_utils::zfs::ListDatasetsError), - - #[error(transparent)] - ZfsEnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), - - #[error(transparent)] - ZfsSetValue(#[from] illumos_utils::zfs::SetValueError), - - #[error(transparent)] - ZfsGetValue(#[from] illumos_utils::zfs::GetValueError), - - #[error(transparent)] - GetZpoolInfo(#[from] illumos_utils::zpool::GetInfoError), - - #[error(transparent)] - Fstyp(#[from] illumos_utils::fstyp::Error), - - #[error(transparent)] - ZoneCommand(#[from] illumos_utils::running_zone::RunCommandError), - - #[error(transparent)] - ZoneBoot(#[from] illumos_utils::running_zone::BootError), - - #[error(transparent)] - ZoneEnsureAddress(#[from] illumos_utils::running_zone::EnsureAddressError), - - #[error(transparent)] - ZoneInstall(#[from] illumos_utils::running_zone::InstallZoneError), - - #[error("No U.2 Zpools found")] - NoU2Zpool, - - #[error("Failed to parse UUID from {path}: {err}")] - ParseUuid { - path: Utf8PathBuf, - #[source] - err: uuid::Error, - }, - - #[error("Dataset {name:?} exists with a different uuid (has {old}, requested {new})")] - UuidMismatch { name: Box, old: Uuid, new: Uuid }, - - #[error("Error parsing pool {name}'s size: {err}")] - BadPoolSize { - name: String, - #[source] - err: ByteCountRangeError, - }, - - #[error("Failed to parse the dataset {name}'s UUID: {err}")] - ParseDatasetUuid { - name: String, - #[source] - err: uuid::Error, - }, - - #[error("Zpool Not Found: {0}")] - ZpoolNotFound(String), - - #[error("Failed to serialize toml (intended for {path:?}): {err}")] - Serialize { - path: Utf8PathBuf, - #[source] - err: toml::ser::Error, - }, - - #[error("Failed to deserialize toml from {path:?}: {err}")] - Deserialize { - path: Utf8PathBuf, - #[source] - err: toml::de::Error, - }, - - #[error("Failed to perform I/O: {message}: {err}")] - Io { - message: String, - #[source] - err: std::io::Error, - }, - - #[error("Underlay not yet initialized")] - UnderlayNotInitialized, - - #[error("Encountered error checking dump device flags: {0}")] - DumpHdr(#[from] DumpHdrError), -} - -/// A ZFS storage pool. -struct Pool { - name: ZpoolName, - info: ZpoolInfo, - parent: DiskIdentity, -} - -impl Pool { - /// Queries for an existing Zpool by name. - /// - /// Returns Ok if the pool exists. - fn new(name: ZpoolName, parent: DiskIdentity) -> Result { - let info = Zpool::get_info(&name.to_string())?; - Ok(Pool { name, info, parent }) - } - - fn parent(&self) -> &DiskIdentity { - &self.parent - } -} - -// The type of a future which is used to send a notification to Nexus. 
-type NotifyFut = - Pin> + Send>>; - -#[derive(Debug)] -struct NewFilesystemRequest { - dataset_id: Uuid, - dataset_name: DatasetName, - responder: oneshot::Sender>, -} - -struct UnderlayRequest { - underlay: UnderlayAccess, - responder: oneshot::Sender>, -} - -#[derive(PartialEq, Eq, Clone)] -pub(crate) enum DiskWrapper { - Real { disk: Disk, devfs_path: Utf8PathBuf }, - Synthetic { zpool_name: ZpoolName }, -} - -impl From for DiskWrapper { - fn from(disk: Disk) -> Self { - let devfs_path = disk.devfs_path().clone(); - Self::Real { disk, devfs_path } - } -} - -impl DiskWrapper { - fn identity(&self) -> DiskIdentity { - match self { - DiskWrapper::Real { disk, .. } => disk.identity().clone(), - DiskWrapper::Synthetic { zpool_name } => { - let id = zpool_name.id(); - DiskIdentity { - vendor: "synthetic-vendor".to_string(), - serial: format!("synthetic-serial-{id}"), - model: "synthetic-model".to_string(), - } - } - } - } - - fn variant(&self) -> DiskVariant { - match self { - DiskWrapper::Real { disk, .. } => disk.variant(), - DiskWrapper::Synthetic { zpool_name } => match zpool_name.kind() { - ZpoolKind::External => DiskVariant::U2, - ZpoolKind::Internal => DiskVariant::M2, - }, - } - } - - fn zpool_name(&self) -> &ZpoolName { - match self { - DiskWrapper::Real { disk, .. } => disk.zpool_name(), - DiskWrapper::Synthetic { zpool_name } => zpool_name, - } - } -} - -#[derive(Clone)] -pub struct StorageResources { - // All disks, real and synthetic, being managed by this sled - disks: Arc>>, - - // A map of "Uuid" to "pool". - pools: Arc>>, -} - -// The directory within the debug dataset in which bundles are created. -const BUNDLE_DIRECTORY: &str = "bundle"; - -// The directory for zone bundles. -const ZONE_BUNDLE_DIRECTORY: &str = "zone"; - -impl StorageResources { - /// Creates a fabricated view of storage resources. - /// - /// Use this only when you want to reference the disks, but not actually - /// access them. Creates one internal and one external disk. - #[cfg(test)] - pub fn new_for_test() -> Self { - let new_disk_identity = || DiskIdentity { - vendor: "vendor".to_string(), - serial: Uuid::new_v4().to_string(), - model: "model".to_string(), - }; - - Self { - disks: Arc::new(Mutex::new(HashMap::from([ - ( - new_disk_identity(), - DiskWrapper::Synthetic { - zpool_name: ZpoolName::new_internal(Uuid::new_v4()), - }, - ), - ( - new_disk_identity(), - DiskWrapper::Synthetic { - zpool_name: ZpoolName::new_external(Uuid::new_v4()), - }, - ), - ]))), - pools: Arc::new(Mutex::new(HashMap::new())), - } - } - - /// Returns the identity of the boot disk. - /// - /// If this returns `None`, we have not processed the boot disk yet. - pub async fn boot_disk(&self) -> Option<(DiskIdentity, ZpoolName)> { - let disks = self.disks.lock().await; - disks.iter().find_map(|(id, disk)| { - match disk { - // This is the "real" use-case: if we have real disks, query - // their properties to identify if they truly are the boot disk. - DiskWrapper::Real { disk, .. } => { - if disk.is_boot_disk() { - return Some((id.clone(), disk.zpool_name().clone())); - } - } - // This is the "less real" use-case: if we have synthetic disks, - // just label the first M.2-looking one as a "boot disk". - DiskWrapper::Synthetic { .. 
} => { - if matches!(disk.variant(), DiskVariant::M2) { - return Some((id.clone(), disk.zpool_name().clone())); - } - } - }; - None - }) - } - - // TODO: Could be generic over DiskVariant - - /// Returns all M.2 zpools - pub async fn all_m2_zpools(&self) -> Vec { - self.all_zpools(DiskVariant::M2).await - } - - /// Returns all U.2 zpools - pub async fn all_u2_zpools(&self) -> Vec { - self.all_zpools(DiskVariant::U2).await - } - - /// Returns all mountpoints within all M.2s for a particular dataset. - pub async fn all_m2_mountpoints(&self, dataset: &str) -> Vec { - let m2_zpools = self.all_m2_zpools().await; - m2_zpools - .iter() - .map(|zpool| zpool.dataset_mountpoint(dataset)) - .collect() - } - - /// Returns all mountpoints within all U.2s for a particular dataset. - pub async fn all_u2_mountpoints(&self, dataset: &str) -> Vec { - let u2_zpools = self.all_u2_zpools().await; - u2_zpools - .iter() - .map(|zpool| zpool.dataset_mountpoint(dataset)) - .collect() - } - - /// Returns all zpools of a particular variant - pub async fn all_zpools(&self, variant: DiskVariant) -> Vec { - let disks = self.disks.lock().await; - disks - .values() - .filter_map(|disk| { - if disk.variant() == variant { - return Some(disk.zpool_name().clone()); - } - None - }) - .collect() - } - - /// Return the directories for storing zone service bundles. - pub async fn all_zone_bundle_directories(&self) -> Vec { - self.all_m2_mountpoints(sled_hardware::disk::M2_DEBUG_DATASET) - .await - .into_iter() - .map(|p| p.join(BUNDLE_DIRECTORY).join(ZONE_BUNDLE_DIRECTORY)) - .collect() - } -} - -/// Describes the access to the underlay used by the StorageManager. -pub struct UnderlayAccess { - pub nexus_client: NexusClientWithResolver, - pub sled_id: Uuid, -} - -// A worker that starts zones for pools as they are received. -struct StorageWorker { - log: Logger, - nexus_notifications: FuturesOrdered, - rx: mpsc::Receiver, - underlay: Arc>>, - - // A mechanism for requesting disk encryption keys from the - // [`key_manager::KeyManager`] - key_requester: StorageKeyRequester, - - // Invokes dumpadm(8) and savecore(8) when new disks are encountered - dump_setup: Arc, -} - -#[derive(Clone, Debug)] -enum NotifyDiskRequest { - Add { identity: DiskIdentity, variant: DiskVariant }, - Remove(DiskIdentity), -} - -#[derive(From, Clone, Debug, PartialEq, Eq, Hash)] -enum QueuedDiskCreate { - Real(UnparsedDisk), - Synthetic(ZpoolName), -} - -impl QueuedDiskCreate { - fn is_synthetic(&self) -> bool { - if let QueuedDiskCreate::Synthetic(_) = self { - true - } else { - false - } - } -} - -impl StorageWorker { - // Ensures the named dataset exists as a filesystem with a UUID, optionally - // creating it if `do_format` is true. - // - // Returns the UUID attached to the ZFS filesystem. - fn ensure_dataset( - &mut self, - dataset_id: Uuid, - dataset_name: &DatasetName, - ) -> Result<(), Error> { - let zoned = true; - let fs_name = &dataset_name.full(); - let do_format = true; - let encryption_details = None; - let size_details = None; - Zfs::ensure_filesystem( - &dataset_name.full(), - Mountpoint::Path(Utf8PathBuf::from("/data")), - zoned, - do_format, - encryption_details, - size_details, - None, - )?; - // Ensure the dataset has a usable UUID. 
- if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { - if let Ok(id) = id_str.parse::() { - if id != dataset_id { - return Err(Error::UuidMismatch { - name: Box::new(dataset_name.clone()), - old: id, - new: dataset_id, - }); - } - return Ok(()); - } - } - Zfs::set_oxide_value(&fs_name, "uuid", &dataset_id.to_string())?; - Ok(()) - } - - // Adds a "notification to nexus" to `nexus_notifications`, - // informing it about the addition of `pool_id` to this sled. - async fn add_zpool_notify(&mut self, pool: &Pool, size: ByteCount) { - // The underlay network is setup once at sled-agent startup. Before - // there is an underlay we want to avoid sending notifications to nexus for - // two reasons: - // 1. They can't possibly succeed - // 2. They increase the backoff time exponentially, so that once - // sled-agent does start it may take much longer to notify nexus - // than it would if we avoid this. This goes especially so for rack - // setup, when bootstrap agent is waiting an aribtrary time for RSS - // initialization. - if self.underlay.lock().await.is_none() { - return; - } - - let pool_id = pool.name.id(); - let DiskIdentity { vendor, serial, model } = pool.parent.clone(); - let underlay = self.underlay.clone(); - - let notify_nexus = move || { - let zpool_request = ZpoolPutRequest { - size: size.into(), - disk_vendor: vendor.clone(), - disk_serial: serial.clone(), - disk_model: model.clone(), - }; - let underlay = underlay.clone(); - - async move { - let underlay_guard = underlay.lock().await; - let Some(underlay) = underlay_guard.as_ref() else { - return Err(backoff::BackoffError::transient( - Error::UnderlayNotInitialized.to_string(), - )); - }; - let sled_id = underlay.sled_id; - let nexus_client = underlay.nexus_client.client().clone(); - drop(underlay_guard); - - nexus_client - .zpool_put(&sled_id, &pool_id, &zpool_request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - Ok(()) - } - }; - let log = self.log.clone(); - let name = pool.name.clone(); - let disk = pool.parent().clone(); - let log_post_failure = move |_, call_count, total_duration| { - if call_count == 0 { - info!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"); - } else if total_duration > std::time::Duration::from_secs(30) { - warn!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; - "total duration" => ?total_duration); - } - }; - self.nexus_notifications.push_back( - backoff::retry_notify_ext( - backoff::retry_policy_internal_service_aggressive(), - notify_nexus, - log_post_failure, - ) - .boxed(), - ); - } - - async fn ensure_using_exactly_these_disks( - &mut self, - resources: &StorageResources, - unparsed_disks: Vec, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - // Queue U.2 drives if necessary - // We clear all existing queued drives that are not synthetic and add - // new ones in the loop below - if let Some(queued) = queued_u2_drives { - info!( - self.log, - "Ensure exact disks: clearing non-synthetic queued disks." - ); - queued.retain(|d| d.is_synthetic()); - } - - let mut new_disks = HashMap::new(); - - // We may encounter errors while parsing any of the disks; keep track of - // any errors that occur and return any of them if something goes wrong. - // - // That being said, we should not prevent access to the other disks if - // only one failure occurs. - let mut err: Option = None; - - // Ensure all disks conform to the expected partition layout. 
- for disk in unparsed_disks.into_iter() { - if disk.variant() == DiskVariant::U2 { - if let Some(queued) = queued_u2_drives { - info!(self.log, "Queuing disk for upsert: {disk:?}"); - queued.insert(disk.into()); - continue; - } - } - match self.add_new_disk(disk, queued_u2_drives).await.map_err( - |err| { - warn!(self.log, "Could not ensure partitions: {err}"); - err - }, - ) { - Ok(disk) => { - new_disks.insert(disk.identity().clone(), disk); - } - Err(e) => { - warn!(self.log, "Cannot parse disk: {e}"); - err = Some(e.into()); - } - }; - } - - let mut disks = resources.disks.lock().await; - - // Remove disks that don't appear in the "new_disks" set. - // - // This also accounts for zpools and notifies Nexus. - let disks_to_be_removed = disks - .iter_mut() - .filter(|(key, old_disk)| { - // If this disk appears in the "new" and "old" set, it should - // only be removed if it has changed. - // - // This treats a disk changing in an unexpected way as a - // "removal and re-insertion". - match old_disk { - DiskWrapper::Real { disk, .. } => { - if let Some(new_disk) = new_disks.get(*key) { - // Changed Disk -> Disk should be removed. - new_disk != disk - } else { - // Real disk, not in the new set -> Disk should be removed. - true - } - } - // Synthetic disk -> Disk should NOT be removed. - DiskWrapper::Synthetic { .. } => false, - } - }) - .map(|(_key, disk)| disk.clone()) - .collect::>(); - - for disk in disks_to_be_removed { - if let Err(e) = self - .delete_disk_locked(&resources, &mut disks, &disk.identity()) - .await - { - warn!(self.log, "Failed to delete disk: {e}"); - err = Some(e); - } - } - - // Add new disks to `resources.disks`. - // - // This also accounts for zpools and notifies Nexus. - for (key, new_disk) in new_disks { - if let Some(old_disk) = disks.get(&key) { - // In this case, the disk should be unchanged. - // - // This assertion should be upheld by the filter above, which - // should remove disks that changed. - assert!(old_disk == &new_disk.into()); - } else { - let disk = DiskWrapper::Real { - disk: new_disk.clone(), - devfs_path: new_disk.devfs_path().clone(), - }; - if let Err(e) = - self.upsert_disk_locked(&resources, &mut disks, disk).await - { - warn!(self.log, "Failed to upsert disk: {e}"); - err = Some(e); - } - } - } - - if let Some(err) = err { - Err(err) - } else { - Ok(()) - } - } - - // Attempt to create a new disk via `sled_hardware::Disk::new()`. If the - // disk addition fails because the the key manager cannot load a secret, - // this indicates a transient error, and so we queue the disk so we can - // try again. - async fn add_new_disk( - &mut self, - unparsed_disk: UnparsedDisk, - queued_u2_drives: &mut Option>, - ) -> Result { - match sled_hardware::Disk::new( - &self.log, - unparsed_disk.clone(), - Some(&self.key_requester), - ) - .await - { - Ok(disk) => Ok(disk), - Err(sled_hardware::DiskError::KeyManager(err)) => { - warn!( - self.log, - "Transient error: {err} - queuing disk {:?}", unparsed_disk - ); - if let Some(queued) = queued_u2_drives { - queued.insert(unparsed_disk.into()); - } else { - *queued_u2_drives = - Some(HashSet::from([unparsed_disk.into()])); - } - Err(sled_hardware::DiskError::KeyManager(err)) - } - Err(err) => { - error!( - self.log, - "Persistent error: {err} - not queueing disk {:?}", - unparsed_disk - ); - Err(err) - } - } - } - - // Attempt to create a new synthetic disk via - // `sled_hardware::Disk::ensure_zpool_ready()`. 
If the disk addition fails - // because the the key manager cannot load a secret, this indicates a - // transient error, and so we queue the disk so we can try again. - async fn add_new_synthetic_disk( - &mut self, - zpool_name: ZpoolName, - queued_u2_drives: &mut Option>, - ) -> Result<(), sled_hardware::DiskError> { - let synthetic_id = DiskIdentity { - vendor: "fake_vendor".to_string(), - serial: "fake_serial".to_string(), - model: zpool_name.id().to_string(), - }; - match sled_hardware::Disk::ensure_zpool_ready( - &self.log, - &zpool_name, - &synthetic_id, - Some(&self.key_requester), - ) - .await - { - Ok(()) => Ok(()), - Err(sled_hardware::DiskError::KeyManager(err)) => { - warn!( - self.log, - "Transient error: {err} - queuing synthetic disk: {:?}", - zpool_name - ); - if let Some(queued) = queued_u2_drives { - queued.insert(zpool_name.into()); - } else { - *queued_u2_drives = - Some(HashSet::from([zpool_name.into()])); - } - Err(sled_hardware::DiskError::KeyManager(err)) - } - Err(err) => { - error!( - self.log, - "Persistent error: {} - not queueing synthetic disk {:?}", - err, - zpool_name - ); - Err(err) - } - } - } - - async fn upsert_disk( - &mut self, - resources: &StorageResources, - disk: UnparsedDisk, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - // Queue U.2 drives if necessary - if let Some(queued) = queued_u2_drives { - if disk.variant() == DiskVariant::U2 { - info!(self.log, "Queuing disk for upsert: {disk:?}"); - queued.insert(disk.into()); - return Ok(()); - } - } - - info!(self.log, "Upserting disk: {disk:?}"); - - // Ensure the disk conforms to an expected partition layout. - let disk = - self.add_new_disk(disk, queued_u2_drives).await.map_err(|err| { - warn!(self.log, "Could not ensure partitions: {err}"); - err - })?; - - let mut disks = resources.disks.lock().await; - let disk = DiskWrapper::Real { - disk: disk.clone(), - devfs_path: disk.devfs_path().clone(), - }; - self.upsert_disk_locked(resources, &mut disks, disk).await - } - - async fn upsert_synthetic_disk( - &mut self, - resources: &StorageResources, - zpool_name: ZpoolName, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - // Queue U.2 drives if necessary - if let Some(queued) = queued_u2_drives { - if zpool_name.kind() == ZpoolKind::External { - info!( - self.log, - "Queuing synthetic disk for upsert: {zpool_name:?}" - ); - queued.insert(zpool_name.into()); - return Ok(()); - } - } - - info!(self.log, "Upserting synthetic disk for: {zpool_name:?}"); - - self.add_new_synthetic_disk(zpool_name.clone(), queued_u2_drives) - .await?; - let disk = DiskWrapper::Synthetic { zpool_name }; - let mut disks = resources.disks.lock().await; - self.upsert_disk_locked(resources, &mut disks, disk).await - } - - async fn upsert_disk_locked( - &mut self, - resources: &StorageResources, - disks: &mut tokio::sync::MutexGuard< - '_, - HashMap, - >, - disk: DiskWrapper, - ) -> Result<(), Error> { - disks.insert(disk.identity(), disk.clone()); - self.physical_disk_notify(NotifyDiskRequest::Add { - identity: disk.identity(), - variant: disk.variant(), - }) - .await; - self.upsert_zpool(&resources, disk.identity(), disk.zpool_name()) - .await?; - - self.dump_setup.update_dumpdev_setup(disks).await; - - Ok(()) - } - - async fn delete_disk( - &mut self, - resources: &StorageResources, - disk: UnparsedDisk, - ) -> Result<(), Error> { - info!(self.log, "Deleting disk: {disk:?}"); - // TODO: Don't we need to do some accounting, e.g. for all the information - // that's no longer accessible? 
Or is that up to Nexus to figure out at - // a later point-in-time? - // - // If we're storing zone images on the M.2s for internal services, how - // do we reconcile them? - let mut disks = resources.disks.lock().await; - self.delete_disk_locked(resources, &mut disks, disk.identity()).await - } - - async fn delete_disk_locked( - &mut self, - resources: &StorageResources, - disks: &mut tokio::sync::MutexGuard< - '_, - HashMap, - >, - key: &DiskIdentity, - ) -> Result<(), Error> { - if let Some(parsed_disk) = disks.remove(key) { - resources.pools.lock().await.remove(&parsed_disk.zpool_name().id()); - self.physical_disk_notify(NotifyDiskRequest::Remove(key.clone())) - .await; - } - - self.dump_setup.update_dumpdev_setup(disks).await; - - Ok(()) - } - - /// When the underlay becomes available, we need to notify nexus about any - /// discovered disks and pools, since we don't attempt to notify until there - /// is an underlay available. - async fn notify_nexus_about_existing_resources( - &mut self, - resources: &StorageResources, - ) -> Result<(), Error> { - let disks = resources.disks.lock().await; - for disk in disks.values() { - self.physical_disk_notify(NotifyDiskRequest::Add { - identity: disk.identity(), - variant: disk.variant(), - }) - .await; - } - - // We may encounter errors while processing any of the pools; keep track of - // any errors that occur and return any of them if something goes wrong. - // - // That being said, we should not prevent notification to nexus of the - // other pools if only one failure occurs. - let mut err: Option = None; - - let pools = resources.pools.lock().await; - for pool in pools.values() { - match ByteCount::try_from(pool.info.size()).map_err(|err| { - Error::BadPoolSize { name: pool.name.to_string(), err } - }) { - Ok(size) => self.add_zpool_notify(pool, size).await, - Err(e) => { - warn!(self.log, "Failed to notify nexus about pool: {e}"); - err = Some(e) - } - } - } - - if let Some(err) = err { - Err(err) - } else { - Ok(()) - } - } - - // Adds a "notification to nexus" to `self.nexus_notifications`, informing it - // about the addition/removal of a physical disk to this sled. - async fn physical_disk_notify(&mut self, disk: NotifyDiskRequest) { - // The underlay network is setup once at sled-agent startup. Before - // there is an underlay we want to avoid sending notifications to nexus for - // two reasons: - // 1. They can't possibly succeed - // 2. They increase the backoff time exponentially, so that once - // sled-agent does start it may take much longer to notify nexus - // than it would if we avoid this. This goes especially so for rack - // setup, when bootstrap agent is waiting an aribtrary time for RSS - // initialization. 
- if self.underlay.lock().await.is_none() { - return; - } - let underlay = self.underlay.clone(); - let disk2 = disk.clone(); - let notify_nexus = move || { - let disk = disk.clone(); - let underlay = underlay.clone(); - async move { - let underlay_guard = underlay.lock().await; - let Some(underlay) = underlay_guard.as_ref() else { - return Err(backoff::BackoffError::transient( - Error::UnderlayNotInitialized.to_string(), - )); - }; - let sled_id = underlay.sled_id; - let nexus_client = underlay.nexus_client.client().clone(); - drop(underlay_guard); - - match &disk { - NotifyDiskRequest::Add { identity, variant } => { - let request = PhysicalDiskPutRequest { - model: identity.model.clone(), - serial: identity.serial.clone(), - vendor: identity.vendor.clone(), - variant: match variant { - DiskVariant::U2 => PhysicalDiskKind::U2, - DiskVariant::M2 => PhysicalDiskKind::M2, - }, - sled_id, - }; - nexus_client - .physical_disk_put(&request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - } - NotifyDiskRequest::Remove(disk_identity) => { - let request = PhysicalDiskDeleteRequest { - model: disk_identity.model.clone(), - serial: disk_identity.serial.clone(), - vendor: disk_identity.vendor.clone(), - sled_id, - }; - nexus_client - .physical_disk_delete(&request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - } - } - Ok(()) - } - }; - let log = self.log.clone(); - // This notification is often invoked before Nexus has started - // running, so avoid flagging any errors as concerning until some - // time has passed. - let log_post_failure = move |_, call_count, total_duration| { - if call_count == 0 { - info!(log, "failed to notify nexus about {disk2:?}"); - } else if total_duration > std::time::Duration::from_secs(30) { - warn!(log, "failed to notify nexus about {disk2:?}"; - "total duration" => ?total_duration); - } - }; - self.nexus_notifications.push_back( - backoff::retry_notify_ext( - backoff::retry_policy_internal_service_aggressive(), - notify_nexus, - log_post_failure, - ) - .boxed(), - ); - } - - async fn upsert_zpool( - &mut self, - resources: &StorageResources, - parent: DiskIdentity, - pool_name: &ZpoolName, - ) -> Result<(), Error> { - let mut pools = resources.pools.lock().await; - let zpool = Pool::new(pool_name.clone(), parent)?; - - let pool = match pools.entry(pool_name.id()) { - hash_map::Entry::Occupied(mut entry) => { - // The pool already exists. - entry.get_mut().info = zpool.info; - return Ok(()); - } - hash_map::Entry::Vacant(entry) => entry.insert(zpool), - }; - info!(&self.log, "Storage manager processing zpool: {:#?}", pool.info); - - let size = ByteCount::try_from(pool.info.size()).map_err(|err| { - Error::BadPoolSize { name: pool_name.to_string(), err } - })?; - // Notify Nexus of the zpool. - self.add_zpool_notify(&pool, size).await; - Ok(()) - } - - // Attempts to add a dataset within a zpool, according to `request`. 
- async fn add_dataset( - &mut self, - resources: &StorageResources, - request: &NewFilesystemRequest, - ) -> Result { - info!(self.log, "add_dataset: {:?}", request); - let mut pools = resources.pools.lock().await; - let pool = pools - .get_mut(&request.dataset_name.pool().id()) - .ok_or_else(|| { - Error::ZpoolNotFound(format!( - "{}, looked up while trying to add dataset", - request.dataset_name.pool(), - )) - })?; - let dataset_name = DatasetName::new( - pool.name.clone(), - request.dataset_name.dataset().clone(), - ); - self.ensure_dataset(request.dataset_id, &dataset_name)?; - Ok(dataset_name) - } - - // Small wrapper around `Self::do_work_internal` that ensures we always - // emit info to the log when we exit. - async fn do_work( - &mut self, - resources: StorageResources, - ) -> Result<(), Error> { - // We queue U.2 sleds until the StorageKeyRequester is ready to use. - let mut queued_u2_drives = Some(HashSet::new()); - loop { - match self.do_work_internal(&resources, &mut queued_u2_drives).await - { - Ok(()) => { - info!(self.log, "StorageWorker exited successfully"); - return Ok(()); - } - Err(e) => { - warn!( - self.log, - "StorageWorker encountered unexpected error: {}", e - ); - // ... for now, keep trying. - } - } - } - } - - async fn do_work_internal( - &mut self, - resources: &StorageResources, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - const QUEUED_DISK_RETRY_TIMEOUT: Duration = Duration::from_secs(5); - let mut interval = interval(QUEUED_DISK_RETRY_TIMEOUT); - interval.set_missed_tick_behavior(MissedTickBehavior::Delay); - loop { - tokio::select! { - _ = self.nexus_notifications.next(), - if !self.nexus_notifications.is_empty() => {}, - Some(request) = self.rx.recv() => { - // We want to queue failed requests related to the key manager - match self.handle_storage_worker_request( - resources, queued_u2_drives, request) - .await { - Err(Error::DiskError(_)) => { - // We already handle and log disk errors, no need to - // return here. - } - Err(e) => return Err(e), - Ok(()) => {} - } - } - _ = interval.tick(), if queued_u2_drives.is_some() && - KEY_MANAGER_READY.get().is_some()=> - { - self.upsert_queued_disks(resources, queued_u2_drives).await; - } - } - } - } - - async fn handle_storage_worker_request( - &mut self, - resources: &StorageResources, - queued_u2_drives: &mut Option>, - request: StorageWorkerRequest, - ) -> Result<(), Error> { - use StorageWorkerRequest::*; - match request { - AddDisk(disk) => { - self.upsert_disk(&resources, disk, queued_u2_drives).await?; - } - AddSyntheticDisk(zpool_name) => { - self.upsert_synthetic_disk( - &resources, - zpool_name, - queued_u2_drives, - ) - .await?; - } - RemoveDisk(disk) => { - self.delete_disk(&resources, disk).await?; - } - NewFilesystem(request) => { - let result = self.add_dataset(&resources, &request).await; - let _ = request.responder.send(result); - } - DisksChanged(disks) => { - self.ensure_using_exactly_these_disks( - &resources, - disks, - queued_u2_drives, - ) - .await?; - } - SetupUnderlayAccess(UnderlayRequest { underlay, responder }) => { - // If this is the first time establishing an - // underlay we should notify nexus of all existing - // disks and zpools. 
- // - // Instead of individual notifications, we should - // send a bulk notification as described in https:// - // github.com/oxidecomputer/omicron/issues/1917 - if self.underlay.lock().await.replace(underlay).is_none() { - self.notify_nexus_about_existing_resources(&resources) - .await?; - } - let _ = responder.send(Ok(())); - } - KeyManagerReady => { - let _ = KEY_MANAGER_READY.set(()); - self.upsert_queued_disks(resources, queued_u2_drives).await; - } - } - Ok(()) - } - - async fn upsert_queued_disks( - &mut self, - resources: &StorageResources, - queued_u2_drives: &mut Option>, - ) { - let queued = queued_u2_drives.take(); - if let Some(queued) = queued { - for disk in queued { - if let Some(saved) = queued_u2_drives { - // We already hit a transient error and recreated our queue. - // Add any remaining queued disks back on the queue so we - // can try again later. - saved.insert(disk); - } else { - match self.upsert_queued_disk(disk, resources).await { - Ok(()) => {} - Err((_, None)) => { - // We already logged this as a persistent error in - // `add_new_disk` or `add_new_synthetic_disk` - } - Err((_, Some(disk))) => { - // We already logged this as a transient error in - // `add_new_disk` or `add_new_synthetic_disk` - *queued_u2_drives = Some(HashSet::from([disk])); - } - } - } - } - } - if queued_u2_drives.is_none() { - info!(self.log, "upserted all queued disks"); - } else { - warn!( - self.log, - "failed to upsert all queued disks - will try again" - ); - } - } - - // Attempt to upsert a queued disk. Return the disk and error if the upsert - // fails due to a transient error. Examples of transient errors are key - // manager errors which indicate that there are not enough sleds available - // to unlock the rack. - async fn upsert_queued_disk( - &mut self, - disk: QueuedDiskCreate, - resources: &StorageResources, - ) -> Result<(), (Error, Option)> { - let mut temp: Option> = None; - let res = match disk { - QueuedDiskCreate::Real(disk) => { - self.upsert_disk(&resources, disk, &mut temp).await - } - QueuedDiskCreate::Synthetic(zpool_name) => { - self.upsert_synthetic_disk(&resources, zpool_name, &mut temp) - .await - } - }; - if let Some(mut disks) = temp.take() { - assert!(res.is_err()); - assert_eq!(disks.len(), 1); - return Err(( - res.unwrap_err(), - disks.drain().next().unwrap().into(), - )); - } - // Any error at this point is not transient. - // We don't requeue the disk. - res.map_err(|e| (e, None)) - } -} - -enum StorageWorkerRequest { - AddDisk(UnparsedDisk), - AddSyntheticDisk(ZpoolName), - RemoveDisk(UnparsedDisk), - DisksChanged(Vec), - NewFilesystem(NewFilesystemRequest), - SetupUnderlayAccess(UnderlayRequest), - KeyManagerReady, -} - -struct StorageManagerInner { - log: Logger, - - resources: StorageResources, - - tx: mpsc::Sender, - - // A handle to a worker which updates "pools". - task: JoinHandle>, -} - -/// A sled-local view of all attached storage. -#[derive(Clone)] -pub struct StorageManager { - inner: Arc, - zone_bundler: ZoneBundler, -} - -impl StorageManager { - /// Creates a new [`StorageManager`] which should manage local storage. 
- pub async fn new(log: &Logger, key_requester: StorageKeyRequester) -> Self { - let log = log.new(o!("component" => "StorageManager")); - let resources = StorageResources { - disks: Arc::new(Mutex::new(HashMap::new())), - pools: Arc::new(Mutex::new(HashMap::new())), - }; - let (tx, rx) = mpsc::channel(30); - - let zb_log = log.new(o!("component" => "ZoneBundler")); - let zone_bundler = - ZoneBundler::new(zb_log, resources.clone(), Default::default()); - - StorageManager { - inner: Arc::new(StorageManagerInner { - log: log.clone(), - resources: resources.clone(), - tx, - task: tokio::task::spawn(async move { - let dump_setup = Arc::new(DumpSetup::new(&log)); - let mut worker = StorageWorker { - log, - nexus_notifications: FuturesOrdered::new(), - rx, - underlay: Arc::new(Mutex::new(None)), - key_requester, - dump_setup, - }; - - worker.do_work(resources).await - }), - }), - zone_bundler, - } - } - - /// Return a reference to the object used to manage zone bundles. - /// - /// This can be cloned by other code wishing to create and manage their own - /// zone bundles. - pub fn zone_bundler(&self) -> &ZoneBundler { - &self.zone_bundler - } - - /// Ensures that the storage manager tracks exactly the provided disks. - /// - /// This acts similar to a batch [Self::upsert_disk] for all new disks, and - /// [Self::delete_disk] for all removed disks. - /// - /// If errors occur, an arbitrary "one" of them will be returned, but a - /// best-effort attempt to add all disks will still be attempted. - // Receiver implemented by [StorageWorker::ensure_using_exactly_these_disks] - pub async fn ensure_using_exactly_these_disks(&self, unparsed_disks: I) - where - I: IntoIterator, - { - self.inner - .tx - .send(StorageWorkerRequest::DisksChanged( - unparsed_disks.into_iter().collect::>(), - )) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send DisksChanged request"); - } - - /// Adds a disk and associated zpool to the storage manager. - // Receiver implemented by [StorageWorker::upsert_disk]. - pub async fn upsert_disk(&self, disk: UnparsedDisk) { - info!(self.inner.log, "Upserting disk: {disk:?}"); - self.inner - .tx - .send(StorageWorkerRequest::AddDisk(disk)) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send AddDisk request"); - } - - /// Removes a disk, if it's tracked by the storage manager, as well - /// as any associated zpools. - // Receiver implemented by [StorageWorker::delete_disk]. - pub async fn delete_disk(&self, disk: UnparsedDisk) { - info!(self.inner.log, "Deleting disk: {disk:?}"); - self.inner - .tx - .send(StorageWorkerRequest::RemoveDisk(disk)) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send RemoveDisk request"); - } - - /// Adds a synthetic zpool to the storage manager. - // Receiver implemented by [StorageWorker::upsert_synthetic_disk]. - pub async fn upsert_synthetic_disk(&self, name: ZpoolName) { - self.inner - .tx - .send(StorageWorkerRequest::AddSyntheticDisk(name)) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send AddSyntheticDisk request"); - } - - /// Adds underlay access to the storage manager. 
- pub async fn setup_underlay_access( - &self, - underlay: UnderlayAccess, - ) -> Result<(), Error> { - let (tx, rx) = oneshot::channel(); - self.inner - .tx - .send(StorageWorkerRequest::SetupUnderlayAccess(UnderlayRequest { - underlay, - responder: tx, - })) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send SetupUnderlayAccess request"); - rx.await.expect("Failed to await underlay setup") - } - - pub async fn get_zpools(&self) -> Result, Error> { - let disks = self.inner.resources.disks.lock().await; - let pools = self.inner.resources.pools.lock().await; - - let mut zpools = Vec::with_capacity(pools.len()); - - for (id, pool) in pools.iter() { - let disk_identity = &pool.parent; - let disk_type = if let Some(disk) = disks.get(&disk_identity) { - disk.variant().into() - } else { - // If the zpool claims to be attached to a disk that we - // don't know about, that's an error. - return Err(Error::ZpoolNotFound( - format!("zpool: {id} claims to be from unknown disk: {disk_identity:#?}") - )); - }; - zpools.push(crate::params::Zpool { id: *id, disk_type }); - } - - Ok(zpools) - } - - pub async fn upsert_filesystem( - &self, - dataset_id: Uuid, - dataset_name: DatasetName, - ) -> Result { - let (tx, rx) = oneshot::channel(); - let request = - NewFilesystemRequest { dataset_id, dataset_name, responder: tx }; - - self.inner - .tx - .send(StorageWorkerRequest::NewFilesystem(request)) - .await - .map_err(|e| e.to_string()) - .expect("Storage worker bug (not alive)"); - let dataset_name = rx.await.expect( - "Storage worker bug (dropped responder without responding)", - )?; - - Ok(dataset_name) - } - - /// Inform the storage worker that the KeyManager is capable of retrieving - /// secrets now and that any queued disks can be upserted. - pub async fn key_manager_ready(&self) { - info!(self.inner.log, "KeyManger ready"); - self.inner - .tx - .send(StorageWorkerRequest::KeyManagerReady) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send KeyManagerReady request"); - } - - pub fn resources(&self) -> &StorageResources { - &self.inner.resources - } -} - -impl Drop for StorageManagerInner { - fn drop(&mut self) { - // NOTE: Ideally, with async drop, we'd await completion of the worker - // somehow. - // - // Without that option, we instead opt to simply cancel the worker - // task to ensure it does not remain alive beyond the StorageManager - // itself. - self.task.abort(); - } -} diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs new file mode 100644 index 0000000000..f552fdfd86 --- /dev/null +++ b/sled-agent/src/storage_monitor.rs @@ -0,0 +1,373 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A task that listens for storage events from [`sled_storage::manager::StorageManager`] +//! and dispatches them to other parst of the bootstrap agent and sled agent +//! code. 
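For orientation, a minimal sketch of how the sled-agent startup path might wire this new task up, based only on the constructor and `run` signatures introduced in this file; `log`, `storage_handle`, `nexus_client`, and `sled_id` are assumed to come from the existing startup code:

    // Build the monitor alongside the StorageManager handle and spawn its receive loop.
    let (storage_monitor, underlay_available_tx) =
        StorageMonitor::new(&log, storage_handle.clone());
    tokio::spawn(storage_monitor.run());

    // Later, once the underlay network exists, hand the monitor access to Nexus.
    // The oneshot send only fails if the monitor task has already exited.
    underlay_available_tx
        .send(UnderlayAccess { nexus_client, sled_id })
        .expect("StorageMonitor task should still be running");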
+ +use crate::dump_setup::DumpSetup; +use crate::nexus::NexusClientWithResolver; +use derive_more::From; +use futures::stream::FuturesOrdered; +use futures::FutureExt; +use futures::StreamExt; +use nexus_client::types::PhysicalDiskDeleteRequest; +use nexus_client::types::PhysicalDiskPutRequest; +use nexus_client::types::ZpoolPutRequest; +use omicron_common::api::external::ByteCount; +use omicron_common::backoff; +use omicron_common::disk::DiskIdentity; +use sled_storage::manager::StorageHandle; +use sled_storage::pool::Pool; +use sled_storage::resources::StorageResources; +use slog::Logger; +use std::fmt::Debug; +use std::pin::Pin; +use tokio::sync::oneshot; +use uuid::Uuid; + +#[derive(From, Clone, Debug)] +enum NexusDiskRequest { + Put(PhysicalDiskPutRequest), + Delete(PhysicalDiskDeleteRequest), +} + +/// Describes the access to the underlay used by the StorageManager. +#[derive(Clone)] +pub struct UnderlayAccess { + pub nexus_client: NexusClientWithResolver, + pub sled_id: Uuid, +} + +impl Debug for UnderlayAccess { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("UnderlayAccess") + .field("sled_id", &self.sled_id) + .finish() + } +} + +pub struct StorageMonitor { + log: Logger, + storage_manager: StorageHandle, + + // Receive a onetime notification that the underlay is available + underlay_available_rx: oneshot::Receiver, + + // A cached copy of the `StorageResources` from the last update + storage_resources: StorageResources, + + // Ability to access the underlay network + underlay: Option, + + // A queue for sending nexus notifications in order + nexus_notifications: FuturesOrdered, + + // Invokes dumpadm(8) and savecore(8) when new disks are encountered + dump_setup: DumpSetup, +} + +impl StorageMonitor { + pub fn new( + log: &Logger, + storage_manager: StorageHandle, + ) -> (StorageMonitor, oneshot::Sender) { + let (underlay_available_tx, underlay_available_rx) = oneshot::channel(); + let storage_resources = StorageResources::default(); + let dump_setup = DumpSetup::new(&log); + let log = log.new(o!("component" => "StorageMonitor")); + ( + StorageMonitor { + log, + storage_manager, + underlay_available_rx, + storage_resources, + underlay: None, + nexus_notifications: FuturesOrdered::new(), + dump_setup, + }, + underlay_available_tx, + ) + } + + /// Run the main receive loop of the `StorageMonitor` + /// + /// This should be spawned into a tokio task + pub async fn run(mut self) { + loop { + tokio::select! { + res = self.nexus_notifications.next(), + if !self.nexus_notifications.is_empty() => + { + match res { + Some(Ok(s)) => { + info!(self.log, "Nexus notification complete: {s}"); + } + e => error!(self.log, "Nexus notification error: {e:?}") + } + } + resources = self.storage_manager.wait_for_changes() => { + info!( + self.log, + "Received storage manager update"; + "resources" => ?resources + ); + self.handle_resource_update(resources).await; + } + Ok(underlay) = &mut self.underlay_available_rx, + if self.underlay.is_none() => + { + let sled_id = underlay.sled_id; + info!( + self.log, + "Underlay Available"; "sled_id" => %sled_id + ); + self.underlay = Some(underlay); + self.notify_nexus_about_existing_resources(sled_id).await; + } + } + } + } + + /// When the underlay becomes available, we need to notify nexus about any + /// discovered disks and pools, since we don't attempt to notify until there + /// is an underlay available. 
+ async fn notify_nexus_about_existing_resources(&mut self, sled_id: Uuid) { + let current = StorageResources::default(); + let updated = &self.storage_resources; + let nexus_updates = + compute_resource_diffs(&self.log, &sled_id, ¤t, updated); + for put in nexus_updates.disk_puts { + self.physical_disk_notify(put.into()).await; + } + for (pool, put) in nexus_updates.zpool_puts { + self.add_zpool_notify(pool, put).await; + } + } + + async fn handle_resource_update( + &mut self, + updated_resources: StorageResources, + ) { + // If the underlay isn't available, we only record the changes. Nexus + // isn't yet reachable to notify. + if self.underlay.is_some() { + let nexus_updates = compute_resource_diffs( + &self.log, + &self.underlay.as_ref().unwrap().sled_id, + &self.storage_resources, + &updated_resources, + ); + + for put in nexus_updates.disk_puts { + self.physical_disk_notify(put.into()).await; + } + for del in nexus_updates.disk_deletes { + self.physical_disk_notify(del.into()).await; + } + for (pool, put) in nexus_updates.zpool_puts { + self.add_zpool_notify(pool, put).await; + } + } + self.dump_setup.update_dumpdev_setup(updated_resources.disks()).await; + + // Save the updated `StorageResources` + self.storage_resources = updated_resources; + } + + // Adds a "notification to nexus" to `self.nexus_notifications`, informing it + // about the addition/removal of a physical disk to this sled. + async fn physical_disk_notify(&mut self, disk: NexusDiskRequest) { + let underlay = self.underlay.as_ref().unwrap().clone(); + let disk2 = disk.clone(); + let notify_nexus = move || { + let underlay = underlay.clone(); + let disk = disk.clone(); + async move { + let nexus_client = underlay.nexus_client.client().clone(); + + match &disk { + NexusDiskRequest::Put(request) => { + nexus_client + .physical_disk_put(&request) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; + } + NexusDiskRequest::Delete(request) => { + nexus_client + .physical_disk_delete(&request) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; + } + } + let msg = format!("{:?}", disk); + Ok(msg) + } + }; + + let log = self.log.clone(); + // This notification is often invoked before Nexus has started + // running, so avoid flagging any errors as concerning until some + // time has passed. + let log_post_failure = move |err, call_count, total_duration| { + if call_count == 0 { + info!(log, "failed to notify nexus about {disk2:?}"; + "err" => ?err + ); + } else if total_duration > std::time::Duration::from_secs(30) { + warn!(log, "failed to notify nexus about {disk2:?}"; + "err" => ?err, + "total duration" => ?total_duration); + } + }; + self.nexus_notifications.push_back( + backoff::retry_notify_ext( + backoff::retry_policy_internal_service_aggressive(), + notify_nexus, + log_post_failure, + ) + .boxed(), + ); + } + + // Adds a "notification to nexus" to `nexus_notifications`, + // informing it about the addition of `pool_id` to this sled. 
+ async fn add_zpool_notify( + &mut self, + pool: Pool, + zpool_request: ZpoolPutRequest, + ) { + let pool_id = pool.name.id(); + let underlay = self.underlay.as_ref().unwrap().clone(); + + let notify_nexus = move || { + let underlay = underlay.clone(); + let zpool_request = zpool_request.clone(); + async move { + let sled_id = underlay.sled_id; + let nexus_client = underlay.nexus_client.client().clone(); + nexus_client + .zpool_put(&sled_id, &pool_id, &zpool_request) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; + let msg = format!("{:?}", zpool_request); + Ok(msg) + } + }; + + let log = self.log.clone(); + let name = pool.name.clone(); + let disk = pool.parent.clone(); + let log_post_failure = move |err, call_count, total_duration| { + if call_count == 0 { + info!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; + "err" => ?err); + } else if total_duration > std::time::Duration::from_secs(30) { + warn!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; + "err" => ?err, + "total duration" => ?total_duration); + } + }; + self.nexus_notifications.push_back( + backoff::retry_notify_ext( + backoff::retry_policy_internal_service_aggressive(), + notify_nexus, + log_post_failure, + ) + .boxed(), + ); + } +} + +// The type of a future which is used to send a notification to Nexus. +type NotifyFut = + Pin> + Send>>; + +struct NexusUpdates { + disk_puts: Vec, + disk_deletes: Vec, + zpool_puts: Vec<(Pool, ZpoolPutRequest)>, +} + +fn compute_resource_diffs( + log: &Logger, + sled_id: &Uuid, + current: &StorageResources, + updated: &StorageResources, +) -> NexusUpdates { + let mut disk_puts = vec![]; + let mut disk_deletes = vec![]; + let mut zpool_puts = vec![]; + + let mut put_pool = |disk_id: &DiskIdentity, updated_pool: &Pool| { + match ByteCount::try_from(updated_pool.info.size()) { + Ok(size) => zpool_puts.push(( + updated_pool.clone(), + ZpoolPutRequest { + size: size.into(), + disk_model: disk_id.model.clone(), + disk_serial: disk_id.serial.clone(), + disk_vendor: disk_id.vendor.clone(), + }, + )), + Err(err) => { + error!( + log, + "Error parsing pool size"; + "name" => updated_pool.name.to_string(), + "err" => ?err); + } + } + }; + + // Diff the existing resources with the update to see what has changed + // This loop finds disks and pools that were modified or deleted + for (disk_id, (disk, pool)) in current.disks().iter() { + match updated.disks().get(disk_id) { + Some((updated_disk, updated_pool)) => { + if disk != updated_disk { + disk_puts.push(PhysicalDiskPutRequest { + sled_id: *sled_id, + model: disk_id.model.clone(), + serial: disk_id.serial.clone(), + vendor: disk_id.vendor.clone(), + variant: updated_disk.variant().into(), + }); + } + if pool != updated_pool { + put_pool(disk_id, updated_pool); + } + } + None => disk_deletes.push(PhysicalDiskDeleteRequest { + model: disk_id.model.clone(), + serial: disk_id.serial.clone(), + vendor: disk_id.vendor.clone(), + sled_id: *sled_id, + }), + } + } + + // Diff the existing resources with the update to see what has changed + // This loop finds new disks and pools + for (disk_id, (updated_disk, updated_pool)) in updated.disks().iter() { + if !current.disks().contains_key(disk_id) { + disk_puts.push(PhysicalDiskPutRequest { + sled_id: *sled_id, + model: disk_id.model.clone(), + serial: disk_id.serial.clone(), + vendor: disk_id.vendor.clone(), + variant: updated_disk.variant().into(), + }); + put_pool(disk_id, updated_pool); + } + } + + NexusUpdates { disk_puts, 
disk_deletes, zpool_puts } +} diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 91cb850df4..70b9da7708 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -6,7 +6,6 @@ //! Tools for collecting and inspecting service bundles for zones. -use crate::storage_manager::StorageResources; use anyhow::anyhow; use anyhow::Context; use camino::FromPathBufError; @@ -33,6 +32,8 @@ use illumos_utils::zone::AdmError; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use sled_storage::dataset::U2_DEBUG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::cmp::Ord; use std::cmp::Ordering; @@ -221,20 +222,12 @@ pub struct ZoneBundler { inner: Arc>, // Channel for notifying the cleanup task that it should reevaluate. notify_cleanup: Arc, - // Tokio task handle running the period cleanup operation. - cleanup_task: Arc>, -} - -impl Drop for ZoneBundler { - fn drop(&mut self) { - self.cleanup_task.abort(); - } } // State shared between tasks, e.g., used when creating a bundle in different // tasks or between a creation and cleanup. struct Inner { - resources: StorageResources, + storage_handle: StorageHandle, cleanup_context: CleanupContext, last_cleanup_at: Instant, } @@ -262,7 +255,8 @@ impl Inner { // that can exist but do not, i.e., those whose parent datasets already // exist; and returns those. async fn bundle_directories(&self) -> Vec { - let expected = self.resources.all_zone_bundle_directories().await; + let resources = self.storage_handle.get_latest_resources().await; + let expected = resources.all_zone_bundle_directories(); let mut out = Vec::with_capacity(expected.len()); for each in expected.into_iter() { if tokio::fs::create_dir_all(&each).await.is_ok() { @@ -322,11 +316,11 @@ impl ZoneBundler { /// Create a new zone bundler. /// /// This creates an object that manages zone bundles on the system. It can - /// be used to create bundles from running zones, and runs a period task to - /// clean them up to free up space. + /// be used to create bundles from running zones, and runs a periodic task + /// to clean them up to free up space. pub fn new( log: Logger, - resources: StorageResources, + storage_handle: StorageHandle, cleanup_context: CleanupContext, ) -> Self { // This is compiled out in tests because there's no way to set our @@ -336,17 +330,19 @@ impl ZoneBundler { .expect("Failed to initialize existing ZFS resources"); let notify_cleanup = Arc::new(Notify::new()); let inner = Arc::new(Mutex::new(Inner { - resources, + storage_handle, cleanup_context, last_cleanup_at: Instant::now(), })); let cleanup_log = log.new(slog::o!("component" => "auto-cleanup-task")); let notify_clone = notify_cleanup.clone(); let inner_clone = inner.clone(); - let cleanup_task = Arc::new(tokio::task::spawn( - Self::periodic_cleanup(cleanup_log, inner_clone, notify_clone), + tokio::task::spawn(Self::periodic_cleanup( + cleanup_log, + inner_clone, + notify_clone, )); - Self { log, inner, notify_cleanup, cleanup_task } + Self { log, inner, notify_cleanup } } /// Trigger an immediate cleanup of low-priority zone bundles. 
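A brief sketch of the lookup pattern this hunk introduces: callers hold a `StorageHandle` and fetch a `StorageResources` snapshot on demand instead of keeping `StorageResources` directly. The `log` and `storage_handle` values are assumed to be in scope, and the default cleanup configuration is used only for illustration:

    // Construct the bundler against the storage task rather than a cached resource view.
    let bundler = ZoneBundler::new(
        log.clone(),
        storage_handle.clone(),
        CleanupContext::default(),
    );

    // Whenever directories are needed, ask the storage task for its latest snapshot.
    let resources = storage_handle.get_latest_resources().await;
    let bundle_dirs = resources.all_zone_bundle_directories();
    let extra_log_dirs: Vec<_> =
        resources.all_u2_mountpoints(U2_DEBUG_DATASET).into_iter().collect();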
@@ -431,10 +427,9 @@ impl ZoneBundler { ) -> Result { let inner = self.inner.lock().await; let storage_dirs = inner.bundle_directories().await; - let extra_log_dirs = inner - .resources - .all_u2_mountpoints(sled_hardware::disk::U2_DEBUG_DATASET) - .await + let resources = inner.storage_handle.get_latest_resources().await; + let extra_log_dirs = resources + .all_u2_mountpoints(U2_DEBUG_DATASET) .into_iter() .collect(); let context = ZoneBundleContext { cause, storage_dirs, extra_log_dirs }; @@ -2165,7 +2160,6 @@ mod illumos_tests { use super::CleanupPeriod; use super::PriorityOrder; use super::StorageLimit; - use super::StorageResources; use super::Utf8Path; use super::Utf8PathBuf; use super::Uuid; @@ -2178,6 +2172,10 @@ mod illumos_tests { use anyhow::Context; use chrono::TimeZone; use chrono::Utc; + use illumos_utils::zpool::ZpoolName; + use sled_storage::disk::RawDisk; + use sled_storage::disk::SyntheticDisk; + use sled_storage::manager::{FakeStorageManager, StorageHandle}; use slog::Drain; use slog::Logger; use tokio::process::Command; @@ -2219,22 +2217,43 @@ mod illumos_tests { // system, that creates the directories implied by the `StorageResources` // expected disk structure. struct ResourceWrapper { - resources: StorageResources, + storage_handle: StorageHandle, dirs: Vec, } + async fn setup_storage() -> StorageHandle { + let (manager, handle) = FakeStorageManager::new(); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + // These must be internal zpools + for _ in 0..2 { + let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); + let internal_disk: RawDisk = + SyntheticDisk::new(internal_zpool_name.clone()).into(); + handle.upsert_disk(internal_disk).await; + } + handle + } + impl ResourceWrapper { // Create new storage resources, and mount fake datasets at the required // locations. async fn new() -> Self { - let resources = StorageResources::new_for_test(); - let dirs = resources.all_zone_bundle_directories().await; + // Spawn the storage related tasks required for testing and insert + // synthetic disks. 
+ let storage_handle = setup_storage().await; + let resources = storage_handle.get_latest_resources().await; + let dirs = resources.all_zone_bundle_directories(); for d in dirs.iter() { let id = d.components().nth(3).unwrap().as_str().parse().unwrap(); create_test_dataset(&id, d).await.unwrap(); } - Self { resources, dirs } + Self { storage_handle, dirs } } } @@ -2261,8 +2280,11 @@ mod illumos_tests { let log = test_logger(); let context = CleanupContext::default(); let resource_wrapper = ResourceWrapper::new().await; - let bundler = - ZoneBundler::new(log, resource_wrapper.resources.clone(), context); + let bundler = ZoneBundler::new( + log, + resource_wrapper.storage_handle.clone(), + context, + ); Ok(CleanupTestContext { resource_wrapper, context, bundler }) } diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml index 14ae15996b..36ba633067 100644 --- a/sled-hardware/Cargo.toml +++ b/sled-hardware/Cargo.toml @@ -11,10 +11,8 @@ camino.workspace = true cfg-if.workspace = true futures.workspace = true illumos-utils.workspace = true -key-manager.workspace = true libc.workspace = true macaddr.workspace = true -nexus-client.workspace = true omicron-common.workspace = true rand.workspace = true schemars.workspace = true diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index e3078cbeea..44658658be 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -4,34 +4,14 @@ use camino::{Utf8Path, Utf8PathBuf}; use illumos_utils::fstyp::Fstyp; -use illumos_utils::zfs; -use illumos_utils::zfs::DestroyDatasetErrorVariant; -use illumos_utils::zfs::EncryptionDetails; -use illumos_utils::zfs::Keypath; -use illumos_utils::zfs::Mountpoint; -use illumos_utils::zfs::SizeDetails; -use illumos_utils::zfs::Zfs; use illumos_utils::zpool::Zpool; use illumos_utils::zpool::ZpoolKind; use illumos_utils::zpool::ZpoolName; -use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; -use rand::distributions::{Alphanumeric, DistString}; use slog::Logger; use slog::{info, warn}; -use std::sync::OnceLock; -use tokio::fs::{remove_file, File}; -use tokio::io::{AsyncSeekExt, AsyncWriteExt, SeekFrom}; use uuid::Uuid; -/// This path is intentionally on a `tmpfs` to prevent copy-on-write behavior -/// and to ensure it goes away on power off. -/// -/// We want minimize the time the key files are in memory, and so we rederive -/// the keys and recreate the files on demand when creating and mounting -/// encrypted filesystems. We then zero them and unlink them. -pub const KEYPATH_ROOT: &str = "/var/run/oxide/"; - cfg_if::cfg_if! { if #[cfg(target_os = "illumos")] { use crate::illumos::*; @@ -41,7 +21,7 @@ cfg_if::cfg_if! 
{ } #[derive(Debug, thiserror::Error)] -pub enum DiskError { +pub enum PooledDiskError { #[error("Cannot open {path} due to {error}")] IoError { path: Utf8PathBuf, error: std::io::Error }, #[error("Failed to open partition at {path} due to {error}")] @@ -51,10 +31,6 @@ pub enum DiskError { #[error("Requested partition {partition:?} not found on device {path}")] NotFound { path: Utf8PathBuf, partition: Partition }, #[error(transparent)] - DestroyFilesystem(#[from] illumos_utils::zfs::DestroyDatasetError), - #[error(transparent)] - EnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), - #[error(transparent)] ZpoolCreate(#[from] illumos_utils::zpool::CreateError), #[error("Cannot import zpool: {0}")] ZpoolImport(illumos_utils::zpool::Error), @@ -62,18 +38,6 @@ pub enum DiskError { CannotFormatMissingDevPath { path: Utf8PathBuf }, #[error("Formatting M.2 devices is not yet implemented")] CannotFormatM2NotImplemented, - #[error("KeyManager error: {0}")] - KeyManager(#[from] key_manager::Error), - #[error("Missing StorageKeyRequester when creating U.2 disk")] - MissingStorageKeyRequester, - #[error("Encrypted filesystem '{0}' missing 'oxide:epoch' property")] - CannotParseEpochProperty(String), - #[error("Encrypted dataset '{dataset}' cannot set 'oxide:agent' property: {err}")] - CannotSetAgentProperty { - dataset: String, - #[source] - err: Box, - }, } /// A partition (or 'slice') of a disk. @@ -126,17 +90,17 @@ impl DiskPaths { } // Finds the first 'variant' partition, and returns the path to it. - fn partition_device_path( + pub fn partition_device_path( &self, partitions: &[Partition], expected_partition: Partition, raw: bool, - ) -> Result { + ) -> Result { for (index, partition) in partitions.iter().enumerate() { if &expected_partition == partition { let path = self.partition_path(index, raw).ok_or_else(|| { - DiskError::NotFound { + PooledDiskError::NotFound { path: self.devfs_path.clone(), partition: expected_partition, } @@ -144,7 +108,7 @@ impl DiskPaths { return Ok(path); } } - Err(DiskError::NotFound { + Err(PooledDiskError::NotFound { path: self.devfs_path.clone(), partition: expected_partition, }) @@ -154,9 +118,9 @@ impl DiskPaths { /// A disk which has been observed by monitoring hardware. /// /// No guarantees are made about the partitions which exist within this disk. -/// This exists as a distinct entity from [Disk] because it may be desirable to -/// monitor for hardware in one context, and conform disks to partition layouts -/// in a different context. +/// This exists as a distinct entity from `Disk` in `sled-storage` because it +/// may be desirable to monitor for hardware in one context, and conform disks +/// to partition layouts in a different context. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct UnparsedDisk { paths: DiskPaths, @@ -202,127 +166,34 @@ impl UnparsedDisk { } } -/// A physical disk conforming to the expected partition layout. +/// A physical disk that is partitioned to contain exactly one zpool +/// +/// A PooledDisk relies on hardware specific information to be constructed +/// and is the highest level disk structure in the `sled-hardware` package. +/// The `sled-storage` package contains `Disk`s whose zpool and datasets can be +/// manipulated. This separation exists to remove the hardware dependent logic +/// from the ZFS related logic which can also operate on file backed zpools. +/// Doing things this way allows us to not put higher level concepts like +/// storage keys into this hardware related package. 
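As a usage note for the type documented below, a minimal sketch of turning a detected `UnparsedDisk` into a `PooledDisk` with the now-synchronous constructor; `log` and `unparsed_disk` are assumed to come from the existing hardware-monitoring code, and error handling is left to the caller:

    // Partition checks, zpool creation/import, and the failmode setting happen here;
    // dataset layout and encryption-key handling now live in the sled-storage crate.
    let pooled = PooledDisk::new(&log, unparsed_disk)?;
    info!(log, "disk {:?} backs zpool {}", pooled.identity, pooled.zpool_name);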
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Disk { - paths: DiskPaths, - slot: i64, - variant: DiskVariant, - identity: DiskIdentity, - is_boot_disk: bool, - partitions: Vec, - +pub struct PooledDisk { + pub paths: DiskPaths, + pub slot: i64, + pub variant: DiskVariant, + pub identity: DiskIdentity, + pub is_boot_disk: bool, + pub partitions: Vec, // This embeds the assumtion that there is exactly one parsed zpool per // disk. - zpool_name: ZpoolName, -} - -// Helper type for describing expected datasets and their optional quota. -#[derive(Clone, Copy, Debug)] -struct ExpectedDataset { - // Name for the dataset - name: &'static str, - // Optional quota, in _bytes_ - quota: Option, - // Identifies if the dataset should be deleted on boot - wipe: bool, - // Optional compression mode - compression: Option<&'static str>, + pub zpool_name: ZpoolName, } -impl ExpectedDataset { - const fn new(name: &'static str) -> Self { - ExpectedDataset { name, quota: None, wipe: false, compression: None } - } - - const fn quota(mut self, quota: usize) -> Self { - self.quota = Some(quota); - self - } - - const fn wipe(mut self) -> Self { - self.wipe = true; - self - } - - const fn compression(mut self, compression: &'static str) -> Self { - self.compression = Some(compression); - self - } -} - -pub const INSTALL_DATASET: &'static str = "install"; -pub const CRASH_DATASET: &'static str = "crash"; -pub const CLUSTER_DATASET: &'static str = "cluster"; -pub const CONFIG_DATASET: &'static str = "config"; -pub const M2_DEBUG_DATASET: &'static str = "debug"; -pub const M2_BACKING_DATASET: &'static str = "backing"; -// TODO-correctness: This value of 100GiB is a pretty wild guess, and should be -// tuned as needed. -pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30); -// ditto. -pub const DUMP_DATASET_QUOTA: usize = 100 * (1 << 30); -// passed to zfs create -o compression= -pub const DUMP_DATASET_COMPRESSION: &'static str = "gzip-9"; - -// U.2 datasets live under the encrypted dataset and inherit encryption -pub const ZONE_DATASET: &'static str = "crypt/zone"; -pub const DUMP_DATASET: &'static str = "crypt/debug"; -pub const U2_DEBUG_DATASET: &'static str = "crypt/debug"; - -// This is the root dataset for all U.2 drives. Encryption is inherited. -pub const CRYPT_DATASET: &'static str = "crypt"; - -const U2_EXPECTED_DATASET_COUNT: usize = 2; -static U2_EXPECTED_DATASETS: [ExpectedDataset; U2_EXPECTED_DATASET_COUNT] = [ - // Stores filesystems for zones - ExpectedDataset::new(ZONE_DATASET).wipe(), - // For storing full kernel RAM dumps - ExpectedDataset::new(DUMP_DATASET) - .quota(DUMP_DATASET_QUOTA) - .compression(DUMP_DATASET_COMPRESSION), -]; - -const M2_EXPECTED_DATASET_COUNT: usize = 6; -static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ - // Stores software images. - // - // Should be duplicated to both M.2s. - ExpectedDataset::new(INSTALL_DATASET), - // Stores crash dumps. - ExpectedDataset::new(CRASH_DATASET), - // Backing store for OS data that should be persisted across reboots. - // Its children are selectively overlay mounted onto parts of the ramdisk - // root. - ExpectedDataset::new(M2_BACKING_DATASET), - // Stores cluster configuration information. - // - // Should be duplicated to both M.2s. 
- ExpectedDataset::new(CLUSTER_DATASET), - // Stores configuration data, including: - // - What services should be launched on this sled - // - Information about how to initialize the Sled Agent - // - (For scrimlets) RSS setup information - // - // Should be duplicated to both M.2s. - ExpectedDataset::new(CONFIG_DATASET), - // Store debugging data, such as service bundles. - ExpectedDataset::new(M2_DEBUG_DATASET).quota(DEBUG_DATASET_QUOTA), -]; - -impl Disk { - /// Create a new Disk - /// - /// WARNING: In all cases where a U.2 is a possible `DiskVariant`, a - /// `StorageKeyRequester` must be passed so that disk encryption can - /// be used. The `StorageManager` for the sled-agent always has a - /// `StorageKeyRequester` available, and so the only place we should pass - /// `None` is for the M.2s touched by the Installinator. - pub async fn new( +impl PooledDisk { + /// Create a new PooledDisk + pub fn new( log: &Logger, unparsed_disk: UnparsedDisk, - key_requester: Option<&StorageKeyRequester>, - ) -> Result { + ) -> Result { let paths = &unparsed_disk.paths; let variant = unparsed_disk.variant; // Ensure the GPT has the right format. This does not necessarily @@ -340,13 +211,8 @@ impl Disk { )?; let zpool_name = Self::ensure_zpool_exists(log, variant, &zpool_path)?; - Self::ensure_zpool_ready( - log, - &zpool_name, - &unparsed_disk.identity, - key_requester, - ) - .await?; + Self::ensure_zpool_imported(log, &zpool_name)?; + Self::ensure_zpool_failmode_is_continue(log, &zpool_name)?; Ok(Self { paths: unparsed_disk.paths, @@ -359,29 +225,11 @@ impl Disk { }) } - pub async fn ensure_zpool_ready( - log: &Logger, - zpool_name: &ZpoolName, - disk_identity: &DiskIdentity, - key_requester: Option<&StorageKeyRequester>, - ) -> Result<(), DiskError> { - Self::ensure_zpool_imported(log, &zpool_name)?; - Self::ensure_zpool_failmode_is_continue(log, &zpool_name)?; - Self::ensure_zpool_has_datasets( - log, - &zpool_name, - disk_identity, - key_requester, - ) - .await?; - Ok(()) - } - fn ensure_zpool_exists( log: &Logger, variant: DiskVariant, zpool_path: &Utf8Path, - ) -> Result { + ) -> Result { let zpool_name = match Fstyp::get_zpool(&zpool_path) { Ok(zpool_name) => zpool_name, Err(_) => { @@ -406,13 +254,13 @@ impl Disk { DiskVariant::M2 => ZpoolName::new_internal(Uuid::new_v4()), DiskVariant::U2 => ZpoolName::new_external(Uuid::new_v4()), }; - Zpool::create(zpool_name.clone(), &zpool_path)?; + Zpool::create(&zpool_name, &zpool_path)?; zpool_name } }; - Zpool::import(zpool_name.clone()).map_err(|e| { + Zpool::import(&zpool_name).map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); - DiskError::ZpoolImport(e) + PooledDiskError::ZpoolImport(e) })?; Ok(zpool_name) @@ -421,10 +269,10 @@ impl Disk { fn ensure_zpool_imported( log: &Logger, zpool_name: &ZpoolName, - ) -> Result<(), DiskError> { - Zpool::import(zpool_name.clone()).map_err(|e| { + ) -> Result<(), PooledDiskError> { + Zpool::import(&zpool_name).map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); - DiskError::ZpoolImport(e) + PooledDiskError::ZpoolImport(e) })?; Ok(()) } @@ -432,7 +280,7 @@ impl Disk { fn ensure_zpool_failmode_is_continue( log: &Logger, zpool_name: &ZpoolName, - ) -> Result<(), DiskError> { + ) -> Result<(), PooledDiskError> { // Ensure failmode is set to `continue`. See // https://github.com/oxidecomputer/omicron/issues/2766 for details. The // short version is, each pool is only backed by one vdev. 
There is no @@ -445,214 +293,10 @@ impl Disk { log, "Failed to set failmode=continue on zpool {zpool_name}: {e}" ); - DiskError::ZpoolImport(e) + PooledDiskError::ZpoolImport(e) })?; Ok(()) } - - // Ensure that the zpool contains all the datasets we would like it to - // contain. - async fn ensure_zpool_has_datasets( - log: &Logger, - zpool_name: &ZpoolName, - disk_identity: &DiskIdentity, - key_requester: Option<&StorageKeyRequester>, - ) -> Result<(), DiskError> { - let (root, datasets) = match zpool_name.kind().into() { - DiskVariant::M2 => (None, M2_EXPECTED_DATASETS.iter()), - DiskVariant::U2 => { - (Some(CRYPT_DATASET), U2_EXPECTED_DATASETS.iter()) - } - }; - - let zoned = false; - let do_format = true; - - // Ensure the root encrypted filesystem exists - // Datasets below this in the hierarchy will inherit encryption - if let Some(dataset) = root { - let Some(key_requester) = key_requester else { - return Err(DiskError::MissingStorageKeyRequester); - }; - let mountpoint = zpool_name.dataset_mountpoint(dataset); - let keypath: Keypath = disk_identity.into(); - - let epoch = - if let Ok(epoch_str) = Zfs::get_oxide_value(dataset, "epoch") { - if let Ok(epoch) = epoch_str.parse::() { - epoch - } else { - return Err(DiskError::CannotParseEpochProperty( - dataset.to_string(), - )); - } - } else { - // We got an error trying to call `Zfs::get_oxide_value` - // which indicates that the dataset doesn't exist or there - // was a problem running the command. - // - // Note that `Zfs::get_oxide_value` will succeed even if - // the epoch is missing. `epoch_str` will show up as a dash - // (`-`) and will not parse into a `u64`. So we don't have - // to worry about that case here as it is handled above. - // - // If the error indicated that the command failed for some - // other reason, but the dataset actually existed, we will - // try to create the dataset below and that will fail. So - // there is no harm in just loading the latest secret here. - key_requester.load_latest_secret().await? - }; - - let key = - key_requester.get_key(epoch, disk_identity.clone()).await?; - - let mut keyfile = - KeyFile::create(keypath.clone(), key.expose_secret(), log) - .await - .map_err(|error| DiskError::IoError { - path: keypath.0.clone(), - error, - })?; - - let encryption_details = EncryptionDetails { keypath, epoch }; - - info!( - log, - "Ensuring encrypted filesystem: {} for epoch {}", - dataset, - epoch - ); - let result = Zfs::ensure_filesystem( - &format!("{}/{}", zpool_name, dataset), - Mountpoint::Path(mountpoint), - zoned, - do_format, - Some(encryption_details), - None, - None, - ); - - keyfile.zero_and_unlink().await.map_err(|error| { - DiskError::IoError { path: keyfile.path().0.clone(), error } - })?; - - result?; - }; - - for dataset in datasets.into_iter() { - let mountpoint = zpool_name.dataset_mountpoint(dataset.name); - let name = &format!("{}/{}", zpool_name, dataset.name); - - // Use a value that's alive for the duration of this sled agent - // to answer the question: should we wipe this disk, or have - // we seen it before? - // - // If this value comes from a prior iteration of the sled agent, - // we opt to remove the corresponding dataset. 
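For orientation, here is a minimal sketch of how the slimmed-down `PooledDisk::new` above might be driven by a caller, now that it is synchronous, takes no `StorageKeyRequester`, and leaves dataset and encryption setup to sled-storage. The `parse_disk` wrapper, the exact error type, and the log fields are illustrative assumptions, not part of this change:

    use sled_hardware::{PooledDisk, PooledDiskError, UnparsedDisk};
    use slog::{info, Logger};

    // Sketch only: `log` and `unparsed_disk` are assumed to come from the
    // existing hardware-monitoring code; error handling is left to the caller.
    fn parse_disk(
        log: &Logger,
        unparsed_disk: UnparsedDisk,
    ) -> Result<PooledDisk, PooledDiskError> {
        // Partition parsing, zpool creation/import, and failmode setup happen
        // here; dataset provisioning and encryption now live in sled-storage.
        let disk = PooledDisk::new(log, unparsed_disk)?;
        info!(log, "parsed pooled disk"; "zpool" => %disk.zpool_name, "slot" => disk.slot);
        Ok(disk)
    }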
- static AGENT_LOCAL_VALUE: OnceLock = OnceLock::new(); - let agent_local_value = AGENT_LOCAL_VALUE.get_or_init(|| { - Alphanumeric.sample_string(&mut rand::thread_rng(), 20) - }); - - if dataset.wipe { - match Zfs::get_oxide_value(name, "agent") { - Ok(v) if &v == agent_local_value => { - info!( - log, - "Skipping automatic wipe for dataset: {}", name - ); - } - Ok(_) | Err(_) => { - info!( - log, - "Automatically destroying dataset: {}", name - ); - Zfs::destroy_dataset(name).or_else(|err| { - // If we can't find the dataset, that's fine -- it - // might not have been formatted yet. - if let DestroyDatasetErrorVariant::NotFound = - err.err - { - Ok(()) - } else { - Err(err) - } - })?; - } - } - } - - let encryption_details = None; - let size_details = Some(SizeDetails { - quota: dataset.quota, - compression: dataset.compression, - }); - Zfs::ensure_filesystem( - name, - Mountpoint::Path(mountpoint), - zoned, - do_format, - encryption_details, - size_details, - None, - )?; - - if dataset.wipe { - Zfs::set_oxide_value(name, "agent", agent_local_value) - .map_err(|err| DiskError::CannotSetAgentProperty { - dataset: name.clone(), - err: Box::new(err), - })?; - } - } - Ok(()) - } - - pub fn is_boot_disk(&self) -> bool { - self.is_boot_disk - } - - pub fn identity(&self) -> &DiskIdentity { - &self.identity - } - - pub fn variant(&self) -> DiskVariant { - self.variant - } - - pub fn devfs_path(&self) -> &Utf8PathBuf { - &self.paths.devfs_path - } - - pub fn zpool_name(&self) -> &ZpoolName { - &self.zpool_name - } - - pub fn boot_image_devfs_path( - &self, - raw: bool, - ) -> Result { - self.paths.partition_device_path( - &self.partitions, - Partition::BootImage, - raw, - ) - } - - pub fn dump_device_devfs_path( - &self, - raw: bool, - ) -> Result { - self.paths.partition_device_path( - &self.partitions, - Partition::DumpDevice, - raw, - ) - } - - pub fn slot(&self) -> i64 { - self.slot - } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -671,56 +315,6 @@ impl From for DiskVariant { } } -/// A file that wraps a zfs encryption key. -/// -/// We put this in a RAM backed filesystem and zero and delete it when we are -/// done with it. Unfortunately we cannot do this inside `Drop` because there is no -/// equivalent async drop. -pub struct KeyFile { - path: Keypath, - file: File, - log: Logger, -} - -impl KeyFile { - pub async fn create( - path: Keypath, - key: &[u8; 32], - log: &Logger, - ) -> std::io::Result { - // TODO: fix this to not truncate - // We want to overwrite any existing contents. - // If we truncate we may leave dirty pages around - // containing secrets. - let mut file = tokio::fs::OpenOptions::new() - .create(true) - .write(true) - .open(&path.0) - .await?; - file.write_all(key).await?; - info!(log, "Created keyfile {}", path); - Ok(KeyFile { path, file, log: log.clone() }) - } - - /// These keyfiles live on a tmpfs and we zero the file so the data doesn't - /// linger on the page in memory. - /// - /// It'd be nice to `impl Drop for `KeyFile` and then call `zero` - /// from within the drop handler, but async `Drop` isn't supported. 
- pub async fn zero_and_unlink(&mut self) -> std::io::Result<()> { - let zeroes = [0u8; 32]; - let _ = self.file.seek(SeekFrom::Start(0)).await?; - self.file.write_all(&zeroes).await?; - info!(self.log, "Zeroed and unlinked keyfile {}", self.path); - remove_file(&self.path().0).await?; - Ok(()) - } - - pub fn path(&self) -> &Keypath { - &self.path - } -} - #[cfg(test)] mod test { use super::*; @@ -832,7 +426,7 @@ mod test { paths .partition_device_path(&[], Partition::ZfsPool, false) .expect_err("Should not have found partition"), - DiskError::NotFound { .. }, + PooledDiskError::NotFound { .. }, )); } } diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index c0145b75e8..19111c6cda 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -19,7 +19,6 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::sync::Mutex; use tokio::sync::broadcast; -use tokio::task::JoinHandle; use uuid::Uuid; mod gpt; @@ -589,11 +588,11 @@ async fn hardware_tracking_task( /// /// This structure provides interfaces for both querying and for receiving new /// events. +#[derive(Clone)] pub struct HardwareManager { log: Logger, inner: Arc>, tx: broadcast::Sender, - _worker: JoinHandle<()>, } impl HardwareManager { @@ -663,11 +662,11 @@ impl HardwareManager { let log2 = log.clone(); let inner2 = inner.clone(); let tx2 = tx.clone(); - let _worker = tokio::task::spawn(async move { + tokio::task::spawn(async move { hardware_tracking_task(log2, inner2, tx2).await }); - Ok(Self { log, inner, tx, _worker }) + Ok(Self { log, inner, tx }) } pub fn baseboard(&self) -> Baseboard { diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 950074bd3a..4b7e69057d 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -5,7 +5,7 @@ //! illumos-specific mechanisms for parsing disk info. use crate::illumos::gpt; -use crate::{DiskError, DiskPaths, DiskVariant, Partition}; +use crate::{DiskPaths, DiskVariant, Partition, PooledDiskError}; use camino::Utf8Path; use illumos_utils::zpool::ZpoolName; use slog::info; @@ -41,9 +41,9 @@ fn parse_partition_types( path: &Utf8Path, partitions: &Vec, expected_partitions: &[Partition; N], -) -> Result, DiskError> { +) -> Result, PooledDiskError> { if partitions.len() != N { - return Err(DiskError::BadPartitionLayout { + return Err(PooledDiskError::BadPartitionLayout { path: path.to_path_buf(), why: format!( "Expected {} partitions, only saw {}", @@ -54,7 +54,7 @@ fn parse_partition_types( } for i in 0..N { if partitions[i].index() != i { - return Err(DiskError::BadPartitionLayout { + return Err(PooledDiskError::BadPartitionLayout { path: path.to_path_buf(), why: format!( "The {i}-th partition has index {}", @@ -80,7 +80,7 @@ pub fn ensure_partition_layout( log: &Logger, paths: &DiskPaths, variant: DiskVariant, -) -> Result, DiskError> { +) -> Result, PooledDiskError> { internal_ensure_partition_layout::(log, paths, variant) } @@ -90,7 +90,7 @@ fn internal_ensure_partition_layout( log: &Logger, paths: &DiskPaths, variant: DiskVariant, -) -> Result, DiskError> { +) -> Result, PooledDiskError> { // Open the "Whole Disk" as a raw device to be parsed by the // libefi-illumos library. This lets us peek at the GPT before // making too many assumptions about it. 
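The `HardwareManager` change above removes the stored `JoinHandle` and derives `Clone`. That is sound because a task spawned with `tokio::task::spawn` keeps running after its `JoinHandle` is dropped; the handle is only needed to await or abort the task. A small self-contained sketch of that behavior (not code from this change):

    use std::time::Duration;

    #[tokio::main]
    async fn main() {
        // Dropping the JoinHandle detaches the task; it keeps running.
        let _ = tokio::task::spawn(async {
            loop {
                println!("hardware tracking tick");
                tokio::time::sleep(Duration::from_millis(100)).await;
            }
        });
        // The detached task continues ticking while we wait here.
        tokio::time::sleep(Duration::from_millis(350)).await;
    }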
@@ -114,14 +114,16 @@ fn internal_ensure_partition_layout( let dev_path = if let Some(dev_path) = &paths.dev_path { dev_path } else { - return Err(DiskError::CannotFormatMissingDevPath { path }); + return Err(PooledDiskError::CannotFormatMissingDevPath { + path, + }); }; match variant { DiskVariant::U2 => { info!(log, "Formatting zpool on disk {}", paths.devfs_path); // If a zpool does not already exist, create one. let zpool_name = ZpoolName::new_external(Uuid::new_v4()); - Zpool::create(zpool_name, dev_path)?; + Zpool::create(&zpool_name, dev_path)?; return Ok(vec![Partition::ZfsPool]); } DiskVariant::M2 => { @@ -129,12 +131,12 @@ fn internal_ensure_partition_layout( // the expected partitions? Or would it be wiser to infer // that this indicates an unexpected error conditions that // needs mitigation? - return Err(DiskError::CannotFormatM2NotImplemented); + return Err(PooledDiskError::CannotFormatM2NotImplemented); } } } Err(err) => { - return Err(DiskError::Gpt { + return Err(PooledDiskError::Gpt { path, error: anyhow::Error::new(err), }); @@ -197,7 +199,7 @@ mod test { DiskVariant::U2, ); match result { - Err(DiskError::CannotFormatMissingDevPath { .. }) => {} + Err(PooledDiskError::CannotFormatMissingDevPath { .. }) => {} _ => panic!("Should have failed with a missing dev path error"), } @@ -373,7 +375,7 @@ mod test { DiskVariant::M2, ) .expect_err("Should have failed parsing empty GPT"), - DiskError::BadPartitionLayout { .. } + PooledDiskError::BadPartitionLayout { .. } )); logctx.cleanup_successful(); @@ -398,7 +400,7 @@ mod test { DiskVariant::U2, ) .expect_err("Should have failed parsing empty GPT"), - DiskError::BadPartitionLayout { .. } + PooledDiskError::BadPartitionLayout { .. } )); logctx.cleanup_successful(); diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index 654dfd59d9..2e3fd4a576 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -163,13 +163,3 @@ impl std::fmt::Display for Baseboard { } } } - -impl From for nexus_client::types::Baseboard { - fn from(b: Baseboard) -> nexus_client::types::Baseboard { - nexus_client::types::Baseboard { - serial_number: b.identifier().to_string(), - part_number: b.model().to_string(), - revision: b.revision(), - } - } -} diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index 6e36330df0..d8372dd8aa 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -2,7 +2,9 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::disk::{DiskError, DiskPaths, DiskVariant, Partition, UnparsedDisk}; +use crate::disk::{ + DiskPaths, DiskVariant, Partition, PooledDiskError, UnparsedDisk, +}; use crate::{Baseboard, SledMode}; use slog::Logger; use std::collections::HashSet; @@ -16,6 +18,7 @@ use tokio::sync::broadcast; /// /// If you're actually trying to run the Sled Agent on non-illumos platforms, /// use the simulated sled agent, which does not attempt to abstract hardware. 
+#[derive(Clone)] pub struct HardwareManager {} impl HardwareManager { @@ -56,7 +59,7 @@ pub fn ensure_partition_layout( _log: &Logger, _paths: &DiskPaths, _variant: DiskVariant, -) -> Result, DiskError> { +) -> Result, PooledDiskError> { unimplemented!("Accessing hardware unsupported on non-illumos"); } diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml new file mode 100644 index 0000000000..cb3a790631 --- /dev/null +++ b/sled-storage/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "sled-storage" +version = "0.1.0" +edition = "2021" + +[dependencies] +async-trait.workspace = true +camino.workspace = true +cfg-if.workspace = true +derive_more.workspace = true +glob.workspace = true +illumos-utils.workspace = true +key-manager.workspace = true +omicron-common.workspace = true +rand.workspace = true +schemars = { workspace = true, features = [ "chrono", "uuid1" ] } +serde.workspace = true +serde_json.workspace = true +sled-hardware.workspace = true +slog.workspace = true +thiserror.workspace = true +tokio.workspace = true +uuid.workspace = true +omicron-workspace-hack.workspace = true + +[dev-dependencies] +illumos-utils = { workspace = true, features = ["tmp_keypath", "testing"] } +omicron-test-utils.workspace = true +camino-tempfile.workspace = true + +[features] +# Quotas and the like can be shrunk via this feature +testing = [] diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs new file mode 100644 index 0000000000..a2878af7f6 --- /dev/null +++ b/sled-storage/src/dataset.rs @@ -0,0 +1,379 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! ZFS dataset related functionality + +use crate::keyfile::KeyFile; +use camino::Utf8PathBuf; +use cfg_if::cfg_if; +use illumos_utils::zfs::{ + self, DestroyDatasetErrorVariant, EncryptionDetails, Keypath, Mountpoint, + SizeDetails, Zfs, +}; +use illumos_utils::zpool::ZpoolName; +use key_manager::StorageKeyRequester; +use omicron_common::disk::DiskIdentity; +use rand::distributions::{Alphanumeric, DistString}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sled_hardware::DiskVariant; +use slog::{info, Logger}; +use std::sync::OnceLock; + +pub const INSTALL_DATASET: &'static str = "install"; +pub const CRASH_DATASET: &'static str = "crash"; +pub const CLUSTER_DATASET: &'static str = "cluster"; +pub const CONFIG_DATASET: &'static str = "config"; +pub const M2_DEBUG_DATASET: &'static str = "debug"; +pub const M2_BACKING_DATASET: &'static str = "backing"; + +cfg_if! { + if #[cfg(any(test, feature = "testing"))] { + // Tuned for zone_bundle tests + pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 10); + } else { + // TODO-correctness: This value of 100GiB is a pretty wild guess, and should be + // tuned as needed. + pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30); + } +} +// TODO-correctness: This value of 100GiB is a pretty wild guess, and should be +// tuned as needed. +pub const DUMP_DATASET_QUOTA: usize = 100 * (1 << 30); +// passed to zfs create -o compression= +pub const DUMP_DATASET_COMPRESSION: &'static str = "gzip-9"; + +// U.2 datasets live under the encrypted dataset and inherit encryption +pub const ZONE_DATASET: &'static str = "crypt/zone"; +pub const DUMP_DATASET: &'static str = "crypt/debug"; +pub const U2_DEBUG_DATASET: &'static str = "crypt/debug"; + +// This is the root dataset for all U.2 drives. 
Encryption is inherited. +pub const CRYPT_DATASET: &'static str = "crypt"; + +const U2_EXPECTED_DATASET_COUNT: usize = 2; +static U2_EXPECTED_DATASETS: [ExpectedDataset; U2_EXPECTED_DATASET_COUNT] = [ + // Stores filesystems for zones + ExpectedDataset::new(ZONE_DATASET).wipe(), + // For storing full kernel RAM dumps + ExpectedDataset::new(DUMP_DATASET) + .quota(DUMP_DATASET_QUOTA) + .compression(DUMP_DATASET_COMPRESSION), +]; + +const M2_EXPECTED_DATASET_COUNT: usize = 6; +static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ + // Stores software images. + // + // Should be duplicated to both M.2s. + ExpectedDataset::new(INSTALL_DATASET), + // Stores crash dumps. + ExpectedDataset::new(CRASH_DATASET), + // Backing store for OS data that should be persisted across reboots. + // Its children are selectively overlay mounted onto parts of the ramdisk + // root. + ExpectedDataset::new(M2_BACKING_DATASET), + // Stores cluter configuration information. + // + // Should be duplicated to both M.2s. + ExpectedDataset::new(CLUSTER_DATASET), + // Stores configuration data, including: + // - What services should be launched on this sled + // - Information about how to initialize the Sled Agent + // - (For scrimlets) RSS setup information + // + // Should be duplicated to both M.2s. + ExpectedDataset::new(CONFIG_DATASET), + // Store debugging data, such as service bundles. + ExpectedDataset::new(M2_DEBUG_DATASET).quota(DEBUG_DATASET_QUOTA), +]; + +// Helper type for describing expected datasets and their optional quota. +#[derive(Clone, Copy, Debug)] +struct ExpectedDataset { + // Name for the dataset + name: &'static str, + // Optional quota, in _bytes_ + quota: Option, + // Identifies if the dataset should be deleted on boot + wipe: bool, + // Optional compression mode + compression: Option<&'static str>, +} + +impl ExpectedDataset { + const fn new(name: &'static str) -> Self { + ExpectedDataset { name, quota: None, wipe: false, compression: None } + } + + const fn quota(mut self, quota: usize) -> Self { + self.quota = Some(quota); + self + } + + const fn wipe(mut self) -> Self { + self.wipe = true; + self + } + + const fn compression(mut self, compression: &'static str) -> Self { + self.compression = Some(compression); + self + } +} + +/// The type of a dataset, and an auxiliary information necessary +/// to successfully launch a zone managing the associated data. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum DatasetKind { + CockroachDb, + Crucible, + Clickhouse, + ClickhouseKeeper, + ExternalDns, + InternalDns, +} + +impl std::fmt::Display for DatasetKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use DatasetKind::*; + let s = match self { + Crucible => "crucible", + CockroachDb { .. } => "cockroachdb", + Clickhouse => "clickhouse", + ClickhouseKeeper => "clickhouse_keeper", + ExternalDns { .. } => "external_dns", + InternalDns { .. } => "internal_dns", + }; + write!(f, "{}", s) + } +} + +#[derive( + Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, +)] +pub struct DatasetName { + // A unique identifier for the Zpool on which the dataset is stored. + pool_name: ZpoolName, + // A name for the dataset within the Zpool. 
+ kind: DatasetKind, +} + +impl DatasetName { + pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self { + Self { pool_name, kind } + } + + pub fn pool(&self) -> &ZpoolName { + &self.pool_name + } + + pub fn dataset(&self) -> &DatasetKind { + &self.kind + } + + pub fn full(&self) -> String { + format!("{}/{}", self.pool_name, self.kind) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum DatasetError { + #[error("Cannot open {path} due to {error}")] + IoError { path: Utf8PathBuf, error: std::io::Error }, + #[error(transparent)] + DestroyFilesystem(#[from] illumos_utils::zfs::DestroyDatasetError), + #[error(transparent)] + EnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), + #[error("KeyManager error: {0}")] + KeyManager(#[from] key_manager::Error), + #[error("Missing StorageKeyRequester when creating U.2 disk")] + MissingStorageKeyRequester, + #[error("Encrypted filesystem '{0}' missing 'oxide:epoch' property")] + CannotParseEpochProperty(String), + #[error("Encrypted dataset '{dataset}' cannot set 'oxide:agent' property: {err}")] + CannotSetAgentProperty { + dataset: String, + #[source] + err: Box, + }, +} + +/// Ensure that the zpool contains all the datasets we would like it to +/// contain. +/// +/// WARNING: In all cases where a U.2 is a possible `DiskVariant`, a +/// `StorageKeyRequester` must be passed so that disk encryption can +/// be used. The `StorageManager` for the sled-agent always has a +/// `StorageKeyRequester` available, and so the only place we should pass +/// `None` is for the M.2s touched by the Installinator. +pub(crate) async fn ensure_zpool_has_datasets( + log: &Logger, + zpool_name: &ZpoolName, + disk_identity: &DiskIdentity, + key_requester: Option<&StorageKeyRequester>, +) -> Result<(), DatasetError> { + let (root, datasets) = match zpool_name.kind().into() { + DiskVariant::M2 => (None, M2_EXPECTED_DATASETS.iter()), + DiskVariant::U2 => (Some(CRYPT_DATASET), U2_EXPECTED_DATASETS.iter()), + }; + + let zoned = false; + let do_format = true; + + // Ensure the root encrypted filesystem exists + // Datasets below this in the hierarchy will inherit encryption + if let Some(dataset) = root { + let Some(key_requester) = key_requester else { + return Err(DatasetError::MissingStorageKeyRequester); + }; + let mountpoint = zpool_name.dataset_mountpoint(dataset); + let keypath: Keypath = disk_identity.into(); + + let epoch = if let Ok(epoch_str) = + Zfs::get_oxide_value(dataset, "epoch") + { + if let Ok(epoch) = epoch_str.parse::() { + epoch + } else { + return Err(DatasetError::CannotParseEpochProperty( + dataset.to_string(), + )); + } + } else { + // We got an error trying to call `Zfs::get_oxide_value` + // which indicates that the dataset doesn't exist or there + // was a problem running the command. + // + // Note that `Zfs::get_oxide_value` will succeed even if + // the epoch is missing. `epoch_str` will show up as a dash + // (`-`) and will not parse into a `u64`. So we don't have + // to worry about that case here as it is handled above. + // + // If the error indicated that the command failed for some + // other reason, but the dataset actually existed, we will + // try to create the dataset below and that will fail. So + // there is no harm in just loading the latest secret here. 
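Stepping back to the `DatasetName` and `DatasetKind` types introduced earlier in this file: `DatasetName::full()` combines the pool and kind `Display` impls into the on-disk ZFS name. A sketch written from a consumer's point of view, with an arbitrary UUID; the exact rendered pool prefix depends on `ZpoolName`'s `Display` impl:

    use illumos_utils::zpool::ZpoolName;
    use sled_storage::dataset::{DatasetKind, DatasetName};
    use uuid::Uuid;

    fn example() {
        let pool = ZpoolName::new_external(Uuid::new_v4());
        let name = DatasetName::new(pool, DatasetKind::Crucible);
        // Renders as "<pool>/crucible": the pool's Display string followed by
        // the DatasetKind Display string defined above.
        println!("{}", name.full());
        // DatasetName also derives Serialize/Deserialize, as exercised by the
        // serialize_dataset_name test at the bottom of this file.
    }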
+ info!(log, "Loading latest secret"; "disk_id"=>#?disk_identity); + let epoch = key_requester.load_latest_secret().await?; + info!(log, "Loaded latest secret"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); + epoch + }; + + info!(log, "Retrieving key"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); + let key = key_requester.get_key(epoch, disk_identity.clone()).await?; + info!(log, "Got key"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); + + let mut keyfile = + KeyFile::create(keypath.clone(), key.expose_secret(), log) + .await + .map_err(|error| DatasetError::IoError { + path: keypath.0.clone(), + error, + })?; + + let encryption_details = EncryptionDetails { keypath, epoch }; + + info!( + log, + "Ensuring encrypted filesystem: {} for epoch {}", dataset, epoch + ); + let result = Zfs::ensure_filesystem( + &format!("{}/{}", zpool_name, dataset), + Mountpoint::Path(mountpoint), + zoned, + do_format, + Some(encryption_details), + None, + None, + ); + + keyfile.zero_and_unlink().await.map_err(|error| { + DatasetError::IoError { path: keyfile.path().0.clone(), error } + })?; + + result?; + }; + + for dataset in datasets.into_iter() { + let mountpoint = zpool_name.dataset_mountpoint(dataset.name); + let name = &format!("{}/{}", zpool_name, dataset.name); + + // Use a value that's alive for the duration of this sled agent + // to answer the question: should we wipe this disk, or have + // we seen it before? + // + // If this value comes from a prior iteration of the sled agent, + // we opt to remove the corresponding dataset. + static AGENT_LOCAL_VALUE: OnceLock = OnceLock::new(); + let agent_local_value = AGENT_LOCAL_VALUE.get_or_init(|| { + Alphanumeric.sample_string(&mut rand::thread_rng(), 20) + }); + + if dataset.wipe { + match Zfs::get_oxide_value(name, "agent") { + Ok(v) if &v == agent_local_value => { + info!(log, "Skipping automatic wipe for dataset: {}", name); + } + Ok(_) | Err(_) => { + info!(log, "Automatically destroying dataset: {}", name); + Zfs::destroy_dataset(name).or_else(|err| { + // If we can't find the dataset, that's fine -- it might + // not have been formatted yet. + if matches!( + err.err, + DestroyDatasetErrorVariant::NotFound + ) { + Ok(()) + } else { + Err(err) + } + })?; + } + } + } + + let encryption_details = None; + let size_details = Some(SizeDetails { + quota: dataset.quota, + compression: dataset.compression, + }); + Zfs::ensure_filesystem( + name, + Mountpoint::Path(mountpoint), + zoned, + do_format, + encryption_details, + size_details, + None, + )?; + + if dataset.wipe { + Zfs::set_oxide_value(name, "agent", agent_local_value).map_err( + |err| DatasetError::CannotSetAgentProperty { + dataset: name.clone(), + err: Box::new(err), + }, + )?; + } + } + Ok(()) +} + +#[cfg(test)] +mod test { + use super::*; + use uuid::Uuid; + + #[test] + fn serialize_dataset_name() { + let pool = ZpoolName::new_internal(Uuid::new_v4()); + let kind = DatasetKind::Crucible; + let name = DatasetName::new(pool, kind); + serde_json::to_string(&name).unwrap(); + } +} diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs new file mode 100644 index 0000000000..f5209def77 --- /dev/null +++ b/sled-storage/src/disk.rs @@ -0,0 +1,243 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Disk related types + +use camino::{Utf8Path, Utf8PathBuf}; +use derive_more::From; +use illumos_utils::zpool::{Zpool, ZpoolKind, ZpoolName}; +use key_manager::StorageKeyRequester; +use omicron_common::disk::DiskIdentity; +use sled_hardware::{ + DiskVariant, Partition, PooledDisk, PooledDiskError, UnparsedDisk, +}; +use slog::Logger; +use std::fs::File; + +use crate::dataset; + +#[derive(Debug, thiserror::Error)] +pub enum DiskError { + #[error(transparent)] + Dataset(#[from] crate::dataset::DatasetError), + #[error(transparent)] + PooledDisk(#[from] sled_hardware::PooledDiskError), +} + +// A synthetic disk that acts as one "found" by the hardware and that is backed +// by a zpool +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SyntheticDisk { + pub identity: DiskIdentity, + pub zpool_name: ZpoolName, +} + +impl SyntheticDisk { + // Create a zpool and import it for the synthetic disk + // Zpools willl be set to the min size of 64Mib + pub fn create_zpool( + dir: &Utf8Path, + zpool_name: &ZpoolName, + ) -> SyntheticDisk { + // 64 MiB (min size of zpool) + const DISK_SIZE: u64 = 64 * 1024 * 1024; + let path = dir.join(zpool_name.to_string()); + let file = File::create(&path).unwrap(); + file.set_len(DISK_SIZE).unwrap(); + drop(file); + Zpool::create(zpool_name, &path).unwrap(); + Zpool::import(zpool_name).unwrap(); + Zpool::set_failmode_continue(zpool_name).unwrap(); + Self::new(zpool_name.clone()) + } + + pub fn new(zpool_name: ZpoolName) -> SyntheticDisk { + let id = zpool_name.id(); + let identity = DiskIdentity { + vendor: "synthetic-vendor".to_string(), + serial: format!("synthetic-serial-{id}"), + model: "synthetic-model".to_string(), + }; + SyntheticDisk { identity, zpool_name } + } +} + +// An [`UnparsedDisk`] disk learned about from the hardware or a wrapped zpool +#[derive(Debug, Clone, PartialEq, Eq, Hash, From)] +pub enum RawDisk { + Real(UnparsedDisk), + Synthetic(SyntheticDisk), +} + +impl RawDisk { + pub fn is_boot_disk(&self) -> bool { + match self { + Self::Real(disk) => disk.is_boot_disk(), + Self::Synthetic(disk) => { + // Just label any M.2 the boot disk. + disk.zpool_name.kind() == ZpoolKind::Internal + } + } + } + + pub fn identity(&self) -> &DiskIdentity { + match self { + Self::Real(disk) => &disk.identity(), + Self::Synthetic(disk) => &disk.identity, + } + } + + pub fn variant(&self) -> DiskVariant { + match self { + Self::Real(disk) => disk.variant(), + Self::Synthetic(disk) => match disk.zpool_name.kind() { + ZpoolKind::External => DiskVariant::U2, + ZpoolKind::Internal => DiskVariant::M2, + }, + } + } + + #[cfg(test)] + pub fn zpool_name(&self) -> &ZpoolName { + match self { + Self::Real(_) => unreachable!(), + Self::Synthetic(disk) => &disk.zpool_name, + } + } + + pub fn is_synthetic(&self) -> bool { + match self { + Self::Real(_) => false, + Self::Synthetic(_) => true, + } + } + + pub fn is_real(&self) -> bool { + !self.is_synthetic() + } + + pub fn devfs_path(&self) -> &Utf8PathBuf { + match self { + Self::Real(disk) => disk.devfs_path(), + Self::Synthetic(_) => unreachable!(), + } + } +} + +/// A physical [`PooledDisk`] or a [`SyntheticDisk`] that contains or is backed +/// by a single zpool and that has provisioned datasets. This disk is ready for +/// usage by higher level software. 
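Because `RawDisk` derives `From` via `derive_more`, an `UnparsedDisk` or `SyntheticDisk` converts into it with a plain `.into()`, and the `Disk` enum defined next is the upgraded form whose datasets have been provisioned. A minimal sketch using only the synthetic variant, so no hardware or real zpool is needed:

    use illumos_utils::zpool::ZpoolName;
    use sled_storage::disk::{RawDisk, SyntheticDisk};
    use uuid::Uuid;

    fn example() {
        let zpool_name = ZpoolName::new_external(Uuid::new_v4());
        // derive_more::From provides this conversion.
        let raw: RawDisk = SyntheticDisk::new(zpool_name).into();
        assert!(raw.is_synthetic());
        assert_eq!(raw.variant(), sled_hardware::DiskVariant::U2);
        // A disk discovered by the hardware layer is wrapped the same way:
        // let raw: RawDisk = unparsed_disk.into();
    }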
+#[derive(Debug, Clone, PartialEq, Eq, Hash, From)] +pub enum Disk { + Real(PooledDisk), + Synthetic(SyntheticDisk), +} + +impl Disk { + pub async fn new( + log: &Logger, + raw_disk: RawDisk, + key_requester: Option<&StorageKeyRequester>, + ) -> Result { + let disk = match raw_disk { + RawDisk::Real(disk) => PooledDisk::new(log, disk)?.into(), + RawDisk::Synthetic(disk) => Disk::Synthetic(disk), + }; + dataset::ensure_zpool_has_datasets( + log, + disk.zpool_name(), + disk.identity(), + key_requester, + ) + .await?; + Ok(disk) + } + + pub fn is_synthetic(&self) -> bool { + match self { + Self::Real(_) => false, + Self::Synthetic(_) => true, + } + } + + pub fn is_real(&self) -> bool { + !self.is_synthetic() + } + + pub fn is_boot_disk(&self) -> bool { + match self { + Self::Real(disk) => disk.is_boot_disk, + Self::Synthetic(disk) => { + // Just label any M.2 the boot disk. + disk.zpool_name.kind() == ZpoolKind::Internal + } + } + } + + pub fn identity(&self) -> &DiskIdentity { + match self { + Self::Real(disk) => &disk.identity, + Self::Synthetic(disk) => &disk.identity, + } + } + + pub fn variant(&self) -> DiskVariant { + match self { + Self::Real(disk) => disk.variant, + Self::Synthetic(disk) => match disk.zpool_name.kind() { + ZpoolKind::External => DiskVariant::U2, + ZpoolKind::Internal => DiskVariant::M2, + }, + } + } + + pub fn devfs_path(&self) -> &Utf8PathBuf { + match self { + Self::Real(disk) => &disk.paths.devfs_path, + Self::Synthetic(_) => unreachable!(), + } + } + + pub fn zpool_name(&self) -> &ZpoolName { + match self { + Self::Real(disk) => &disk.zpool_name, + Self::Synthetic(disk) => &disk.zpool_name, + } + } + + pub fn boot_image_devfs_path( + &self, + raw: bool, + ) -> Result { + match self { + Self::Real(disk) => disk.paths.partition_device_path( + &disk.partitions, + Partition::BootImage, + raw, + ), + Self::Synthetic(_) => unreachable!(), + } + } + + pub fn dump_device_devfs_path( + &self, + raw: bool, + ) -> Result { + match self { + Self::Real(disk) => disk.paths.partition_device_path( + &disk.partitions, + Partition::DumpDevice, + raw, + ), + Self::Synthetic(_) => unreachable!(), + } + } + + pub fn slot(&self) -> i64 { + match self { + Self::Real(disk) => disk.slot, + Self::Synthetic(_) => unreachable!(), + } + } +} diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs new file mode 100644 index 0000000000..b9f97ee428 --- /dev/null +++ b/sled-storage/src/error.rs @@ -0,0 +1,81 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Storage related errors + +use crate::dataset::{DatasetError, DatasetName}; +use crate::disk::DiskError; +use camino::Utf8PathBuf; +use omicron_common::api::external::ByteCountRangeError; +use uuid::Uuid; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error(transparent)] + DiskError(#[from] DiskError), + + #[error(transparent)] + DatasetError(#[from] DatasetError), + + // TODO: We could add the context of "why are we doint this op", maybe? 
+ #[error(transparent)] + ZfsListDataset(#[from] illumos_utils::zfs::ListDatasetsError), + + #[error(transparent)] + ZfsEnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), + + #[error(transparent)] + ZfsSetValue(#[from] illumos_utils::zfs::SetValueError), + + #[error(transparent)] + ZfsGetValue(#[from] illumos_utils::zfs::GetValueError), + + #[error(transparent)] + GetZpoolInfo(#[from] illumos_utils::zpool::GetInfoError), + + #[error(transparent)] + Fstyp(#[from] illumos_utils::fstyp::Error), + + #[error(transparent)] + ZoneCommand(#[from] illumos_utils::running_zone::RunCommandError), + + #[error(transparent)] + ZoneBoot(#[from] illumos_utils::running_zone::BootError), + + #[error(transparent)] + ZoneEnsureAddress(#[from] illumos_utils::running_zone::EnsureAddressError), + + #[error(transparent)] + ZoneInstall(#[from] illumos_utils::running_zone::InstallZoneError), + + #[error("No U.2 Zpools found")] + NoU2Zpool, + + #[error("Failed to parse UUID from {path}: {err}")] + ParseUuid { + path: Utf8PathBuf, + #[source] + err: uuid::Error, + }, + + #[error("Dataset {name:?} exists with a different uuid (has {old}, requested {new})")] + UuidMismatch { name: Box, old: Uuid, new: Uuid }, + + #[error("Error parsing pool {name}'s size: {err}")] + BadPoolSize { + name: String, + #[source] + err: ByteCountRangeError, + }, + + #[error("Failed to parse the dataset {name}'s UUID: {err}")] + ParseDatasetUuid { + name: String, + #[source] + err: uuid::Error, + }, + + #[error("Zpool Not Found: {0}")] + ZpoolNotFound(String), +} diff --git a/sled-storage/src/keyfile.rs b/sled-storage/src/keyfile.rs new file mode 100644 index 0000000000..48e5d9a528 --- /dev/null +++ b/sled-storage/src/keyfile.rs @@ -0,0 +1,76 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Key file support for ZFS dataset encryption + +use illumos_utils::zfs::Keypath; +use slog::{error, info, Logger}; +use tokio::fs::{remove_file, File}; +use tokio::io::{AsyncSeekExt, AsyncWriteExt, SeekFrom}; + +/// A file that wraps a zfs encryption key. +/// +/// We put this in a RAM backed filesystem and zero and delete it when we are +/// done with it. Unfortunately we cannot do this inside `Drop` because there is no +/// equivalent async drop. +pub struct KeyFile { + path: Keypath, + file: File, + log: Logger, + zero_and_unlink_called: bool, +} + +impl KeyFile { + pub async fn create( + path: Keypath, + key: &[u8; 32], + log: &Logger, + ) -> std::io::Result { + // We want to overwrite any existing contents. + let mut file = tokio::fs::OpenOptions::new() + .create(true) + .write(true) + .open(&path.0) + .await?; + file.write_all(key).await?; + info!(log, "Created keyfile {}", path); + Ok(KeyFile { + path, + file, + log: log.clone(), + zero_and_unlink_called: false, + }) + } + + /// These keyfiles live on a tmpfs and we zero the file so the data doesn't + /// linger on the page in memory. + /// + /// It'd be nice to `impl Drop for `KeyFile` and then call `zero` + /// from within the drop handler, but async `Drop` isn't supported. 
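The `zero_and_unlink` method that follows has to be an explicit async call because Rust has no async `Drop`; the `Drop` impl further down only reports the bug if a caller forgets it. The general shape of that pattern, sketched with illustrative names unrelated to `KeyFile`:

    // Illustrative names only; this mirrors the shape of KeyFile, not its API.
    struct Sensitive {
        finalized: bool,
    }

    impl Sensitive {
        // The async cleanup must be called explicitly by the owner.
        async fn finalize(&mut self) {
            // ... zero buffers, unlink files, and so on ...
            self.finalized = true;
        }
    }

    impl Drop for Sensitive {
        fn drop(&mut self) {
            if !self.finalized {
                // Too late to clean up asynchronously; just make the bug loud.
                eprintln!("BUG: Sensitive dropped without finalize()");
            }
        }
    }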
+ pub async fn zero_and_unlink(&mut self) -> std::io::Result<()> { + self.zero_and_unlink_called = true; + let zeroes = [0u8; 32]; + let _ = self.file.seek(SeekFrom::Start(0)).await?; + self.file.write_all(&zeroes).await?; + info!(self.log, "Zeroed and unlinked keyfile {}", self.path); + remove_file(&self.path().0).await?; + Ok(()) + } + + pub fn path(&self) -> &Keypath { + &self.path + } +} + +impl Drop for KeyFile { + fn drop(&mut self) { + if !self.zero_and_unlink_called { + error!( + self.log, + "Failed to call zero_and_unlink for keyfile"; + "path" => %self.path + ); + } + } +} diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs new file mode 100644 index 0000000000..d4b64c55a5 --- /dev/null +++ b/sled-storage/src/lib.rs @@ -0,0 +1,17 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Local storage abstraction for use by sled-agent +//! +//! This abstraction operates at the ZFS level and relies on zpool setup on +//! hardware partitions from the `sled-hardware` crate. It utilizes the +//! `illumos-utils` crate to actually perform ZFS related OS calls. + +pub mod dataset; +pub mod disk; +pub mod error; +pub(crate) mod keyfile; +pub mod manager; +pub mod pool; +pub mod resources; diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs new file mode 100644 index 0000000000..50b1c44148 --- /dev/null +++ b/sled-storage/src/manager.rs @@ -0,0 +1,1034 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The storage manager task + +use std::collections::HashSet; + +use crate::dataset::{DatasetError, DatasetName}; +use crate::disk::{Disk, DiskError, RawDisk}; +use crate::error::Error; +use crate::resources::{AddDiskResult, StorageResources}; +use camino::Utf8PathBuf; +use illumos_utils::zfs::{Mountpoint, Zfs}; +use illumos_utils::zpool::ZpoolName; +use key_manager::StorageKeyRequester; +use omicron_common::disk::DiskIdentity; +use sled_hardware::DiskVariant; +use slog::{error, info, o, warn, Logger}; +use tokio::sync::{mpsc, oneshot, watch}; +use tokio::time::{interval, Duration, MissedTickBehavior}; +use uuid::Uuid; + +// The size of the mpsc bounded channel used to communicate +// between the `StorageHandle` and `StorageManager`. +// +// How did we choose this bound, and why? +// +// Picking a bound can be tricky, but in general, you want the channel to act +// unbounded, such that sends never fail. This makes the channels reliable, +// such that we never drop messages inside the process, and the caller doesn't +// have to choose what to do when overloaded. This simplifies things drastically +// for developers. However, you also don't want to make the channel actually +// unbounded, because that can lead to run-away memory growth and pathological +// behaviors, such that requests get slower over time until the system crashes. +// +// Our team's chosen solution, and used elsewhere in the codebase, is is to +// choose a large enough bound such that we should never hit it in practice +// unless we are truly overloaded. If we hit the bound it means that beyond that +// requests will start to build up and we will eventually topple over. So when +// we hit this bound, we just go ahead and panic. 
+// +// Picking a channel bound is hard to do empirically, but practically, if +// requests are mostly mutating task local state, a bound of 1024 or even 8192 +// should be plenty. Tasks that must perform longer running ops can spawn helper +// tasks as necessary or include their own handles for replies rather than +// synchronously waiting. Memory for the queue can be kept small with boxing of +// large messages. +// +// Here we start relatively small so that we can evaluate our choice over time. +const QUEUE_SIZE: usize = 256; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StorageManagerState { + WaitingForKeyManager, + QueueingDisks, + Normal, +} + +#[derive(Debug)] +struct NewFilesystemRequest { + dataset_id: Uuid, + dataset_name: DatasetName, + responder: oneshot::Sender>, +} + +#[derive(Debug)] +enum StorageRequest { + AddDisk(RawDisk), + RemoveDisk(RawDisk), + DisksChanged(HashSet), + NewFilesystem(NewFilesystemRequest), + KeyManagerReady, + /// This will always grab the latest state after any new updates, as it + /// serializes through the `StorageManager` task after all prior requests. + /// This serialization is particularly useful for tests. + GetLatestResources(oneshot::Sender), + + /// Get the internal task state of the manager + GetManagerState(oneshot::Sender), +} + +/// Data managed internally to the StorageManagerTask that can be useful +/// to clients for debugging purposes, and that isn't exposed in other ways. +#[derive(Debug, Clone)] +pub struct StorageManagerData { + pub state: StorageManagerState, + pub queued_u2_drives: HashSet, +} + +/// A mechanism for interacting with the [`StorageManager`] +#[derive(Clone)] +pub struct StorageHandle { + tx: mpsc::Sender, + resource_updates: watch::Receiver, +} + +impl StorageHandle { + /// Adds a disk and associated zpool to the storage manager. + pub async fn upsert_disk(&self, disk: RawDisk) { + self.tx.send(StorageRequest::AddDisk(disk)).await.unwrap(); + } + + /// Removes a disk, if it's tracked by the storage manager, as well + /// as any associated zpools. + pub async fn delete_disk(&self, disk: RawDisk) { + self.tx.send(StorageRequest::RemoveDisk(disk)).await.unwrap(); + } + + /// Ensures that the storage manager tracks exactly the provided disks. + /// + /// This acts similar to a batch [Self::upsert_disk] for all new disks, and + /// [Self::delete_disk] for all removed disks. + /// + /// If errors occur, an arbitrary "one" of them will be returned, but a + /// best-effort attempt to add all disks will still be attempted. + pub async fn ensure_using_exactly_these_disks(&self, raw_disks: I) + where + I: IntoIterator, + { + self.tx + .send(StorageRequest::DisksChanged(raw_disks.into_iter().collect())) + .await + .unwrap(); + } + + /// Notify the [`StorageManager`] that the [`key_manager::KeyManager`] + /// has determined what [`key_manager::SecretRetriever`] to use and + /// it is now possible to retrieve secrets and construct keys. Note + /// that in cases of using the trust quorum, it is possible that the + /// [`key_manager::SecretRetriever`] is ready, but enough key shares cannot + /// be retrieved from other sleds. In this case, we still will be unable + /// to add the disks successfully. In the common case this is a transient + /// error. In other cases it may be fatal. However, that is outside the + /// scope of the cares of this module. 
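The `key_manager_ready` notification documented above (and defined just below) is the hinge of startup: until it arrives, U.2 disks are queued rather than added. A sketch of the expected wiring, mirroring the tests at the end of this module; the generic bounds on the retriever are an assumption made only so the KeyManager can be moved into a spawned task:

    use key_manager::{KeyManager, SecretRetriever};
    use sled_storage::manager::{StorageHandle, StorageManager};
    use slog::Logger;

    // Assumed bounds: Send + Sync + 'static are needed here because the
    // KeyManager is moved into a spawned task.
    async fn start_storage<R>(log: &Logger, retriever: R) -> StorageHandle
    where
        R: SecretRetriever + Send + Sync + 'static,
    {
        let (mut key_manager, key_requester) = KeyManager::new(log, retriever);
        let (manager, handle) = StorageManager::new(log, key_requester);
        tokio::spawn(async move { key_manager.run().await });
        tokio::spawn(async move { manager.run().await });
        // Once secrets can actually be retrieved, unblock any queued U.2s.
        handle.key_manager_ready().await;
        handle
    }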
+ pub async fn key_manager_ready(&self) { + self.tx.send(StorageRequest::KeyManagerReady).await.unwrap(); + } + + /// Wait for a boot disk to be initialized + pub async fn wait_for_boot_disk(&mut self) -> (DiskIdentity, ZpoolName) { + loop { + let resources = self.resource_updates.borrow_and_update(); + if let Some((disk_id, zpool_name)) = resources.boot_disk() { + return (disk_id, zpool_name); + } + drop(resources); + // We panic if the sender is dropped, as this means + // the StorageManager has gone away, which it should not do. + self.resource_updates.changed().await.unwrap(); + } + } + + /// Wait for any storage resource changes + pub async fn wait_for_changes(&mut self) -> StorageResources { + self.resource_updates.changed().await.unwrap(); + self.resource_updates.borrow_and_update().clone() + } + + /// Retrieve the latest value of `StorageResources` from the + /// `StorageManager` task. + pub async fn get_latest_resources(&self) -> StorageResources { + let (tx, rx) = oneshot::channel(); + self.tx.send(StorageRequest::GetLatestResources(tx)).await.unwrap(); + rx.await.unwrap() + } + + /// Return internal data useful for debugging and testing + pub async fn get_manager_state(&self) -> StorageManagerData { + let (tx, rx) = oneshot::channel(); + self.tx.send(StorageRequest::GetManagerState(tx)).await.unwrap(); + rx.await.unwrap() + } + + pub async fn upsert_filesystem( + &self, + dataset_id: Uuid, + dataset_name: DatasetName, + ) -> Result<(), Error> { + let (tx, rx) = oneshot::channel(); + let request = + NewFilesystemRequest { dataset_id, dataset_name, responder: tx }; + self.tx.send(StorageRequest::NewFilesystem(request)).await.unwrap(); + rx.await.unwrap() + } +} + +// Some sled-agent tests cannot currently use the real StorageManager +// and want to fake the entire behavior, but still have access to the +// `StorageResources`. We allow this via use of the `FakeStorageManager` +// that will respond to real storage requests from a real `StorageHandle`. +#[cfg(feature = "testing")] +pub struct FakeStorageManager { + rx: mpsc::Receiver, + resources: StorageResources, + resource_updates: watch::Sender, +} + +#[cfg(feature = "testing")] +impl FakeStorageManager { + pub fn new() -> (Self, StorageHandle) { + let (tx, rx) = mpsc::channel(QUEUE_SIZE); + let resources = StorageResources::default(); + let (update_tx, update_rx) = watch::channel(resources.clone()); + ( + Self { rx, resources, resource_updates: update_tx }, + StorageHandle { tx, resource_updates: update_rx }, + ) + } + + /// Run the main receive loop of the `FakeStorageManager` + /// + /// This should be spawned into a tokio task + pub async fn run(mut self) { + loop { + match self.rx.recv().await { + Some(StorageRequest::AddDisk(raw_disk)) => { + if self.add_disk(raw_disk).disk_inserted() { + self.resource_updates + .send_replace(self.resources.clone()); + } + } + Some(StorageRequest::GetLatestResources(tx)) => { + let _ = tx.send(self.resources.clone()); + } + Some(_) => { + unreachable!(); + } + None => break, + } + } + } + + // Add a disk to `StorageResources` if it is new and return true if so + fn add_disk(&mut self, raw_disk: RawDisk) -> AddDiskResult { + let disk = match raw_disk { + RawDisk::Real(_) => { + panic!( + "Only synthetic disks can be used with `FakeStorageManager`" + ); + } + RawDisk::Synthetic(synthetic_disk) => { + Disk::Synthetic(synthetic_disk) + } + }; + self.resources.insert_fake_disk(disk) + } +} + +/// The storage manager responsible for the state of the storage +/// on a sled. 
The storage manager runs in its own task and is interacted +/// with via the [`StorageHandle`]. +pub struct StorageManager { + log: Logger, + state: StorageManagerState, + // Used to find the capacity of the channel for tracking purposes + tx: mpsc::Sender, + rx: mpsc::Receiver, + resources: StorageResources, + queued_u2_drives: HashSet, + key_requester: StorageKeyRequester, + resource_updates: watch::Sender, + last_logged_capacity: usize, +} + +impl StorageManager { + pub fn new( + log: &Logger, + key_requester: StorageKeyRequester, + ) -> (StorageManager, StorageHandle) { + let (tx, rx) = mpsc::channel(QUEUE_SIZE); + let resources = StorageResources::default(); + let (update_tx, update_rx) = watch::channel(resources.clone()); + ( + StorageManager { + log: log.new(o!("component" => "StorageManager")), + state: StorageManagerState::WaitingForKeyManager, + tx: tx.clone(), + rx, + resources, + queued_u2_drives: HashSet::new(), + key_requester, + resource_updates: update_tx, + last_logged_capacity: QUEUE_SIZE, + }, + StorageHandle { tx, resource_updates: update_rx }, + ) + } + + /// Run the main receive loop of the `StorageManager` + /// + /// This should be spawned into a tokio task + pub async fn run(mut self) { + loop { + const QUEUED_DISK_RETRY_TIMEOUT: Duration = Duration::from_secs(10); + let mut interval = interval(QUEUED_DISK_RETRY_TIMEOUT); + interval.set_missed_tick_behavior(MissedTickBehavior::Delay); + tokio::select! { + res = self.step() => { + if let Err(e) = res { + warn!(self.log, "{e}"); + } + } + _ = interval.tick(), + if self.state == StorageManagerState::QueueingDisks => + { + if self.add_queued_disks().await { + let _ = self.resource_updates.send_replace(self.resources.clone()); + } + } + } + } + } + + /// Process the next event + /// + /// This is useful for testing/debugging + pub async fn step(&mut self) -> Result<(), Error> { + const CAPACITY_LOG_THRESHOLD: usize = 10; + // We check the capacity and log it every time it changes by at least 10 + // entries in either direction. 
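The `run` loop above pairs the request channel with an interval whose branch is guarded by the `QueueingDisks` state, so queued U.2 disks are retried only while there is something to retry. A stripped-down sketch of that `tokio::select!` shape; the ticker is hoisted out of the loop and the message handling is a placeholder, purely for brevity:

    use tokio::sync::mpsc;
    use tokio::time::{interval, Duration, MissedTickBehavior};

    // Sketch only: `retry_queued` stands in for the QueueingDisks state.
    async fn demo_loop(mut rx: mpsc::Receiver<bool>) {
        let mut retry_queued = false;
        let mut ticker = interval(Duration::from_secs(10));
        ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
        loop {
            tokio::select! {
                msg = rx.recv() => match msg {
                    // A message may leave work queued (true) or not (false).
                    Some(queued) => retry_queued = queued,
                    None => break,
                },
                // This branch is only polled while something is queued.
                _ = ticker.tick(), if retry_queued => {
                    // Retry the queued work here.
                }
            }
        }
    }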
+ let current = self.tx.capacity(); + if self.last_logged_capacity.saturating_sub(current) + >= CAPACITY_LOG_THRESHOLD + { + info!( + self.log, + "Channel capacity decreased"; + "previous" => ?self.last_logged_capacity, + "current" => ?current + ); + self.last_logged_capacity = current; + } else if current.saturating_sub(self.last_logged_capacity) + >= CAPACITY_LOG_THRESHOLD + { + info!( + self.log, + "Channel capacity increased"; + "previous" => ?self.last_logged_capacity, + "current" => ?current + ); + self.last_logged_capacity = current; + } + // The sending side never disappears because we hold a copy + let req = self.rx.recv().await.unwrap(); + info!(self.log, "Received {:?}", req); + let should_send_updates = match req { + StorageRequest::AddDisk(raw_disk) => { + self.add_disk(raw_disk).await?.disk_inserted() + } + StorageRequest::RemoveDisk(raw_disk) => self.remove_disk(raw_disk), + StorageRequest::DisksChanged(raw_disks) => { + self.ensure_using_exactly_these_disks(raw_disks).await + } + StorageRequest::NewFilesystem(request) => { + let result = self.add_dataset(&request).await; + if result.is_err() { + warn!(self.log, "{result:?}"); + } + let _ = request.responder.send(result); + false + } + StorageRequest::KeyManagerReady => { + self.state = StorageManagerState::Normal; + self.add_queued_disks().await + } + StorageRequest::GetLatestResources(tx) => { + let _ = tx.send(self.resources.clone()); + false + } + StorageRequest::GetManagerState(tx) => { + let _ = tx.send(StorageManagerData { + state: self.state, + queued_u2_drives: self.queued_u2_drives.clone(), + }); + false + } + }; + + if should_send_updates { + let _ = self.resource_updates.send_replace(self.resources.clone()); + } + + Ok(()) + } + + // Loop through all queued disks inserting them into [`StorageResources`] + // unless we hit a transient error. If we hit a transient error, we return + // and wait for the next retry window to re-call this method. If we hit a + // permanent error we log it, but we continue inserting queued disks. + // + // Return true if updates should be sent to watchers, false otherwise + async fn add_queued_disks(&mut self) -> bool { + info!( + self.log, + "Attempting to add queued disks"; + "num_disks" => %self.queued_u2_drives.len() + ); + self.state = StorageManagerState::Normal; + + let mut send_updates = false; + + // Disks that should be requeued. + let queued = self.queued_u2_drives.clone(); + let mut to_dequeue = HashSet::new(); + for disk in queued.iter() { + if self.state == StorageManagerState::QueueingDisks { + // We hit a transient error in a prior iteration. + break; + } else { + match self.add_u2_disk(disk.clone()).await { + Err(_) => { + // This is an unrecoverable error, so we don't queue the + // disk again. + to_dequeue.insert(disk); + } + Ok(AddDiskResult::DiskInserted) => { + send_updates = true; + to_dequeue.insert(disk); + } + Ok(AddDiskResult::DiskAlreadyInserted) => { + to_dequeue.insert(disk); + } + Ok(AddDiskResult::DiskQueued) => (), + } + } + } + // Dequeue any inserted disks + self.queued_u2_drives.retain(|k| !to_dequeue.contains(k)); + send_updates + } + + // Add a disk to `StorageResources` if it is new, + // updated, or its pool has been updated as determined by + // [`$crate::resources::StorageResources::insert_disk`] and we decide not to + // queue the disk for later addition. 
+ async fn add_disk( + &mut self, + raw_disk: RawDisk, + ) -> Result { + match raw_disk.variant() { + DiskVariant::U2 => self.add_u2_disk(raw_disk).await, + DiskVariant::M2 => self.add_m2_disk(raw_disk).await, + } + } + + // Add a U.2 disk to [`StorageResources`] or queue it to be added later + async fn add_u2_disk( + &mut self, + raw_disk: RawDisk, + ) -> Result { + if self.state != StorageManagerState::Normal { + self.queued_u2_drives.insert(raw_disk); + return Ok(AddDiskResult::DiskQueued); + } + + match Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) + .await + { + Ok(disk) => self.resources.insert_disk(disk), + Err(err @ DiskError::Dataset(DatasetError::KeyManager(_))) => { + warn!( + self.log, + "Transient error: {err}: queuing disk"; + "disk_id" => ?raw_disk.identity() + ); + self.queued_u2_drives.insert(raw_disk); + self.state = StorageManagerState::QueueingDisks; + Ok(AddDiskResult::DiskQueued) + } + Err(err) => { + error!( + self.log, + "Persistent error: {err}: not queueing disk"; + "disk_id" => ?raw_disk.identity() + ); + Err(err.into()) + } + } + } + + // Add a U.2 disk to [`StorageResources`] if new and return `Ok(true)` if so + // + // + // We never queue M.2 drives, as they don't rely on [`KeyManager`] based + // encryption + async fn add_m2_disk( + &mut self, + raw_disk: RawDisk, + ) -> Result { + let disk = + Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) + .await?; + self.resources.insert_disk(disk) + } + + // Delete a real disk and return `true` if the disk was actually removed + fn remove_disk(&mut self, raw_disk: RawDisk) -> bool { + // If the disk is a U.2, we want to first delete it from any queued disks + let _ = self.queued_u2_drives.remove(&raw_disk); + self.resources.remove_disk(raw_disk.identity()) + } + + // Find all disks to remove that are not in raw_disks and remove them. Then + // take the remaining disks and try to add them all. `StorageResources` will + // inform us if anything changed, and if so we return true, otherwise we + // return false. + async fn ensure_using_exactly_these_disks( + &mut self, + raw_disks: HashSet, + ) -> bool { + let mut should_update = false; + + // Clear out any queued U.2 disks that are real. + // We keep synthetic disks, as they are only added once. + self.queued_u2_drives.retain(|d| d.is_synthetic()); + + let all_ids: HashSet<_> = + raw_disks.iter().map(|d| d.identity()).collect(); + + // Find all existing disks not in the current set + let to_remove: Vec = self + .resources + .disks() + .keys() + .filter_map(|id| { + if !all_ids.contains(id) { + Some(id.clone()) + } else { + None + } + }) + .collect(); + + for id in to_remove { + if self.resources.remove_disk(&id) { + should_update = true; + } + } + + for raw_disk in raw_disks { + let disk_id = raw_disk.identity().clone(); + match self.add_disk(raw_disk).await { + Ok(AddDiskResult::DiskInserted) => should_update = true, + Ok(_) => (), + Err(err) => { + warn!( + self.log, + "Failed to add disk to storage resources: {err}"; + "disk_id" => ?disk_id + ); + } + } + } + + should_update + } + + // Attempts to add a dataset within a zpool, according to `request`. 
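From a client's perspective the entry point for the dataset path is `StorageHandle::upsert_filesystem`, and the `uuid` user property checked in `add_dataset` (defined just below) makes the call idempotent when the same id is supplied again. A hedged sketch of that call; the wrapper function and the freshly generated UUID are illustrative only:

    use illumos_utils::zpool::ZpoolName;
    use sled_storage::dataset::{DatasetKind, DatasetName};
    use sled_storage::error::Error;
    use sled_storage::manager::StorageHandle;
    use uuid::Uuid;

    // Illustrative wrapper: a real caller would receive the dataset id from
    // its control plane rather than generating one here.
    async fn ensure_crucible_dataset(
        handle: &StorageHandle,
        pool: ZpoolName,
    ) -> Result<(), Error> {
        let dataset_id = Uuid::new_v4();
        let name = DatasetName::new(pool, DatasetKind::Crucible);
        // Creates the filesystem if needed and stamps/validates its "uuid"
        // property via the add_dataset path below.
        handle.upsert_filesystem(dataset_id, name).await
    }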
+ async fn add_dataset( + &mut self, + request: &NewFilesystemRequest, + ) -> Result<(), Error> { + info!(self.log, "add_dataset: {:?}", request); + if !self + .resources + .disks() + .values() + .any(|(_, pool)| &pool.name == request.dataset_name.pool()) + { + return Err(Error::ZpoolNotFound(format!( + "{}, looked up while trying to add dataset", + request.dataset_name.pool(), + ))); + } + + let zoned = true; + let fs_name = &request.dataset_name.full(); + let do_format = true; + let encryption_details = None; + let size_details = None; + Zfs::ensure_filesystem( + fs_name, + Mountpoint::Path(Utf8PathBuf::from("/data")), + zoned, + do_format, + encryption_details, + size_details, + None, + )?; + // Ensure the dataset has a usable UUID. + if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { + if let Ok(id) = id_str.parse::() { + if id != request.dataset_id { + return Err(Error::UuidMismatch { + name: Box::new(request.dataset_name.clone()), + old: id, + new: request.dataset_id, + }); + } + return Ok(()); + } + } + Zfs::set_oxide_value( + &fs_name, + "uuid", + &request.dataset_id.to_string(), + )?; + + Ok(()) + } +} + +/// All tests only use synthetic disks, but are expected to be run on illumos +/// systems. +#[cfg(all(test, target_os = "illumos"))] +mod tests { + use crate::dataset::DatasetKind; + use crate::disk::SyntheticDisk; + + use super::*; + use async_trait::async_trait; + use camino_tempfile::tempdir; + use illumos_utils::zpool::Zpool; + use key_manager::{ + KeyManager, SecretRetriever, SecretRetrieverError, SecretState, + VersionedIkm, + }; + use omicron_test_utils::dev::test_setup_log; + use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }; + use uuid::Uuid; + + /// A [`key-manager::SecretRetriever`] that only returns hardcoded IKM for + /// epoch 0 + #[derive(Debug, Default)] + struct HardcodedSecretRetriever { + inject_error: Arc, + } + + #[async_trait] + impl SecretRetriever for HardcodedSecretRetriever { + async fn get_latest( + &self, + ) -> Result { + if self.inject_error.load(Ordering::SeqCst) { + return Err(SecretRetrieverError::Bootstore( + "Timeout".to_string(), + )); + } + + let epoch = 0; + let salt = [0u8; 32]; + let secret = [0x1d; 32]; + + Ok(VersionedIkm::new(epoch, salt, &secret)) + } + + /// We don't plan to do any key rotation before trust quorum is ready + async fn get( + &self, + epoch: u64, + ) -> Result { + if self.inject_error.load(Ordering::SeqCst) { + return Err(SecretRetrieverError::Bootstore( + "Timeout".to_string(), + )); + } + if epoch != 0 { + return Err(SecretRetrieverError::NoSuchEpoch(epoch)); + } + Ok(SecretState::Current(self.get_latest().await?)) + } + } + + #[tokio::test] + async fn add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log( + "add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued", + ); + let (mut _key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); + let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); + let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let raw_disk: RawDisk = SyntheticDisk::new(zpool_name).into(); + assert_eq!(StorageManagerState::WaitingForKeyManager, manager.state); + manager.add_u2_disk(raw_disk.clone()).await.unwrap(); + assert!(manager.resources.all_u2_zpools().is_empty()); + assert_eq!(manager.queued_u2_drives, HashSet::from([raw_disk.clone()])); + + // Check other non-normal stages and ensure disk gets 
+        manager.queued_u2_drives.clear();
+        manager.state = StorageManagerState::QueueingDisks;
+        manager.add_u2_disk(raw_disk.clone()).await.unwrap();
+        assert!(manager.resources.all_u2_zpools().is_empty());
+        assert_eq!(manager.queued_u2_drives, HashSet::from([raw_disk]));
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn ensure_u2_gets_added_to_resources() {
+        illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst);
+        let logctx = test_setup_log("ensure_u2_gets_added_to_resources");
+        let (mut key_manager, key_requester) =
+            KeyManager::new(&logctx.log, HardcodedSecretRetriever::default());
+        let (mut manager, _) = StorageManager::new(&logctx.log, key_requester);
+        let zpool_name = ZpoolName::new_external(Uuid::new_v4());
+        let dir = tempdir().unwrap();
+        let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into();
+
+        // Spawn the key_manager so that it will respond to requests for encryption keys
+        tokio::spawn(async move { key_manager.run().await });
+
+        // Set the stage to pretend we've progressed enough to have a key_manager available.
+        manager.state = StorageManagerState::Normal;
+        manager.add_u2_disk(disk).await.unwrap();
+        assert_eq!(manager.resources.all_u2_zpools().len(), 1);
+        Zpool::destroy(&zpool_name).unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn wait_for_bootdisk() {
+        illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst);
+        let logctx = test_setup_log("wait_for_bootdisk");
+        let (mut key_manager, key_requester) =
+            KeyManager::new(&logctx.log, HardcodedSecretRetriever::default());
+        let (manager, mut handle) =
+            StorageManager::new(&logctx.log, key_requester);
+        // Spawn the key_manager so that it will respond to requests for encryption keys
+        tokio::spawn(async move { key_manager.run().await });
+
+        // Spawn the storage manager as done by sled-agent
+        tokio::spawn(async move {
+            manager.run().await;
+        });
+
+        // Create a synthetic internal disk
+        let zpool_name = ZpoolName::new_internal(Uuid::new_v4());
+        let dir = tempdir().unwrap();
+        let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into();
+
+        handle.upsert_disk(disk).await;
+        handle.wait_for_boot_disk().await;
+        Zpool::destroy(&zpool_name).unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn queued_disks_get_added_as_resources() {
+        illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst);
+        let logctx = test_setup_log("queued_disks_get_added_as_resources");
+        let (mut key_manager, key_requester) =
+            KeyManager::new(&logctx.log, HardcodedSecretRetriever::default());
+        let (manager, handle) = StorageManager::new(&logctx.log, key_requester);
+
+        // Spawn the key_manager so that it will respond to requests for encryption keys
+        tokio::spawn(async move { key_manager.run().await });
+
+        // Spawn the storage manager as done by sled-agent
+        tokio::spawn(async move {
+            manager.run().await;
+        });
+
+        // Queue up a disk, as we haven't told the `StorageManager` that
+        // the `KeyManager` is ready yet.
+        let zpool_name = ZpoolName::new_external(Uuid::new_v4());
+        let dir = tempdir().unwrap();
+        let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into();
+        handle.upsert_disk(disk).await;
+        let resources = handle.get_latest_resources().await;
+        assert!(resources.all_u2_zpools().is_empty());
+
+        // Now inform the storage manager that the key manager is ready.
+        // The queued disk should be successfully added.
+        handle.key_manager_ready().await;
+        let resources = handle.get_latest_resources().await;
+        assert_eq!(resources.all_u2_zpools().len(), 1);
+        Zpool::destroy(&zpool_name).unwrap();
+        logctx.cleanup_successful();
+    }
+
+    /// For this test, we are going to step through the message receive loop
+    /// directly without running the `StorageManager` in a tokio task.
+    /// This allows us to control timing precisely.
+    #[tokio::test]
+    async fn queued_disks_get_requeued_on_secret_retriever_error() {
+        illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst);
+        let logctx = test_setup_log(
+            "queued_disks_get_requeued_on_secret_retriever_error",
+        );
+        let inject_error = Arc::new(AtomicBool::new(false));
+        let (mut key_manager, key_requester) = KeyManager::new(
+            &logctx.log,
+            HardcodedSecretRetriever { inject_error: inject_error.clone() },
+        );
+        let (mut manager, handle) =
+            StorageManager::new(&logctx.log, key_requester);
+
+        // Spawn the key_manager so that it will respond to requests for encryption keys
+        tokio::spawn(async move { key_manager.run().await });
+
+        // Queue up a disk, as we haven't told the `StorageManager` that
+        // the `KeyManager` is ready yet.
+        let zpool_name = ZpoolName::new_external(Uuid::new_v4());
+        let dir = tempdir().unwrap();
+        let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into();
+        handle.upsert_disk(disk).await;
+        manager.step().await.unwrap();
+
+        // We can't wait for a reply through the handle as the storage manager task
+        // isn't actually running. We just check the resources directly.
+        assert!(manager.resources.all_u2_zpools().is_empty());
+
+        // Let's inject an error to the `SecretRetriever` to simulate a trust
+        // quorum timeout
+        inject_error.store(true, Ordering::SeqCst);
+
+        // Now inform the storage manager that the key manager is ready.
+        // The queued disk should not be added due to the error.
+        handle.key_manager_ready().await;
+        manager.step().await.unwrap();
+        assert!(manager.resources.all_u2_zpools().is_empty());
+
+        // Manually simulating a timer tick to add queued disks should also
+        // still hit the error
+        manager.add_queued_disks().await;
+        assert!(manager.resources.all_u2_zpools().is_empty());
+
+        // Clearing the injected error will cause the disk to get added
+        inject_error.store(false, Ordering::SeqCst);
+        manager.add_queued_disks().await;
+        assert_eq!(1, manager.resources.all_u2_zpools().len());
+
+        Zpool::destroy(&zpool_name).unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn delete_disk_triggers_notification() {
+        illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst);
+        let logctx = test_setup_log("delete_disk_triggers_notification");
+        let (mut key_manager, key_requester) =
+            KeyManager::new(&logctx.log, HardcodedSecretRetriever::default());
+        let (manager, mut handle) =
+            StorageManager::new(&logctx.log, key_requester);
+
+        // Spawn the key_manager so that it will respond to requests for encryption keys
+        tokio::spawn(async move { key_manager.run().await });
+
+        // Spawn the storage manager as done by sled-agent
+        tokio::spawn(async move {
+            manager.run().await;
+        });
+
+        // Inform the storage manager that the key manager is ready, so disks
+        // don't get queued
+        handle.key_manager_ready().await;
+
+        // Create and add a disk
+        let zpool_name = ZpoolName::new_external(Uuid::new_v4());
+        let dir = tempdir().unwrap();
+        let disk: RawDisk =
+            SyntheticDisk::create_zpool(dir.path(), &zpool_name).into();
+        handle.upsert_disk(disk.clone()).await;
+
+        // Wait for the add disk notification
+        let resources = handle.wait_for_changes().await;
+        assert_eq!(resources.all_u2_zpools().len(), 1);
+
+        // Delete the disk and wait for a notification
+        handle.delete_disk(disk).await;
+        let resources = handle.wait_for_changes().await;
+        assert!(resources.all_u2_zpools().is_empty());
+
+        Zpool::destroy(&zpool_name).unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn ensure_using_exactly_these_disks() {
+        illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst);
+        let logctx = test_setup_log("ensure_using_exactly_these_disks");
+        let (mut key_manager, key_requester) =
+            KeyManager::new(&logctx.log, HardcodedSecretRetriever::default());
+        let (manager, mut handle) =
+            StorageManager::new(&logctx.log, key_requester);
+
+        // Spawn the key_manager so that it will respond to requests for encryption keys
+        tokio::spawn(async move { key_manager.run().await });
+
+        // Spawn the storage manager as done by sled-agent
+        tokio::spawn(async move {
+            manager.run().await;
+        });
+
+        // Create a bunch of file-backed external disks with zpools
+        let dir = tempdir().unwrap();
+        let zpools: Vec<ZpoolName> =
+            (0..10).map(|_| ZpoolName::new_external(Uuid::new_v4())).collect();
+        let disks: Vec<RawDisk> = zpools
+            .iter()
+            .map(|zpool_name| {
+                SyntheticDisk::create_zpool(dir.path(), zpool_name).into()
+            })
+            .collect();
+
+        // Add the first 3 disks, and ensure they get queued, as we haven't
+        // marked our key manager ready yet
+        handle
+            .ensure_using_exactly_these_disks(disks.iter().take(3).cloned())
+            .await;
+        let state = handle.get_manager_state().await;
+        assert_eq!(state.queued_u2_drives.len(), 3);
+        assert_eq!(state.state, StorageManagerState::WaitingForKeyManager);
+        assert!(handle.get_latest_resources().await.all_u2_zpools().is_empty());
+
+        // Mark the key manager ready and wait for the storage update
+        handle.key_manager_ready().await;
+        let resources = handle.wait_for_changes().await;
+        let expected: HashSet<_> =
+            disks.iter().take(3).map(|d| d.identity()).collect();
+        let actual: HashSet<_> = resources.disks().keys().collect();
+        assert_eq!(expected, actual);
+
+        // Add the three disks that follow the initial one. The returned
+        // resources should not contain the first disk.
+        handle
+            .ensure_using_exactly_these_disks(
+                disks.iter().skip(1).take(3).cloned(),
+            )
+            .await;
+        let resources = handle.wait_for_changes().await;
+        let expected: HashSet<_> =
+            disks.iter().skip(1).take(3).map(|d| d.identity()).collect();
+        let actual: HashSet<_> = resources.disks().keys().collect();
+        assert_eq!(expected, actual);
+
+        // Ensure the same set of disks and make sure no change occurs.
+        // Note that we directly request the resources this time so we aren't
+        // waiting forever for a change notification.
+        handle
+            .ensure_using_exactly_these_disks(
+                disks.iter().skip(1).take(3).cloned(),
+            )
+            .await;
+        let resources2 = handle.get_latest_resources().await;
+        assert_eq!(resources, resources2);
+
+        // Add a disjoint set of disks and see that only they come through
+        handle
+            .ensure_using_exactly_these_disks(
+                disks.iter().skip(4).take(5).cloned(),
+            )
+            .await;
+        let resources = handle.wait_for_changes().await;
+        let expected: HashSet<_> =
+            disks.iter().skip(4).take(5).map(|d| d.identity()).collect();
+        let actual: HashSet<_> = resources.disks().keys().collect();
+        assert_eq!(expected, actual);
+
+        // Finally, change the zpool backing of the 5th disk to be that of the
+        // 10th and ensure that disk changes. Note that we don't change the
+        // identity of the 5th disk.
+        let mut modified_disk = disks[4].clone();
+        if let RawDisk::Synthetic(disk) = &mut modified_disk {
+            disk.zpool_name = disks[9].zpool_name().clone();
+        } else {
+            panic!();
+        }
+        let mut expected: HashSet<_> =
+            disks.iter().skip(5).take(4).cloned().collect();
+        expected.insert(modified_disk);
+
+        handle
+            .ensure_using_exactly_these_disks(expected.clone().into_iter())
+            .await;
+        let resources = handle.wait_for_changes().await;
+
+        // Ensure the one modified disk changed as we expected
+        assert_eq!(5, resources.disks().len());
+        for raw_disk in expected {
+            let (disk, pool) =
+                resources.disks().get(raw_disk.identity()).unwrap();
+            assert_eq!(disk.zpool_name(), raw_disk.zpool_name());
+            assert_eq!(&pool.name, disk.zpool_name());
+            assert_eq!(raw_disk.identity(), &pool.parent);
+        }
+
+        // Cleanup
+        for zpool in zpools {
+            Zpool::destroy(&zpool).unwrap();
+        }
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn upsert_filesystem() {
+        illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst);
+        let logctx = test_setup_log("upsert_filesystem");
+        let (mut key_manager, key_requester) =
+            KeyManager::new(&logctx.log, HardcodedSecretRetriever::default());
+        let (manager, handle) = StorageManager::new(&logctx.log, key_requester);
+
+        // Spawn the key_manager so that it will respond to requests for encryption keys
+        tokio::spawn(async move { key_manager.run().await });
+
+        // Spawn the storage manager as done by sled-agent
+        tokio::spawn(async move {
+            manager.run().await;
+        });
+
+        handle.key_manager_ready().await;
+
+        // Create and add a disk
+        let zpool_name = ZpoolName::new_external(Uuid::new_v4());
+        let dir = tempdir().unwrap();
+        let disk: RawDisk =
+            SyntheticDisk::create_zpool(dir.path(), &zpool_name).into();
+        handle.upsert_disk(disk.clone()).await;
+
+        // Create a filesystem
+        let dataset_id = Uuid::new_v4();
+        let dataset_name =
+            DatasetName::new(zpool_name.clone(), DatasetKind::Crucible);
+        handle.upsert_filesystem(dataset_id, dataset_name).await.unwrap();
+
+        Zpool::destroy(&zpool_name).unwrap();
+        logctx.cleanup_successful();
+    }
+}
diff --git a/sled-storage/src/pool.rs b/sled-storage/src/pool.rs
new file mode 100644
index 0000000000..cc71aeb19d
--- /dev/null
+++ b/sled-storage/src/pool.rs
@@ -0,0 +1,35 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! ZFS storage pool
+
+use crate::error::Error;
+use illumos_utils::zpool::{Zpool, ZpoolInfo, ZpoolName};
+use omicron_common::disk::DiskIdentity;
+
+/// A ZFS storage pool wrapper that tracks information returned from
+/// `zpool` commands
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Pool {
+    pub name: ZpoolName,
+    pub info: ZpoolInfo,
+    pub parent: DiskIdentity,
+}
+
+impl Pool {
+    /// Queries for an existing Zpool by name.
+    ///
+    /// Returns Ok if the pool exists.
+    pub fn new(name: ZpoolName, parent: DiskIdentity) -> Result<Pool, Error> {
+        let info = Zpool::get_info(&name.to_string())?;
+        Ok(Pool { name, info, parent })
+    }
+
+    /// Return a Pool consisting of fake info
+    #[cfg(feature = "testing")]
+    pub fn new_with_fake_info(name: ZpoolName, parent: DiskIdentity) -> Pool {
+        let info = ZpoolInfo::new_hardcoded(name.to_string());
+        Pool { name, info, parent }
+    }
+}
diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs
new file mode 100644
index 0000000000..c1f460dc92
--- /dev/null
+++ b/sled-storage/src/resources.rs
@@ -0,0 +1,206 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Discovered and usable disks and zpools
+
+use crate::dataset::M2_DEBUG_DATASET;
+use crate::disk::Disk;
+use crate::error::Error;
+use crate::pool::Pool;
+use camino::Utf8PathBuf;
+use cfg_if::cfg_if;
+use illumos_utils::zpool::ZpoolName;
+use omicron_common::disk::DiskIdentity;
+use sled_hardware::DiskVariant;
+use std::collections::BTreeMap;
+use std::sync::Arc;
+
+// The directory within the debug dataset in which bundles are created.
+const BUNDLE_DIRECTORY: &str = "bundle";
+
+// The directory for zone bundles.
+const ZONE_BUNDLE_DIRECTORY: &str = "zone";
+
+pub enum AddDiskResult {
+    DiskInserted,
+    DiskAlreadyInserted,
+    DiskQueued,
+}
+
+impl AddDiskResult {
+    pub fn disk_inserted(&self) -> bool {
+        match self {
+            AddDiskResult::DiskInserted => true,
+            _ => false,
+        }
+    }
+}
+
+/// Storage-related resources: disks and zpools
+///
+/// This state is internal to the [`crate::manager::StorageManager`] task. Clones
+/// of this state can be retrieved by requests to the `StorageManager` task
+/// from the [`crate::manager::StorageHandle`]. This state is not `Sync`, and
+/// as such does not require any mutexes. However, we do expect to share it
+/// relatively frequently, and we want copies of it to be as cheaply made
+/// as possible. So any large state is stored inside `Arc`s. On the other
+/// hand, we expect infrequent updates to this state, and as such, we use
+/// [`std::sync::Arc::make_mut`] to implement clone-on-write functionality
+/// inside the `StorageManager` task if there are any outstanding copies.
+/// Therefore, we only pay the cost to update infrequently, and no locks are
+/// required by callers when operating on cloned data. The only contention here
+/// is for the reference counters of the internal Arcs when `StorageResources`
+/// gets cloned or dropped.
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub struct StorageResources {
+    // All disks, real and synthetic, being managed by this sled
+    disks: Arc<BTreeMap<DiskIdentity, (Disk, Pool)>>,
+}
+
+impl StorageResources {
+    /// Return a reference to the current snapshot of disks
+    pub fn disks(&self) -> &BTreeMap<DiskIdentity, (Disk, Pool)> {
+        &self.disks
+    }
+
+    /// Insert a disk and its zpool
+    ///
+    /// If the disk passed in is new or modified, or its pool size or pool
+    /// name changed, then insert the changed values and return `DiskInserted`.
+    /// Otherwise, do not insert anything and return `DiskAlreadyInserted`.
+    /// For instance, if only the pool health changes, because it is not one
+    /// of the checked values, we will not insert the update and will return
+    /// `DiskAlreadyInserted`.
+    pub(crate) fn insert_disk(
+        &mut self,
+        disk: Disk,
+    ) -> Result<AddDiskResult, Error> {
+        let disk_id = disk.identity().clone();
+        let zpool_name = disk.zpool_name().clone();
+        let zpool = Pool::new(zpool_name, disk_id.clone())?;
+        if let Some((stored_disk, stored_pool)) = self.disks.get(&disk_id) {
+            if stored_disk == &disk
+                && stored_pool.info.size() == zpool.info.size()
+                && stored_pool.name == zpool.name
+            {
+                return Ok(AddDiskResult::DiskAlreadyInserted);
+            }
+        }
+        // Either the disk or zpool changed
+        Arc::make_mut(&mut self.disks).insert(disk_id, (disk, zpool));
+        Ok(AddDiskResult::DiskInserted)
+    }
+
+    /// Insert a disk while creating a fake pool.
+    /// This is a workaround for current mock-based testing strategies
+    /// in the sled-agent.
+    #[cfg(feature = "testing")]
+    pub fn insert_fake_disk(&mut self, disk: Disk) -> AddDiskResult {
+        let disk_id = disk.identity().clone();
+        let zpool_name = disk.zpool_name().clone();
+        let zpool = Pool::new_with_fake_info(zpool_name, disk_id.clone());
+        if self.disks.contains_key(&disk_id) {
+            return AddDiskResult::DiskAlreadyInserted;
+        }
+        // The disk is new; insert it along with its fake pool
+        Arc::make_mut(&mut self.disks).insert(disk_id, (disk, zpool));
+        AddDiskResult::DiskInserted
+    }
+
+    /// Delete a disk and its zpool
+    ///
+    /// Return true if data was changed, false otherwise
+    ///
+    /// Note: We never allow removal of synthetic disks in production as they
+    /// are only added once.
+    pub(crate) fn remove_disk(&mut self, id: &DiskIdentity) -> bool {
+        let Some((disk, _)) = self.disks.get(id) else {
+            return false;
+        };
+
+        cfg_if! {
+            if #[cfg(test)] {
+                // For testing purposes, we allow synthetic disks to be deleted.
+                // Silence an unused variable warning.
+                _ = disk;
+            } else {
+                // In production, we disallow removal of synthetic disks as they
+                // are only added once.
+                if disk.is_synthetic() {
+                    return false;
+                }
+            }
+        }
+
+        // Safe to unwrap as we just checked the key existed above
+        Arc::make_mut(&mut self.disks).remove(id).unwrap();
+        true
+    }
+
+    /// Returns the identity of the boot disk.
+    ///
+    /// If this returns `None`, we have not processed the boot disk yet.
+    pub fn boot_disk(&self) -> Option<(DiskIdentity, ZpoolName)> {
+        for (id, (disk, _)) in self.disks.iter() {
+            if disk.is_boot_disk() {
+                return Some((id.clone(), disk.zpool_name().clone()));
+            }
+        }
+        None
+    }
+
+    /// Returns all M.2 zpools
+    pub fn all_m2_zpools(&self) -> Vec<ZpoolName> {
+        self.all_zpools(DiskVariant::M2)
+    }
+
+    /// Returns all U.2 zpools
+    pub fn all_u2_zpools(&self) -> Vec<ZpoolName> {
+        self.all_zpools(DiskVariant::U2)
+    }
+
+    /// Returns all mountpoints within all M.2s for a particular dataset.
+    pub fn all_m2_mountpoints(&self, dataset: &str) -> Vec<Utf8PathBuf> {
+        self.all_m2_zpools()
+            .iter()
+            .map(|zpool| zpool.dataset_mountpoint(dataset))
+            .collect()
+    }
+
+    /// Returns all mountpoints within all U.2s for a particular dataset.
+    pub fn all_u2_mountpoints(&self, dataset: &str) -> Vec<Utf8PathBuf> {
+        self.all_u2_zpools()
+            .iter()
+            .map(|zpool| zpool.dataset_mountpoint(dataset))
+            .collect()
+    }
+
+    pub fn get_all_zpools(&self) -> Vec<(ZpoolName, DiskVariant)> {
+        self.disks
+            .values()
+            .map(|(disk, _)| (disk.zpool_name().clone(), disk.variant()))
+            .collect()
+    }
+
+    // Returns all zpools of a particular variant
+    fn all_zpools(&self, variant: DiskVariant) -> Vec<ZpoolName> {
+        self.disks
+            .values()
+            .filter_map(|(disk, _)| {
+                if disk.variant() == variant {
+                    return Some(disk.zpool_name().clone());
+                }
+                None
+            })
+            .collect()
+    }
+
+    /// Return the directories for storing zone service bundles.
+    pub fn all_zone_bundle_directories(&self) -> Vec<Utf8PathBuf> {
+        self.all_m2_mountpoints(M2_DEBUG_DATASET)
+            .into_iter()
+            .map(|p| p.join(BUNDLE_DIRECTORY).join(ZONE_BUNDLE_DIRECTORY))
+            .collect()
+    }
+}
diff --git a/wicketd/src/artifacts/extracted_artifacts.rs b/wicketd/src/artifacts/extracted_artifacts.rs
index 352d8ad3d5..b796201936 100644
--- a/wicketd/src/artifacts/extracted_artifacts.rs
+++ b/wicketd/src/artifacts/extracted_artifacts.rs
@@ -169,7 +169,7 @@ impl ExtractedArtifacts {
     ///
     /// As the returned file is written to, the data will be hashed; once
     /// writing is complete, call [`ExtractedArtifacts::store_tempfile()`] to
-    /// persist the temporary file into an [`ExtractedArtifactDataHandle()`].
+    /// persist the temporary file into an [`ExtractedArtifactDataHandle`].
     pub(super) fn new_tempfile(
         &self,
     ) -> Result {
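
For reference, here is a minimal sketch of how a consumer might drive the new `StorageManager`/`StorageHandle` pair, mirroring the flow exercised by the tests above. The function name `bring_up_storage`, the generic `retriever` parameter, the `vdev_dir` argument, and the exact module paths are illustrative assumptions; the real wiring lives in sled-agent and may differ.

use camino::Utf8Path;
use illumos_utils::zpool::ZpoolName;
use key_manager::{KeyManager, SecretRetriever};
use sled_storage::disk::{RawDisk, SyntheticDisk};
use sled_storage::manager::StorageManager;
use uuid::Uuid;

// Sketch only: spins up both actors, marks the key manager ready, and
// reports a single synthetic U.2 backed by a file under `vdev_dir`.
async fn bring_up_storage<R: SecretRetriever + 'static>(
    log: &slog::Logger,
    retriever: R,
    vdev_dir: &Utf8Path,
) {
    let (mut key_manager, key_requester) = KeyManager::new(log, retriever);
    let (manager, mut handle) = StorageManager::new(log, key_requester);

    // Both actors run as independent tokio tasks and communicate over
    // channels held by `handle` / `key_requester`.
    tokio::spawn(async move { key_manager.run().await });
    tokio::spawn(async move { manager.run().await });

    // Disks reported before this call are queued rather than adopted.
    handle.key_manager_ready().await;

    // Report a synthetic U.2 and wait for the resulting resource snapshot.
    let zpool_name = ZpoolName::new_external(Uuid::new_v4());
    let disk: RawDisk =
        SyntheticDisk::create_zpool(vdev_dir, &zpool_name).into();
    handle.upsert_disk(disk).await;
    let resources = handle.wait_for_changes().await;
    assert_eq!(resources.all_u2_zpools().len(), 1);
}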
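
The `StorageResources` doc comment leans on `Arc::make_mut` for clone-on-write sharing. The snippet below is a standalone, std-only sketch of that pattern (the `Snapshot` type and field names are invented for illustration and are not part of this change): cloning the snapshot only bumps a refcount, and a writer copies the inner map only while another clone is still alive.

use std::collections::BTreeMap;
use std::sync::Arc;

#[derive(Clone, Debug)]
struct Snapshot {
    // Large state behind an Arc so clones are cheap.
    disks: Arc<BTreeMap<String, u64>>,
}

fn main() {
    let mut writer = Snapshot { disks: Arc::new(BTreeMap::new()) };
    let reader = writer.clone(); // shares the same allocation

    // Because `reader` still holds a reference, make_mut clones the map
    // before inserting, leaving the reader's view untouched.
    Arc::make_mut(&mut writer.disks).insert("disk0".to_string(), 1);

    assert!(reader.disks.is_empty());
    assert_eq!(writer.disks.len(), 1);
}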