diff --git a/Cargo.lock b/Cargo.lock index 142a1ed939..02e09d74d0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1332,6 +1332,32 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +[[package]] +name = "clickana" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "chrono", + "clap", + "clickhouse-admin-server-client", + "clickhouse-admin-types", + "dropshot 0.13.0", + "futures", + "omicron-common", + "omicron-workspace-hack", + "ratatui", + "schemars", + "serde_json", + "slog", + "slog-async", + "slog-dtrace", + "slog-error-chain", + "slog-term", + "tokio", + "tokio-postgres", +] + [[package]] name = "clickhouse-admin-api" version = "0.1.0" @@ -6032,6 +6058,19 @@ dependencies = [ "uuid", ] +[[package]] +name = "nexus-reconfigurator-blippy" +version = "0.1.0" +dependencies = [ + "nexus-reconfigurator-planning", + "nexus-sled-agent-shared", + "nexus-types", + "omicron-common", + "omicron-test-utils", + "omicron-uuid-kinds", + "omicron-workspace-hack", +] + [[package]] name = "nexus-reconfigurator-execution" version = "0.1.0" @@ -6099,6 +6138,7 @@ dependencies = [ "maplit", "nexus-config", "nexus-inventory", + "nexus-reconfigurator-blippy", "nexus-sled-agent-shared", "nexus-types", "omicron-common", @@ -7141,6 +7181,7 @@ dependencies = [ "ring 0.17.8", "semver 1.0.23", "serde", + "shell-words", "sled-hardware", "slog", "slog-async", diff --git a/Cargo.toml b/Cargo.toml index 639064580b..12be26ecbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ members = [ "cockroach-admin/types", "common", "dev-tools/cert-dev", + "dev-tools/clickana", "dev-tools/clickhouse-cluster-dev", "dev-tools/ch-dev", "dev-tools/crdb-seed", @@ -78,6 +79,7 @@ members = [ "nexus/macros-common", "nexus/metrics-producer-gc", "nexus/networking", + "nexus/reconfigurator/blippy", "nexus/reconfigurator/execution", "nexus/reconfigurator/planning", "nexus/reconfigurator/preparation", @@ -158,6 +160,7 @@ default-members = [ "cockroach-admin/types", "common", "dev-tools/cert-dev", + "dev-tools/clickana", "dev-tools/clickhouse-cluster-dev", "dev-tools/ch-dev", "dev-tools/crdb-seed", @@ -211,6 +214,7 @@ default-members = [ "nexus/macros-common", "nexus/metrics-producer-gc", "nexus/networking", + "nexus/reconfigurator/blippy", "nexus/reconfigurator/execution", "nexus/reconfigurator/planning", "nexus/reconfigurator/preparation", @@ -332,6 +336,7 @@ chrono = { version = "0.4", features = [ "serde" ] } chrono-tz = "0.10.0" ciborium = "0.2.2" clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] } +clickana = { path = "dev-tools/clickana" } clickhouse-admin-api = { path = "clickhouse-admin/api" } clickhouse-admin-keeper-client = { path = "clients/clickhouse-admin-keeper-client" } clickhouse-admin-server-client = { path = "clients/clickhouse-admin-server-client" } @@ -466,6 +471,7 @@ nexus-internal-api = { path = "nexus/internal-api" } nexus-macros-common = { path = "nexus/macros-common" } nexus-metrics-producer-gc = { path = "nexus/metrics-producer-gc" } nexus-networking = { path = "nexus/networking" } +nexus-reconfigurator-blippy = { path = "nexus/reconfigurator/blippy" } nexus-reconfigurator-execution = { path = "nexus/reconfigurator/execution" } nexus-reconfigurator-planning = { path = "nexus/reconfigurator/planning" } nexus-reconfigurator-preparation = { path = "nexus/reconfigurator/preparation" } diff --git a/clickhouse-admin/Cargo.toml b/clickhouse-admin/Cargo.toml index 
80b080b2ff..65ceb3fdbf 100644 --- a/clickhouse-admin/Cargo.toml +++ b/clickhouse-admin/Cargo.toml @@ -22,7 +22,9 @@ slog.workspace = true slog-async.workspace = true slog-dtrace.workspace = true slog-error-chain.workspace = true +slog-term.workspace = true serde.workspace = true +serde_json.workspace = true thiserror.workspace = true tokio.workspace = true tokio-postgres.workspace = true diff --git a/clickhouse-admin/types/src/lib.rs b/clickhouse-admin/types/src/lib.rs index 726f4ad6e4..b4dd21652c 100644 --- a/clickhouse-admin/types/src/lib.rs +++ b/clickhouse-admin/types/src/lib.rs @@ -1203,7 +1203,7 @@ pub enum Timestamp { #[derive(Debug, Serialize, Deserialize, JsonSchema, PartialEq)] #[serde(rename_all = "snake_case")] pub struct SystemTimeSeries { - pub time: Timestamp, + pub time: String, pub value: f64, // TODO: Would be really nice to have an enum with possible units (s, ms, bytes) // Not sure if I can even add this, the system tables don't mention units at all. @@ -2102,15 +2102,15 @@ snapshot_storage_disk=LocalSnapshotDisk let expected = vec![ SystemTimeSeries { - time: crate::Timestamp::Unix("1732494720".to_string()), + time: "1732494720".to_string(), value: 110220450825.75238, }, SystemTimeSeries { - time: crate::Timestamp::Unix("1732494840".to_string()), + time: "1732494840".to_string(), value: 110339992917.33331, }, SystemTimeSeries { - time: crate::Timestamp::Unix("1732494960".to_string()), + time: "1732494960".to_string(), value: 110421854037.33331, }, ]; @@ -2130,21 +2130,15 @@ snapshot_storage_disk=LocalSnapshotDisk let expected = vec![ SystemTimeSeries { - time: crate::Timestamp::Utc( - "2024-11-25T00:34:00Z".parse::>().unwrap(), - ), + time: "2024-11-25T00:34:00Z".to_string(), value: 110220450825.75238, }, SystemTimeSeries { - time: crate::Timestamp::Utc( - "2024-11-25T00:35:00Z".parse::>().unwrap(), - ), + time: "2024-11-25T00:35:00Z".to_string(), value: 110339992917.33331, }, SystemTimeSeries { - time: crate::Timestamp::Utc( - "2024-11-25T00:36:00Z".parse::>().unwrap(), - ), + time: "2024-11-25T00:36:00Z".to_string(), value: 110421854037.33331, }, ]; @@ -2179,7 +2173,7 @@ snapshot_storage_disk=LocalSnapshotDisk assert_eq!( format!("{}", root_cause), - "data did not match any variant of untagged enum Timestamp at line 1 column 12", + "invalid type: integer `2024`, expected a string at line 1 column 12", ); } } diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index d8022f51c0..4c8f032fcb 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1934,6 +1934,8 @@ impl JsonSchema for L4PortRange { DeserializeFromStr, PartialEq, Eq, + PartialOrd, + Ord, SerializeDisplay, Hash, )] diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index ff14e899fb..588117c71c 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -57,6 +57,8 @@ pub enum NetworkInterfaceKind { JsonSchema, PartialEq, Eq, + PartialOrd, + Ord, Hash, Diffus, )] @@ -79,7 +81,17 @@ pub struct NetworkInterface { // Note that `Deserialize` is manually implemented; if you make any changes to // the fields of this structure, you must make them to that implementation too. #[derive( - Debug, Clone, Copy, Serialize, JsonSchema, PartialEq, Eq, Hash, Diffus, + Debug, + Clone, + Copy, + Serialize, + JsonSchema, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Diffus, )] pub struct SourceNatConfig { /// The external address provided to the instance or service. 
diff --git a/dev-tools/clickana/Cargo.toml b/dev-tools/clickana/Cargo.toml new file mode 100644 index 0000000000..a9f91b890b --- /dev/null +++ b/dev-tools/clickana/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "clickana" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +camino.workspace = true +chrono.workspace = true +clap.workspace = true +clickhouse-admin-types.workspace = true +clickhouse-admin-server-client.workspace = true +dropshot.workspace = true +futures.workspace = true +omicron-common.workspace = true +ratatui.workspace = true +schemars.workspace = true +slog.workspace = true +slog-async.workspace = true +slog-dtrace.workspace = true +slog-error-chain.workspace = true +slog-term.workspace = true +serde_json.workspace = true +tokio.workspace = true +tokio-postgres.workspace = true + +omicron-workspace-hack.workspace = true + +[lints] +workspace = true diff --git a/dev-tools/clickana/src/bin/clickana.rs b/dev-tools/clickana/src/bin/clickana.rs new file mode 100644 index 0000000000..0c8d06156e --- /dev/null +++ b/dev-tools/clickana/src/bin/clickana.rs @@ -0,0 +1,57 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::Result; +use camino::Utf8PathBuf; +use clap::Parser; +use clickana::Clickana; +use std::net::SocketAddr; + +const CLICKANA_LOG_FILE: &str = "/tmp/clickana.log"; + +#[tokio::main] +async fn main() -> Result<()> { + let args = Cli::parse(); + + let terminal = ratatui::init(); + let result = Clickana::new( + args.clickhouse_addr, + args.log_path, + args.sampling_interval, + args.time_range, + args.refresh_interval, + ) + .run(terminal) + .await; + ratatui::restore(); + result +} + +#[derive(Debug, Parser)] +struct Cli { + /// Path to the log file + #[arg( + long, + short, + env = "CLICKANA_LOG_PATH", + default_value = CLICKANA_LOG_FILE, + )] + log_path: Utf8PathBuf, + + /// Address where a clickhouse admin server is listening on + #[arg(long, short = 'a')] + clickhouse_addr: SocketAddr, + + /// The interval to collect monitoring data in seconds + #[arg(long, short, default_value_t = 60)] + sampling_interval: u64, + + /// Range of time to collect monitoring data in seconds + #[arg(long, short, default_value_t = 3600)] + time_range: u64, + + /// The interval at which the dashboards will refresh + #[arg(long, short, default_value_t = 60)] + refresh_interval: u64, +} diff --git a/dev-tools/clickana/src/chart.rs b/dev-tools/clickana/src/chart.rs new file mode 100644 index 0000000000..f8a78fb63d --- /dev/null +++ b/dev-tools/clickana/src/chart.rs @@ -0,0 +1,765 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use anyhow::{bail, Result}; +use chrono::{DateTime, Utc}; +use clickhouse_admin_server_client::types::{SystemTable, SystemTimeSeries}; +use ratatui::{ + layout::{Constraint, Rect}, + style::{Color, Style, Stylize}, + symbols::Marker, + text::Line, + widgets::{Axis, Block, Chart, Dataset, GraphType, LegendPosition}, + Frame, +}; +use std::fmt::Display; + +// Ratatui requires data points in a Dataset to be f64 +const GIBIBYTE_F64: f64 = 1073741824.0; +const MEBIBYTE_F64: f64 = 1048576.0; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Unit { + Count, + Gibibyte, + Mebibyte, +} + +impl Display for Unit { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + Unit::Count => "", + Unit::Gibibyte => "GiB", + Unit::Mebibyte => "MiB", + }; + write!(f, "{s}") + } +} + +impl Unit { + /// Returns the value of the unit represented in bytes. + fn as_bytes_f64(&self) -> Result { + let bytes = match self { + Unit::Gibibyte => GIBIBYTE_F64, + Unit::Mebibyte => MEBIBYTE_F64, + Unit::Count => bail!("Count cannot be converted into bytes"), + }; + Ok(bytes) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum MetricName { + DiskUsage, + MemoryTracking, + QueryCount, + RunningQueries, +} + +impl Display for MetricName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + MetricName::DiskUsage => "DiskUsed_default", + MetricName::MemoryTracking => "CurrentMetric_MemoryTracking", + MetricName::QueryCount => "ProfileEvent_Query", + MetricName::RunningQueries => "CurrentMetric_Query", + }; + write!(f, "{s}") + } +} + +impl MetricName { + /// Returns the associated table to query for each metric. + pub fn table(&self) -> SystemTable { + match self { + MetricName::DiskUsage => SystemTable::AsynchronousMetricLog, + MetricName::MemoryTracking + | MetricName::QueryCount + | MetricName::RunningQueries => SystemTable::MetricLog, + } + } + + /// Returns the unit the data values will be represented as. + fn unit(&self) -> Unit { + match self { + MetricName::DiskUsage => Unit::Gibibyte, + MetricName::MemoryTracking => Unit::Mebibyte, + MetricName::QueryCount | MetricName::RunningQueries => Unit::Count, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ChartMetadata { + pub title: String, + pub unit: Unit, +} + +impl ChartMetadata { + pub fn new(metric: MetricName, title: String) -> Self { + let unit = metric.unit(); + Self { title, unit } + } +} + +#[derive(Debug)] +struct TimeSeriesValues { + /// A collection of all the values from the timeseries + values: Vec, +} + +impl TimeSeriesValues { + fn new(raw_data: &Vec) -> Self { + let values: Vec = raw_data.iter().map(|ts| ts.value).collect(); + Self { values } + } + + fn min(&self) -> Result<&f64> { + let Some(min_value) = + self.values.iter().min_by(|a, b| a.partial_cmp(b).unwrap()) + else { + bail!("no values have been retrieved") + }; + + Ok(min_value) + } + + fn max(&self) -> Result<&f64> { + let Some(max_value) = + self.values.iter().max_by(|a, b| a.partial_cmp(b).unwrap()) + else { + bail!("no values have been retrieved") + }; + + Ok(max_value) + } + + /// Returns the average value of the max and min values. + fn avg(&self, min_value_label: f64, max_value_label: f64) -> f64 { + (min_value_label + max_value_label) / 2.0 + } +} + +// The result of the following functions will not be precise, but it doesn't +// matter since we just want an estimate for the chart's labels and bounds. 
+// All we need are values that are larger than the maximum value in the
+// timeseries or smaller than the minimum value in the timeseries.
+
+/// Returns the sum of the maximum raw value and 1 or the equivalent of 1
+/// MiB or GiB in bytes.
+fn padded_max_value_raw(unit: Unit, max_value_raw: &f64) -> Result<f64> {
+    let ceil_value = max_value_raw.ceil();
+    let padded_value = match unit {
+        Unit::Count => ceil_value + 1.0,
+        Unit::Gibibyte | Unit::Mebibyte => ceil_value + unit.as_bytes_f64()?,
+    };
+    Ok(padded_value)
+}
+
+/// Returns the maximum value converted into the given unit (MiB or GiB),
+/// or the raw count, plus 1, rounded up.
+fn padded_max_value_as_unit(unit: Unit, max_value_raw: &f64) -> Result<f64> {
+    let label_value = match unit {
+        Unit::Count => max_value_raw + 1.0,
+        Unit::Gibibyte | Unit::Mebibyte => {
+            (max_value_raw / unit.as_bytes_f64()?) + 1.0
+        }
+    };
+    Ok(label_value.ceil())
+}
+
+/// Returns the difference of the minimum raw value and 1 or the equivalent
+/// of 1 in MiB or GiB in bytes. If the minimum is equal to or less than 1.0,
+/// or the equivalent of 1 once converted from bytes to the expected unit
+/// (e.g. less than or equal to 1048576 if we're using MiB) we'll use 0.0 as
+/// the minimum value as we don't expect any of our charts
+/// to require negative numbers for now.
+fn padded_min_value_raw(unit: Unit, min_value_raw: &f64) -> Result<f64> {
+    let padded_value = match unit {
+        Unit::Count => {
+            if *min_value_raw <= 1.0 {
+                0.0
+            } else {
+                min_value_raw - 1.0
+            }
+        }
+        Unit::Gibibyte | Unit::Mebibyte => {
+            let bytes = unit.as_bytes_f64()?;
+            if *min_value_raw <= bytes {
+                0.0
+            } else {
+                min_value_raw - bytes
+            }
+        }
+    };
+    Ok(padded_value.floor())
+}
+
+/// Returns the minimum value converted into the given unit (MiB or GiB),
+/// or the raw count, minus 1, rounded down. If that value is less than 1,
+/// we'll use 0 as the minimum value as we don't expect any of our charts
+/// to require negative numbers for now.
+fn padded_min_value_as_unit(unit: Unit, min_value_raw: &f64) -> Result<f64> {
+    let padded_value = match unit {
+        Unit::Count => {
+            if *min_value_raw < 1.0 {
+                0.0
+            } else {
+                min_value_raw - 1.0
+            }
+        }
+        Unit::Gibibyte | Unit::Mebibyte => {
+            let value_as_unit = min_value_raw / unit.as_bytes_f64()?;
+            if value_as_unit < 1.0 {
+                0.0
+            } else {
+                value_as_unit - 1.0
+            }
+        }
+    };
+    Ok(padded_value.floor())
+}
+
+#[derive(Debug, PartialEq)]
+struct YAxisValues {
+    lower_label: String,
+    mid_label: String,
+    upper_label: String,
+    lower_bound: f64,
+    upper_bound: f64,
+}
+
+impl YAxisValues {
+    fn new(unit: Unit, raw_data: &Vec<SystemTimeSeries>) -> Result<Self> {
+        // Retrieve values only to create Y axis bounds and labels
+        let values = TimeSeriesValues::new(&raw_data);
+        let max_value = values.max()?;
+        let min_value = values.min()?;
+
+        // In case there is very little variance in the y axis, we will be adding some
+        // padding to the bounds and labels so we don't end up with repeated labels or
+        // straight lines too close to the upper bounds.
+        let upper_bound = padded_max_value_raw(unit, max_value)?;
+        let upper_label_as_unit = padded_max_value_as_unit(unit, max_value)?;
+        let lower_bound = padded_min_value_raw(unit, min_value)?;
+        let lower_label_as_unit = padded_min_value_as_unit(unit, min_value)?;
+        let mid_label_as_unit =
+            values.avg(lower_label_as_unit, upper_label_as_unit);
+
+        // To nicely display the mid value label for the Y axis, we do the following:
+        // - It is not displayed if it is 0.0.
+        // - If it does not have a fractional number we display it as an integer.
+        // - Else, we display the number up to the first decimal place.
+        let fractional_of_mid_value =
+            mid_label_as_unit - mid_label_as_unit.floor();
+        let mid_value_formatted = format!("{:.1}", mid_label_as_unit);
+        let mid_label = if mid_value_formatted == *"0.0" {
+            "".to_string()
+        } else if fractional_of_mid_value == 0.0 {
+            format!(
+                "{} {}",
+                mid_value_formatted.split('.').next().unwrap(),
+                unit
+            )
+        } else {
+            format!("{} {}", mid_value_formatted, unit)
+        };
+
+        let upper_label = format!("{} {}", upper_label_as_unit, unit);
+        let lower_label = format!("{} {}", lower_label_as_unit, unit);
+
+        Ok(Self {
+            lower_label,
+            mid_label,
+            upper_label,
+            lower_bound,
+            upper_bound,
+        })
+    }
+}
+
+#[derive(Debug)]
+struct TimeSeriesTimestamps {
+    /// A collection of all the timestamps from the timeseries
+    timestamps: Vec<i64>,
+}
+
+impl TimeSeriesTimestamps {
+    fn new(raw_data: &Vec<SystemTimeSeries>) -> Self {
+        let timestamps: Vec<i64> = raw_data
+            .iter()
+            .map(|ts| {
+                ts.time.trim_matches('"').parse::<i64>().unwrap_or_else(|_| {
+                    panic!("could not parse timestamp {} into i64", ts.time)
+                })
+            })
+            .collect();
+        Self { timestamps }
+    }
+
+    fn min(&self) -> Result<&i64> {
+        let Some(start_time) = self.timestamps.iter().min() else {
+            bail!("failed to retrieve start time, timestamp list is empty")
+        };
+        Ok(start_time)
+    }
+
+    fn max(&self) -> Result<&i64> {
+        let Some(end_time) = self.timestamps.iter().max() else {
+            bail!("failed to retrieve end time, timestamp list is empty")
+        };
+        Ok(end_time)
+    }
+
+    fn avg(&self, start_time: &i64, end_time: &i64) -> i64 {
+        (start_time + end_time) / 2
+    }
+}
+
+#[derive(Debug, PartialEq)]
+pub struct XAxisTimestamps {
+    mid_time_label: DateTime<Utc>,
+    pub start_time_label: DateTime<Utc>,
+    pub end_time_label: DateTime<Utc>,
+    start_time_bound: f64,
+    end_time_bound: f64,
+}
+
+impl XAxisTimestamps {
+    fn new(raw_data: &Vec<SystemTimeSeries>) -> Result<Self> {
+        // Retrieve timestamps only to create chart bounds and labels
+        let timestamps = TimeSeriesTimestamps::new(&raw_data);
+        // These timestamps will be used to calculate start and end timestamps in order
+        // to create labels and set bounds for the X axis. As above, some of these conversions
+        // may lose precision, but it's OK as these values are only used to make sure the
+        // datapoints fit within the graph nicely.
+        let start_time = timestamps.min()?;
+        let end_time = timestamps.max()?;
+        let avg_time = timestamps.avg(start_time, end_time);
+
+        let Some(start_time_label) = DateTime::from_timestamp(*start_time, 0)
+        else {
+            bail!(
+                "failed to convert timestamp to UTC date and time;
+                timestamp = {}",
+                start_time
+            )
+        };
+        let Some(end_time_label) = DateTime::from_timestamp(*end_time, 0)
+        else {
+            bail!(
+                "failed to convert timestamp to UTC date and time;
+                timestamp = {}",
+                end_time
+            )
+        };
+        let Some(mid_time_label) = DateTime::from_timestamp(avg_time, 0) else {
+            bail!(
+                "failed to convert timestamp to UTC date and time;
+                timestamp = {}",
+                avg_time
+            )
+        };
+
+        let start_time_bound = *start_time as f64;
+        let end_time_bound = *end_time as f64;
+
+        Ok(Self {
+            mid_time_label,
+            start_time_label,
+            end_time_label,
+            start_time_bound,
+            end_time_bound,
+        })
+    }
+}
+
+#[derive(Debug, PartialEq)]
+struct DataPoints {
+    data: Vec<(f64, f64)>,
+}
+
+impl DataPoints {
+    fn new(timeseries: &Vec<SystemTimeSeries>) -> Self {
+        // These values will be used to render the graph and ratatui
+        // requires them to be f64
+        let data: Vec<(f64, f64)> = timeseries
+            .iter()
+            .map(|ts| {
+                (
+                    ts.time.trim_matches('"').parse::<f64>().unwrap_or_else(
+                        |_| {
+                            panic!(
+                                "could not parse timestamp {} into f64",
+                                ts.time
+                            )
+                        },
+                    ),
+                    ts.value,
+                )
+            })
+            .collect();
+        Self { data }
+    }
+}
+
+#[derive(Debug, PartialEq)]
+pub struct ChartData {
+    metadata: ChartMetadata,
+    data_points: DataPoints,
+    pub x_axis_timestamps: XAxisTimestamps,
+    y_axis_values: YAxisValues,
+}
+
+impl ChartData {
+    pub fn new(
+        raw_data: Vec<SystemTimeSeries>,
+        metadata: ChartMetadata,
+    ) -> Result<Self> {
+        // Retrieve datapoints that will be charted
+        let data_points = DataPoints::new(&raw_data);
+
+        // Retrieve X axis bounds and labels
+        let x_axis_timestamps = XAxisTimestamps::new(&raw_data)?;
+
+        // Retrieve Y axis bounds and labels
+        let y_axis_values = YAxisValues::new(metadata.unit, &raw_data)?;
+
+        Ok(Self { metadata, data_points, x_axis_timestamps, y_axis_values })
+    }
+
+    pub fn render_line_chart(&self, frame: &mut Frame, area: Rect) {
+        let datasets = vec![Dataset::default()
+            .marker(Marker::Braille)
+            .style(Style::default().fg(Color::LightGreen))
+            .graph_type(GraphType::Line)
+            .data(&self.data_points.data)];
+
+        let chart = Chart::new(datasets)
+            .block(
+                Block::bordered()
+                    .title(Line::from(self.title()).cyan().bold().centered()),
+            )
+            .x_axis(
+                Axis::default()
+                    .style(Style::default().gray())
+                    .bounds([self.start_time_bound(), self.end_time_bound()])
+                    .labels([
+                        self.start_time_label().bold(),
+                        self.mid_time_label().bold(),
+                        self.end_time_label().bold(),
+                    ]),
+            )
+            .y_axis(
+                Axis::default()
+                    .style(Style::default().gray())
+                    .bounds([
+                        self.lower_value_bound(),
+                        self.upper_value_bound(),
+                    ])
+                    .labels([
+                        self.lower_value_label().bold(),
+                        self.mid_value_label().bold(),
+                        self.upper_value_label().bold(),
+                    ]),
+            )
+            .legend_position(Some(LegendPosition::TopLeft))
+            .hidden_legend_constraints((
+                Constraint::Ratio(1, 2),
+                Constraint::Ratio(1, 2),
+            ));
+
+        frame.render_widget(chart, area);
+    }
+
+    fn title(&self) -> String {
+        self.metadata.title.clone()
+    }
+
+    pub fn start_date_time(&self) -> DateTime<Utc> {
+        self.x_axis_timestamps.start_time_label
+    }
+
+    pub fn end_date_time(&self) -> DateTime<Utc> {
+        self.x_axis_timestamps.end_time_label
+    }
+
+    fn start_time_label(&self) -> String {
+        self.x_axis_timestamps.start_time_label.time().to_string()
+    }
+
+    fn mid_time_label(&self) -> String {
+
self.x_axis_timestamps.mid_time_label.time().to_string() + } + + fn end_time_label(&self) -> String { + self.x_axis_timestamps.end_time_label.time().to_string() + } + + fn start_time_bound(&self) -> f64 { + self.x_axis_timestamps.start_time_bound + } + + fn end_time_bound(&self) -> f64 { + self.x_axis_timestamps.end_time_bound + } + + fn lower_value_label(&self) -> String { + self.y_axis_values.lower_label.clone() + } + + fn mid_value_label(&self) -> String { + self.y_axis_values.mid_label.clone() + } + + fn upper_value_label(&self) -> String { + self.y_axis_values.upper_label.clone() + } + + fn lower_value_bound(&self) -> f64 { + self.y_axis_values.lower_bound + } + + fn upper_value_bound(&self) -> f64 { + self.y_axis_values.upper_bound + } +} + +#[cfg(test)] +mod tests { + use crate::{ + chart::{Unit, YAxisValues}, + ChartData, ChartMetadata, MetricName, + }; + use chrono::DateTime; + use clickhouse_admin_server_client::types::SystemTimeSeries; + + use super::{DataPoints, XAxisTimestamps}; + + #[test] + fn gather_chart_data_for_disk_usage_success() { + let metadata = + ChartMetadata::new(MetricName::DiskUsage, "Test Chart".to_string()); + let raw_data = vec![ + SystemTimeSeries { + time: "1732223400".to_string(), + value: 479551511587.3104, + }, + SystemTimeSeries { + time: "1732223520".to_string(), + value: 479555459822.93335, + }, + SystemTimeSeries { + time: "1732223640".to_string(), + value: 479560290201.6, + }, + ]; + + let expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Gibibyte, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 479551511587.3104), + (1732223520.0, 479555459822.93335), + (1732223640.0, 479560290201.6), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "445 GiB".to_string(), + mid_label: "446.5 GiB".to_string(), + upper_label: "448 GiB".to_string(), + lower_bound: 478477769763.0, + upper_bound: 480634032026.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + fn gather_chart_data_for_memory_tracking_success() { + let metadata = ChartMetadata::new( + MetricName::MemoryTracking, + "Test Chart".to_string(), + ); + let raw_data = vec![ + SystemTimeSeries { + time: "1732223400".to_string(), + value: 479551511587.3104, + }, + SystemTimeSeries { + time: "1732223520".to_string(), + value: 479555459822.93335, + }, + SystemTimeSeries { + time: "1732223640".to_string(), + value: 479560290201.6, + }, + ]; + + let expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Mebibyte, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 479551511587.3104), + (1732223520.0, 479555459822.93335), + (1732223640.0, 479560290201.6), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "457334 MiB".to_string(), + mid_label: "457340 
MiB".to_string(), + upper_label: "457346 MiB".to_string(), + lower_bound: 479550463011.0, + upper_bound: 479561338778.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + fn gather_chart_data_for_query_count_success() { + let metadata = ChartMetadata::new( + MetricName::QueryCount, + "Test Chart".to_string(), + ); + let raw_data = vec![ + SystemTimeSeries { time: "1732223400".to_string(), value: 0.0 }, + SystemTimeSeries { time: "1732223520".to_string(), value: 0.004 }, + SystemTimeSeries { time: "1732223640".to_string(), value: 0.0 }, + ]; + + let expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Count, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 0.0), + (1732223520.0, 0.004), + (1732223640.0, 0.0), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "0 ".to_string(), + mid_label: "1 ".to_string(), + upper_label: "2 ".to_string(), + lower_bound: 0.0, + upper_bound: 2.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + fn gather_chart_data_for_running_queries_success() { + let metadata = ChartMetadata::new( + MetricName::RunningQueries, + "Test Chart".to_string(), + ); + let raw_data = vec![ + SystemTimeSeries { time: "1732223400".to_string(), value: 1.554 }, + SystemTimeSeries { time: "1732223520".to_string(), value: 1.877 }, + SystemTimeSeries { time: "1732223640".to_string(), value: 1.3456 }, + ]; + + let expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Count, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 1.554), + (1732223520.0, 1.877), + (1732223640.0, 1.3456), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "0 ".to_string(), + mid_label: "1.5 ".to_string(), + upper_label: "3 ".to_string(), + lower_bound: 0.0, + upper_bound: 3.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + #[should_panic( + expected = "could not parse timestamp Some nonsense string into f64" + )] + fn gather_chart_data_failure() { + let metadata = + ChartMetadata::new(MetricName::DiskUsage, "Test Chart".to_string()); + let raw_data = vec![ + SystemTimeSeries { + time: "Some nonsense string".to_string(), + value: 479551511587.3104, + }, + SystemTimeSeries { + time: "1732223520".to_string(), + value: 479555459822.93335, + }, + SystemTimeSeries { + time: "1732223640".to_string(), + value: 479560290201.6, + }, + ]; + + let _ = ChartData::new(raw_data, metadata); + } +} diff --git a/dev-tools/clickana/src/lib.rs b/dev-tools/clickana/src/lib.rs new file mode 100644 index 0000000000..76af35ba8d --- /dev/null +++ b/dev-tools/clickana/src/lib.rs @@ -0,0 +1,274 @@ +// This Source Code Form is 
subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::{anyhow, bail, Context, Result}; +use camino::Utf8PathBuf; +use chrono::{DateTime, Utc}; +use clickhouse_admin_server_client::types::{ + SystemTimeSeries, TimestampFormat, +}; +use clickhouse_admin_server_client::Client as ClickhouseServerClient; +use futures::stream::FuturesOrdered; +use futures::StreamExt; +use omicron_common::FileKv; +use ratatui::crossterm::event::{self, Event, KeyCode}; +use ratatui::layout::{Constraint, Layout, Rect}; +use ratatui::style::{Color, Style, Stylize}; +use ratatui::text::Span; +use ratatui::widgets::Paragraph; +use ratatui::{DefaultTerminal, Frame}; +use slog::{o, Drain, Logger}; +use slog_async::Async; +use slog_term::{FullFormat, PlainDecorator}; +use std::collections::BTreeMap; +use std::future::Future; +use std::net::SocketAddr; +use std::pin::Pin; +use std::time::{Duration, Instant}; + +use crate::chart::{ChartData, ChartMetadata, MetricName}; + +mod chart; + +#[derive(Debug)] +struct Dashboard { + start_time: DateTime, + end_time: DateTime, + top_left_frame: ChartData, + top_right_frame: ChartData, + bottom_left_frame: ChartData, + bottom_right_frame: ChartData, +} + +#[derive(Clone, Debug)] +pub struct Clickana { + clickhouse_addr: SocketAddr, + log_path: Utf8PathBuf, + sampling_interval: u64, + time_range: u64, + refresh_interval: u64, +} + +impl Clickana { + pub fn new( + clickhouse_addr: SocketAddr, + log_path: Utf8PathBuf, + sampling_interval: u64, + time_range: u64, + refresh_interval: u64, + ) -> Self { + Self { + clickhouse_addr, + log_path, + sampling_interval, + time_range, + refresh_interval, + } + } + + pub async fn run(self, mut terminal: DefaultTerminal) -> Result<()> { + let admin_url = format!("http://{}", self.clickhouse_addr); + let log = self.new_logger()?; + let client = ClickhouseServerClient::new(&admin_url, log.clone()); + + let tick_interval = Duration::from_secs(self.refresh_interval); + let mut last_tick = Instant::now(); + loop { + // Charts we will be showing in the dashboard + // + // We are hardcoding these for now. In the future these will likely be taken + // from a TOML config file. + let charts = BTreeMap::from([ + (MetricName::DiskUsage, "Disk Usage".to_string()), + ( + MetricName::MemoryTracking, + "Memory Allocated by the Server".to_string(), + ), + ( + MetricName::QueryCount, + "Queries Started per Second".to_string(), + ), + (MetricName::RunningQueries, "Queries Running".to_string()), + ]); + + let mut tasks = FuturesOrdered::< + Pin>>>, + >::new(); + + for (metric_name, title) in charts { + let s = self.clone(); + let c = client.clone(); + + let task = Box::pin(async move { + let metadata = ChartMetadata::new(metric_name, title); + let data = s.get_api_data(&c, metric_name).await?; + ChartData::new(data, metadata) + }); + tasks.push_back(task); + } + + if tasks.len() != 4 { + bail!( + "expected information for 4 charts, received {} instead", + tasks.len() + ); + } + + // TODO: Eventually we may want to not have a set amount of charts and make the + // dashboard a bit more dynamic. Perhaps taking a toml configuration file or + // something like that. We can then create a vector of "ChartData"s for Dashboard + // to take and create the layout dynamically. 
+ // + // IDEA (ajs): I think it would be useful to be able to have a little menu of charts + // on the side of the pane, and then you can scroll and select which ones to show + // without having to restart the app, or mess with a toml file. + // You could also allow toggling between a set of predefined layouts to make it always + // look nice. So you could show, 1, 2, 4, 6, 8 charts or something and allow selecting + // which to show in each view. You could even remember which charts to show in each layout, + // so you could toggle back and forth between different layouts and see all the charts, + // some with more detail. + // + // We have already checked that the length of tasks is 4, so it's safe to unwrap + let top_left_frame: ChartData = tasks.next().await.unwrap()?; + let top_right_frame: ChartData = tasks.next().await.unwrap()?; + let bottom_left_frame: ChartData = tasks.next().await.unwrap()?; + let bottom_right_frame: ChartData = tasks.next().await.unwrap()?; + + // We only need to retrieve from one chart as they will all be relatively the same. + // Rarely, the charts may have a variance of a second or so depending on when + // the API calls were made, but for the header block we don't need exact precision. + let start_time = top_left_frame.start_date_time(); + let end_time = top_left_frame.end_date_time(); + + let dashboard = Dashboard { + start_time, + end_time, + top_left_frame, + top_right_frame, + bottom_left_frame, + bottom_right_frame, + }; + terminal.draw(|frame| self.draw(frame, dashboard))?; + + let timeout = tick_interval.saturating_sub(last_tick.elapsed()); + if event::poll(timeout)? { + if let Event::Key(key) = event::read()? { + // To exit the dashboard press the "q" key + if key.code == KeyCode::Char('q') { + return Ok(()); + } + } + } + + if last_tick.elapsed() >= tick_interval { + last_tick = Instant::now(); + } + } + } + + fn draw(&self, frame: &mut Frame, dashboard: Dashboard) { + let [heading, top, bottom] = Layout::vertical([ + Constraint::Length(4), + // TODO: If we make the dashboard with too many charts + // we may want to reconsider setting sizes instead of filling + // the space + Constraint::Fill(1), + Constraint::Fill(1), + ]) + .areas(frame.area()); + let [title] = + Layout::horizontal([Constraint::Fill(1); 1]).areas(heading); + let [top_left_frame, top_right_frame] = + Layout::horizontal([Constraint::Fill(1); 2]).areas(top); + let [bottom_left_frame, bottom_right_frame] = + Layout::horizontal([Constraint::Fill(1); 2]).areas(bottom); + + self.render_title_bar(frame, title, &dashboard); + + dashboard.top_left_frame.render_line_chart(frame, top_left_frame); + dashboard.top_right_frame.render_line_chart(frame, top_right_frame); + dashboard.bottom_left_frame.render_line_chart(frame, bottom_left_frame); + dashboard + .bottom_right_frame + .render_line_chart(frame, bottom_right_frame); + } + + fn render_title_bar( + &self, + frame: &mut Frame, + area: Rect, + dashboard: &Dashboard, + ) { + let style = Style::new().fg(Color::Green).bold(); + let title = vec![ + Span::styled("CLICKANA", style).into_centered_line(), + Span::styled( + format!("Sampling Interval: {}s", self.sampling_interval), + style, + ) + .into_left_aligned_line(), + Span::styled( + format!( + "Time Range: {} - {} ({}s)", + dashboard.start_time, dashboard.end_time, self.time_range + ), + style, + ) + .into_left_aligned_line(), + Span::styled( + format!("Refresh Interval {}s", self.refresh_interval), + style, + ) + .into_left_aligned_line(), + ]; + let p = Paragraph::new(title); + + 
frame.render_widget(p, area); + } + + async fn get_api_data( + &self, + client: &ClickhouseServerClient, + metric: MetricName, + ) -> Result> { + let timeseries = client + .system_timeseries_avg( + metric.table(), + &format!("{metric}"), + Some(self.sampling_interval), + Some(self.time_range), + Some(TimestampFormat::UnixEpoch), + ) + .await + .map(|t| t.into_inner()) + .map_err(|e| { + anyhow!( + concat!( + "failed to retrieve timeseries from clickhouse server; ", + "error = {}", + ), + e + ) + }); + + timeseries + } + + fn new_logger(&self) -> Result { + let file = std::fs::OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(self.log_path.clone()) + .with_context(|| { + format!("error opening log file {}", self.log_path) + })?; + + let decorator = PlainDecorator::new(file); + let drain = FullFormat::new(decorator).build().fuse(); + let drain = Async::new(drain).build().fuse(); + + Ok(slog::Logger::root(drain, o!(FileKv))) + } +} diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 667a666375..e501e650b1 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -797,12 +797,23 @@ enum ValidateCommands { /// Validate each `volume_references` column in the region snapshots table ValidateVolumeReferences, + /// Find either regions Nexus knows about that the corresponding Crucible + /// agent says were deleted, or regions that Nexus doesn't know about. + ValidateRegions(ValidateRegionsArgs), + /// Find either region snapshots Nexus knows about that the corresponding /// Crucible agent says were deleted, or region snapshots that Nexus doesn't /// know about. ValidateRegionSnapshots, } +#[derive(Debug, Args)] +struct ValidateRegionsArgs { + /// Delete Regions Nexus is unaware of + #[clap(long, default_value_t = false)] + clean_up_orphaned_regions: bool, +} + #[derive(Debug, Args)] struct VolumeArgs { #[command(subcommand)] @@ -1093,6 +1104,20 @@ impl DbArgs { DbCommands::Validate(ValidateArgs { command: ValidateCommands::ValidateVolumeReferences, }) => cmd_db_validate_volume_references(&datastore).await, + DbCommands::Validate(ValidateArgs { + command: ValidateCommands::ValidateRegions(args), + }) => { + let clean_up_orphaned_regions = + if args.clean_up_orphaned_regions { + let token = omdb.check_allow_destructive()?; + CleanUpOrphanedRegions::Yes { _token: token } + } else { + CleanUpOrphanedRegions::No + }; + + cmd_db_validate_regions(&datastore, clean_up_orphaned_regions) + .await + } DbCommands::Validate(ValidateArgs { command: ValidateCommands::ValidateRegionSnapshots, }) => cmd_db_validate_region_snapshots(&datastore).await, @@ -2566,39 +2591,48 @@ async fn cmd_db_region_used_by( async fn cmd_db_region_find_deleted( datastore: &DataStore, ) -> Result<(), anyhow::Error> { - let datasets_regions_volumes = + let freed_crucible_resources = datastore.find_deleted_volume_regions().await?; #[derive(Tabled)] - struct Row { + struct RegionRow { dataset_id: DatasetUuid, region_id: Uuid, - volume_id: String, } - let rows: Vec = datasets_regions_volumes - .into_iter() + #[derive(Tabled)] + struct VolumeRow { + volume_id: Uuid, + } + + let region_rows: Vec = freed_crucible_resources + .datasets_and_regions + .iter() .map(|row| { - let (dataset, region, volume) = row; + let (dataset, region) = row; - Row { - dataset_id: dataset.id(), - region_id: region.id(), - volume_id: if let Some(volume) = volume { - volume.id().to_string() - } else { - String::from("") - }, - } + RegionRow { dataset_id: dataset.id(), 
region_id: region.id() } }) .collect(); - let table = tabled::Table::new(rows) + let table = tabled::Table::new(region_rows) .with(tabled::settings::Style::psql()) .to_string(); println!("{}", table); + let volume_rows: Vec = freed_crucible_resources + .volumes + .iter() + .map(|volume_id| VolumeRow { volume_id: *volume_id }) + .collect(); + + let volume_table = tabled::Table::new(volume_rows) + .with(tabled::settings::Style::psql()) + .to_string(); + + println!("{}", volume_table); + Ok(()) } @@ -4526,6 +4560,283 @@ async fn cmd_db_validate_volume_references( Ok(()) } +enum CleanUpOrphanedRegions { + Yes { _token: DestructiveOperationToken }, + No, +} + +async fn cmd_db_validate_regions( + datastore: &DataStore, + clean_up_orphaned_regions: CleanUpOrphanedRegions, +) -> Result<(), anyhow::Error> { + // *Lifetime note*: + // + // The lifetime of the region record in cockroachdb is longer than the time + // the Crucible agent's region is in a non-destroyed state: Nexus will + // perform the query to allocate regions (inserting them into the database) + // before it ensures those regions are created (i.e. making the POST request + // to the appropriate Crucible agent to create them), and it will request + // that the regions be deleted (then wait for that region to transition to + // the destroyed state) before hard-deleting the records in the database. + + // First, get all region records (with their corresponding dataset) + let datasets_and_regions: Vec<(Dataset, Region)> = datastore + .pool_connection_for_tests() + .await? + .transaction_async(|conn| async move { + // Selecting all datasets and regions requires a full table scan + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; + + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::region::dsl; + + dsl::region + .inner_join( + dataset_dsl::dataset + .on(dsl::dataset_id.eq(dataset_dsl::id)), + ) + .select((Dataset::as_select(), Region::as_select())) + .get_results_async(&conn) + .await + }) + .await?; + + #[derive(Tabled)] + struct Row { + dataset_id: DatasetUuid, + region_id: Uuid, + dataset_addr: std::net::SocketAddrV6, + error: String, + } + + let mut rows = Vec::new(); + + // Reconcile with the corresponding Crucible Agent: are they aware of each + // region in the database? + for (dataset, region) in &datasets_and_regions { + // If the dataset was expunged, do not attempt to contact the Crucible + // agent! + let in_service = + datastore.dataset_physical_disk_in_service(dataset.id()).await?; + + if !in_service { + eprintln!( + "dataset {} {:?} is not in service, skipping", + dataset.id(), + dataset.address(), + ); + continue; + } + + use crucible_agent_client::types::RegionId; + use crucible_agent_client::types::State; + use crucible_agent_client::Client as CrucibleAgentClient; + + let Some(dataset_addr) = dataset.address() else { + eprintln!("Dataset {} missing an IP address", dataset.id()); + continue; + }; + + let url = format!("http://{}", dataset_addr); + let client = CrucibleAgentClient::new(&url); + + let actual_region = + match client.region_get(&RegionId(region.id().to_string())).await { + Ok(region) => region.into_inner(), + + Err(e) => { + // Either there was a communication error, or the agent is + // unaware of the Region (this would be a 404). 
+ match e { + crucible_agent_client::Error::ErrorResponse(rv) + if rv.status() == http::StatusCode::NOT_FOUND => + { + rows.push(Row { + dataset_id: dataset.id(), + region_id: region.id(), + dataset_addr, + error: String::from( + "Agent does not know about this region!", + ), + }); + } + + _ => { + eprintln!( + "{} region_get {:?}: {e}", + dataset_addr, + region.id(), + ); + } + } + + continue; + } + }; + + // The Agent is aware of this region, but is it in the appropriate + // state? + + match actual_region.state { + State::Destroyed => { + // If it is destroyed, then this is invalid as the record should + // be hard-deleted as well (see the lifetime note above). Note + // that omdb could be racing a Nexus that is performing region + // deletion: if the region transitioned to Destroyed but Nexus + // is waiting to re-poll, it will not have hard-deleted the + // region record yet. + + rows.push(Row { + dataset_id: dataset.id(), + region_id: region.id(), + dataset_addr, + error: String::from( + "region may need to be manually hard-deleted", + ), + }); + } + + _ => { + // ok + } + } + } + + // Reconcile with the Crucible agents: are there regions that Nexus does not + // know about? Ask each Crucible agent for its list of regions, then check + // in the database: if that region is _not_ in the database, then either it + // was never created by Nexus, or it was hard-deleted by Nexus. Either way, + // omdb should (if the command line argument is supplied) request that the + // orphaned region be deleted. + // + // Note: This should not delete what is actually a valid region, see the + // lifetime note above. + + let mut orphaned_bytes: u64 = 0; + + let db_region_ids: BTreeSet = + datasets_and_regions.iter().map(|(_, r)| r.id()).collect(); + + // Find all the Crucible datasets + let datasets: Vec = datastore + .pool_connection_for_tests() + .await? + .transaction_async(|conn| async move { + // Selecting all datasets and regions requires a full table scan + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; + + use db::schema::dataset::dsl; + + dsl::dataset + .filter(dsl::kind.eq(nexus_db_model::DatasetKind::Crucible)) + .select(Dataset::as_select()) + .get_results_async(&conn) + .await + }) + .await?; + + for dataset in &datasets { + // If the dataset was expunged, do not attempt to contact the Crucible + // agent! + let in_service = + datastore.dataset_physical_disk_in_service(dataset.id()).await?; + + if !in_service { + eprintln!( + "dataset {} {:?} is not in service, skipping", + dataset.id(), + dataset.address(), + ); + continue; + } + + use crucible_agent_client::types::State; + use crucible_agent_client::Client as CrucibleAgentClient; + + let Some(dataset_addr) = dataset.address() else { + eprintln!("Dataset {} missing an IP address", dataset.id()); + continue; + }; + + let url = format!("http://{}", dataset_addr); + let client = CrucibleAgentClient::new(&url); + + let actual_regions = match client.region_list().await { + Ok(v) => v.into_inner(), + Err(e) => { + eprintln!("{} region_list: {e}", dataset_addr); + continue; + } + }; + + for actual_region in actual_regions { + // Skip doing anything if the region is already tombstoned or + // destroyed + match actual_region.state { + State::Destroyed | State::Tombstoned => { + // the Crucible agent will eventually clean this up, or + // already has. 
+ continue; + } + + State::Failed | State::Requested | State::Created => { + // this region needs cleaning up if there isn't an + // associated db record + } + } + + let actual_region_id: Uuid = actual_region.id.0.parse().unwrap(); + if !db_region_ids.contains(&actual_region_id) { + orphaned_bytes += actual_region.block_size + * actual_region.extent_size + * u64::from(actual_region.extent_count); + + match clean_up_orphaned_regions { + CleanUpOrphanedRegions::Yes { .. } => { + match client.region_delete(&actual_region.id).await { + Ok(_) => { + eprintln!( + "{} region {} deleted ok", + dataset_addr, actual_region.id, + ); + } + + Err(e) => { + eprintln!( + "{} region_delete {:?}: {e}", + dataset_addr, actual_region.id, + ); + } + } + } + + CleanUpOrphanedRegions::No => { + // Do not delete this region, just print a row + rows.push(Row { + dataset_id: dataset.id(), + region_id: actual_region_id, + dataset_addr, + error: String::from( + "Nexus does not know about this region!", + ), + }); + } + } + } + } + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .to_string(); + + println!("{}", table); + + eprintln!("found {} orphaned bytes", orphaned_bytes); + + Ok(()) +} + async fn cmd_db_validate_region_snapshots( datastore: &DataStore, ) -> Result<(), anyhow::Error> { @@ -4581,6 +4892,15 @@ async fn cmd_db_validate_region_snapshots( .or_default() .insert(region_snapshot.snapshot_id); + // If the dataset was expunged, do not attempt to contact the Crucible + // agent! + let in_service = + datastore.dataset_physical_disk_in_service(dataset.id()).await?; + + if !in_service { + continue; + } + use crucible_agent_client::types::RegionId; use crucible_agent_client::types::State; use crucible_agent_client::Client as CrucibleAgentClient; @@ -4593,11 +4913,21 @@ async fn cmd_db_validate_region_snapshots( let url = format!("http://{}", dataset_addr); let client = CrucibleAgentClient::new(&url); - let actual_region_snapshots = client + let actual_region_snapshots = match client .region_get_snapshots(&RegionId( region_snapshot.region_id.to_string(), )) - .await?; + .await + { + Ok(v) => v, + Err(e) => { + eprintln!( + "{} region_get_snapshots {:?}: {e}", + dataset_addr, region_snapshot.region_id, + ); + continue; + } + }; let snapshot_id = region_snapshot.snapshot_id.to_string(); @@ -4741,6 +5071,15 @@ async fn cmd_db_validate_region_snapshots( // Reconcile with the Crucible agents: are there snapshots that Nexus does // not know about? for (dataset, region) in datasets_and_regions { + // If the dataset was expunged, do not attempt to contact the Crucible + // agent! 
+ let in_service = + datastore.dataset_physical_disk_in_service(dataset.id()).await?; + + if !in_service { + continue; + } + use crucible_agent_client::types::RegionId; use crucible_agent_client::types::State; use crucible_agent_client::Client as CrucibleAgentClient; @@ -4753,9 +5092,20 @@ async fn cmd_db_validate_region_snapshots( let url = format!("http://{}", dataset_addr); let client = CrucibleAgentClient::new(&url); - let actual_region_snapshots = client + let actual_region_snapshots = match client .region_get_snapshots(&RegionId(region.id().to_string())) - .await?; + .await + { + Ok(v) => v, + Err(e) => { + eprintln!( + "{} region_get_snapshots {:?}: {e}", + dataset_addr, + region.id(), + ); + continue; + } + }; let default = HashSet::default(); let nexus_region_snapshots: &HashSet = diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index fe16b4076e..eb7d74b340 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -1655,6 +1655,14 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { println!(" > {line}"); } + println!( + " total requests completed ok: {}", + status.requests_completed_ok.len(), + ); + for line in &status.requests_completed_ok { + println!(" > {line}"); + } + println!(" errors: {}", status.errors.len()); for line in &status.errors { println!(" > {line}"); @@ -1720,6 +1728,14 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { println!(" > {line}"); } + println!( + " total steps set to volume_deleted ok: {}", + status.step_set_volume_deleted_ok.len(), + ); + for line in &status.step_set_volume_deleted_ok { + println!(" > {line}"); + } + println!(" errors: {}", status.errors.len()); for line in &status.errors { println!(" > {line}"); @@ -1831,10 +1847,11 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { Ok(status) => { println!( - " total records transitioned to done: {}", - status.records_set_to_done.len(), + " region snapshot replacement finish sagas started \ + ok: {}", + status.finish_invoked_ok.len() ); - for line in &status.records_set_to_done { + for line in &status.finish_invoked_ok { println!(" > {line}"); } diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index dcf1671d8f..75f6ce0d0e 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -627,7 +627,7 @@ task: "region_snapshot_replacement_finish" currently executing: no last completed activation: , triggered by a periodic timer firing started at (s ago) and ran for ms - total records transitioned to done: 0 + region snapshot replacement finish sagas started ok: 0 errors: 0 task: "region_snapshot_replacement_garbage_collection" @@ -645,6 +645,7 @@ task: "region_snapshot_replacement_start" started at (s ago) and ran for ms total requests created ok: 0 total start saga invoked ok: 0 + total requests completed ok: 0 errors: 0 task: "region_snapshot_replacement_step" @@ -655,6 +656,7 @@ task: "region_snapshot_replacement_step" total step records created ok: 0 total step garbage collect saga invoked ok: 0 total step saga invoked ok: 0 + total steps set to volume_deleted ok: 0 errors: 0 task: "saga_recovery" @@ -1070,7 +1072,7 @@ task: "region_snapshot_replacement_finish" currently executing: no last completed activation: , triggered by a periodic timer firing started at (s ago) and ran for ms - total records transitioned to done: 0 + region snapshot replacement finish sagas 
started ok: 0 errors: 0 task: "region_snapshot_replacement_garbage_collection" @@ -1088,6 +1090,7 @@ task: "region_snapshot_replacement_start" started at (s ago) and ran for ms total requests created ok: 0 total start saga invoked ok: 0 + total requests completed ok: 0 errors: 0 task: "region_snapshot_replacement_step" @@ -1098,6 +1101,7 @@ task: "region_snapshot_replacement_step" total step records created ok: 0 total step garbage collect saga invoked ok: 0 total step saga invoked ok: 0 + total steps set to volume_deleted ok: 0 errors: 0 task: "saga_recovery" diff --git a/internal-dns/resolver/src/resolver.rs b/internal-dns/resolver/src/resolver.rs index af47bb23ad..8d0e071e19 100644 --- a/internal-dns/resolver/src/resolver.rs +++ b/internal-dns/resolver/src/resolver.rs @@ -12,7 +12,7 @@ use omicron_common::address::{ get_internal_dns_server_addresses, Ipv6Subnet, AZ_PREFIX, DNS_PORT, }; use slog::{debug, error, info, trace}; -use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; #[derive(Debug, Clone, thiserror::Error)] pub enum ResolveError { @@ -323,20 +323,6 @@ impl Resolver { } } - pub async fn lookup_ip( - &self, - srv: ServiceName, - ) -> Result { - let name = srv.srv_name(); - debug!(self.log, "lookup srv"; "dns_name" => &name); - let response = self.resolver.lookup_ip(&name).await?; - let address = response - .iter() - .next() - .ok_or_else(|| ResolveError::NotFound(srv))?; - Ok(address) - } - /// Returns an iterator of [`SocketAddrV6`]'s for the targets of the given /// SRV lookup response. // SRV records have a target, which is itself another DNS name that needs @@ -534,7 +520,7 @@ mod test { let resolver = dns_server.resolver().unwrap(); let err = resolver - .lookup_ip(ServiceName::Cockroach) + .lookup_srv(ServiceName::Cockroach) .await .expect_err("Looking up non-existent service should fail"); diff --git a/live-tests/tests/common/mod.rs b/live-tests/tests/common/mod.rs index 50f84d0b59..8a60b332b6 100644 --- a/live-tests/tests/common/mod.rs +++ b/live-tests/tests/common/mod.rs @@ -157,7 +157,7 @@ async fn check_execution_environment( // The only real requirement for these tests is that they're run from a // place with connectivity to the underlay network of a deployed control // plane. The easiest way to tell is to look up something in internal DNS. 
- resolver.lookup_ip(ServiceName::InternalDns).await.map_err(|e| { + resolver.lookup_srv(ServiceName::InternalDns).await.map_err(|e| { let text = format!( "check_execution_environment(): failed to look up internal DNS \ in the internal DNS servers.\n\n \ diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 88fd3a0978..b2b1001f9b 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -181,6 +181,8 @@ impl OmicronZoneConfig { JsonSchema, PartialEq, Eq, + PartialOrd, + Ord, Hash, Diffus, )] diff --git a/nexus/db-model/src/region_snapshot_replacement.rs b/nexus/db-model/src/region_snapshot_replacement.rs index bcbd55028d..28627d8379 100644 --- a/nexus/db-model/src/region_snapshot_replacement.rs +++ b/nexus/db-model/src/region_snapshot_replacement.rs @@ -28,6 +28,7 @@ impl_enum_type!( ReplacementDone => b"replacement_done" DeletingOldVolume => b"deleting_old_volume" Running => b"running" + Completing => b"completing" Complete => b"complete" ); @@ -46,6 +47,7 @@ impl std::str::FromStr for RegionSnapshotReplacementState { Ok(RegionSnapshotReplacementState::DeletingOldVolume) } "running" => Ok(RegionSnapshotReplacementState::Running), + "completing" => Ok(RegionSnapshotReplacementState::Completing), "complete" => Ok(RegionSnapshotReplacementState::Complete), _ => Err(format!("unrecognized value {} for enum", s)), } @@ -79,9 +81,14 @@ impl std::str::FromStr for RegionSnapshotReplacementState { /// | | /// v --- /// --- -/// Running | -/// | set in region snapshot replacement -/// | | finish background task +/// Running <-- | +/// | | +/// | | | +/// v | | +/// | | responsibility of region snapshot +/// Completing -- | replacement finish saga +/// | +/// | | /// v | /// | /// Complete --- @@ -133,6 +140,12 @@ pub struct RegionSnapshotReplacement { pub replacement_state: RegionSnapshotReplacementState, pub operating_saga_id: Option, + + /// In order for the newly created region not to be deleted inadvertently, + /// an additional reference count bump is required. This volume should live + /// as long as this request so that all necessary replacements can be + /// completed. + pub new_region_volume_id: Option, } impl RegionSnapshotReplacement { @@ -157,6 +170,7 @@ impl RegionSnapshotReplacement { old_snapshot_id, old_snapshot_volume_id: None, new_region_id: None, + new_region_volume_id: None, replacement_state: RegionSnapshotReplacementState::Requested, operating_saga_id: None, } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 399da81ea4..a54c2e3029 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1932,6 +1932,7 @@ table! { new_region_id -> Nullable, replacement_state -> crate::RegionSnapshotReplacementStateEnum, operating_saga_id -> Nullable, + new_region_volume_id -> Nullable, } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 02646bc6dd..8b663d2549 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. 
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(116, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(117, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(117, "add-completing-and-new-region-volume"), KnownVersion::new(116, "bp-physical-disk-disposition"), KnownVersion::new(115, "inv-omicron-physical-disks-generation"), KnownVersion::new(114, "crucible-ref-count-records"), diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index c90f1cc92e..d20f24e773 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -125,16 +125,7 @@ pub use sled::TransitionError; pub use switch_port::SwitchPortSettingsCombinedResult; pub use virtual_provisioning_collection::StorageType; pub use vmm::VmmStateUpdateResult; -pub use volume::read_only_resources_associated_with_volume; -pub use volume::CrucibleResources; -pub use volume::CrucibleTargets; -pub use volume::ExistingTarget; -pub use volume::ReplacementTarget; -pub use volume::VolumeCheckoutReason; -pub use volume::VolumeReplaceResult; -pub use volume::VolumeReplacementParams; -pub use volume::VolumeToDelete; -pub use volume::VolumeWithTarget; +pub use volume::*; // Number of unique datasets required to back a region. // TODO: This should likely turn into a configuration option. diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index 885cb622b8..67bd37cf69 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -422,8 +422,8 @@ impl DataStore { } } - /// Find regions on expunged disks - pub async fn find_regions_on_expunged_physical_disks( + /// Find read/write regions on expunged disks + pub async fn find_read_write_regions_on_expunged_physical_disks( &self, opctx: &OpContext, ) -> LookupResult> { @@ -450,6 +450,8 @@ impl DataStore { )) .select(dataset_dsl::id) )) + // only return read-write regions here + .filter(region_dsl::read_only.eq(false)) .select(Region::as_select()) .load_async(&*conn) .await @@ -546,6 +548,42 @@ impl DataStore { Ok(records) } + + /// Find regions not on expunged disks that match a volume id + pub async fn find_non_expunged_regions( + &self, + opctx: &OpContext, + volume_id: Uuid, + ) -> LookupResult> { + let conn = self.pool_connection_authorized(opctx).await?; + + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::physical_disk::dsl as physical_disk_dsl; + use db::schema::region::dsl as region_dsl; + use db::schema::zpool::dsl as zpool_dsl; + + region_dsl::region + .filter(region_dsl::dataset_id.eq_any( + dataset_dsl::dataset + .filter(dataset_dsl::time_deleted.is_null()) + .filter(dataset_dsl::pool_id.eq_any( + zpool_dsl::zpool + .filter(zpool_dsl::time_deleted.is_null()) + .filter(zpool_dsl::physical_disk_id.eq_any( + physical_disk_dsl::physical_disk + .filter(physical_disk_dsl::disk_policy.eq(PhysicalDiskPolicy::InService)) + .select(physical_disk_dsl::id) + )) + .select(zpool_dsl::id) + )) + .select(dataset_dsl::id) + )) + .filter(region_dsl::volume_id.eq(volume_id)) + .select(Region::as_select()) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } #[cfg(test)] diff --git 
a/nexus/db-queries/src/db/datastore/region_replacement.rs b/nexus/db-queries/src/db/datastore/region_replacement.rs index 0fda6b46ba..8aad7f2cfd 100644 --- a/nexus/db-queries/src/db/datastore/region_replacement.rs +++ b/nexus/db-queries/src/db/datastore/region_replacement.rs @@ -37,6 +37,13 @@ impl DataStore { opctx: &OpContext, region: &Region, ) -> Result { + if region.read_only() { + return Err(Error::invalid_request(format!( + "region {} is read-only", + region.id(), + ))); + } + let request = RegionReplacement::for_region(region); let request_id = request.id; @@ -52,19 +59,22 @@ impl DataStore { opctx: &OpContext, request: RegionReplacement, ) -> Result<(), Error> { + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper("insert_region_replacement_request") .transaction(&conn, |conn| { let request = request.clone(); + let err = err.clone(); async move { use db::schema::region_replacement::dsl; - Self::volume_repair_insert_query( + Self::volume_repair_insert_in_txn( + &conn, + err, request.volume_id, request.id, ) - .execute_async(&conn) .await?; diesel::insert_into(dsl::region_replacement) @@ -76,7 +86,13 @@ impl DataStore { } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + if let Some(err) = err.take() { + err + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn get_region_replacement_request_by_id( @@ -908,6 +924,7 @@ mod test { use crate::db::pub_test_utils::TestDatabase; use omicron_test_utils::dev; + use sled_agent_client::types::VolumeConstructionRequest; #[tokio::test] async fn test_one_replacement_per_volume() { @@ -919,6 +936,20 @@ mod test { let region_2_id = Uuid::new_v4(); let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request_1 = RegionReplacement::new(region_1_id, volume_id); let request_2 = RegionReplacement::new(region_2_id, volume_id); @@ -950,6 +981,20 @@ mod test { let region_id = Uuid::new_v4(); let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = { let mut request = RegionReplacement::new(region_id, volume_id); request.replacement_state = RegionReplacementState::Running; @@ -1042,6 +1087,20 @@ mod test { let region_id = Uuid::new_v4(); let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = { let mut request = RegionReplacement::new(region_id, volume_id); request.replacement_state = RegionReplacementState::ReplacementDone; diff --git a/nexus/db-queries/src/db/datastore/region_snapshot.rs b/nexus/db-queries/src/db/datastore/region_snapshot.rs index 0129869f4f..f7a34fdb52 100644 --- a/nexus/db-queries/src/db/datastore/region_snapshot.rs +++ b/nexus/db-queries/src/db/datastore/region_snapshot.rs @@ -120,4 +120,40 @@ impl DataStore { .await .map_err(|e| 
public_error_from_diesel(e, ErrorHandler::Server)) } + + /// Find region snapshots not on expunged disks that match a snapshot id + pub async fn find_non_expunged_region_snapshots( + &self, + opctx: &OpContext, + snapshot_id: Uuid, + ) -> LookupResult> { + let conn = self.pool_connection_authorized(opctx).await?; + + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::physical_disk::dsl as physical_disk_dsl; + use db::schema::region_snapshot::dsl as region_snapshot_dsl; + use db::schema::zpool::dsl as zpool_dsl; + + region_snapshot_dsl::region_snapshot + .filter(region_snapshot_dsl::dataset_id.eq_any( + dataset_dsl::dataset + .filter(dataset_dsl::time_deleted.is_null()) + .filter(dataset_dsl::pool_id.eq_any( + zpool_dsl::zpool + .filter(zpool_dsl::time_deleted.is_null()) + .filter(zpool_dsl::physical_disk_id.eq_any( + physical_disk_dsl::physical_disk + .filter(physical_disk_dsl::disk_policy.eq(PhysicalDiskPolicy::InService)) + .select(physical_disk_dsl::id) + )) + .select(zpool_dsl::id) + )) + .select(dataset_dsl::id) + )) + .filter(region_snapshot_dsl::snapshot_id.eq(snapshot_id)) + .select(RegionSnapshot::as_select()) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } diff --git a/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs b/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs index 76a83cca2a..90b014c582 100644 --- a/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs +++ b/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs @@ -16,7 +16,6 @@ use crate::db::model::RegionSnapshotReplacement; use crate::db::model::RegionSnapshotReplacementState; use crate::db::model::RegionSnapshotReplacementStep; use crate::db::model::RegionSnapshotReplacementStepState; -use crate::db::model::VolumeRepair; use crate::db::pagination::paginated; use crate::db::pagination::Paginator; use crate::db::update_and_check::UpdateAndCheck; @@ -93,6 +92,7 @@ impl DataStore { request: RegionSnapshotReplacement, volume_id: Uuid, ) -> Result<(), Error> { + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper( @@ -100,20 +100,24 @@ impl DataStore { ) .transaction(&conn, |conn| { let request = request.clone(); + let err = err.clone(); async move { use db::schema::region_snapshot_replacement::dsl; - use db::schema::volume_repair::dsl as volume_repair_dsl; - // An associated volume repair record isn't _strictly_ needed: - // snapshot volumes should never be directly constructed, and - // therefore won't ever have an associated Upstairs that - // receives a volume replacement request. However it's being - // done in an attempt to be overly cautious. - - diesel::insert_into(volume_repair_dsl::volume_repair) - .values(VolumeRepair { volume_id, repair_id: request.id }) - .execute_async(&conn) - .await?; + // An associated volume repair record isn't _strictly_ + // needed: snapshot volumes should never be directly + // constructed, and therefore won't ever have an associated + // Upstairs that receives a volume replacement request. + // However it's being done in an attempt to be overly + // cautious, and it validates that the volume exist: + // otherwise it would be possible to create a region + // snapshot replacement request for a volume that didn't + // exist! 
+ + Self::volume_repair_insert_in_txn( + &conn, err, volume_id, request.id, + ) + .await?; diesel::insert_into(dsl::region_snapshot_replacement) .values(request) @@ -124,7 +128,13 @@ impl DataStore { } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + if let Some(err) = err.take() { + err + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn get_region_snapshot_replacement_request_by_id( @@ -342,6 +352,7 @@ impl DataStore { region_snapshot_replacement_id: Uuid, operating_saga_id: Uuid, new_region_id: Uuid, + new_region_volume_id: Uuid, old_snapshot_volume_id: Uuid, ) -> Result<(), Error> { use db::schema::region_snapshot_replacement::dsl; @@ -357,6 +368,7 @@ impl DataStore { .eq(RegionSnapshotReplacementState::ReplacementDone), dsl::old_snapshot_volume_id.eq(Some(old_snapshot_volume_id)), dsl::new_region_id.eq(Some(new_region_id)), + dsl::new_region_volume_id.eq(Some(new_region_volume_id)), dsl::operating_saga_id.eq(Option::::None), )) .check_if_exists::( @@ -375,6 +387,8 @@ impl DataStore { && record.replacement_state == RegionSnapshotReplacementState::ReplacementDone && record.new_region_id == Some(new_region_id) + && record.new_region_volume_id + == Some(new_region_volume_id) && record.old_snapshot_volume_id == Some(old_snapshot_volume_id) { @@ -557,15 +571,201 @@ impl DataStore { } } - /// Transition a RegionSnapshotReplacement record from Running to Complete. - /// Also removes the `volume_repair` record that is taking a "lock" on the - /// Volume. Note this doesn't occur from a saga context, and therefore 1) - /// doesn't accept an operating saga id parameter, and 2) checks that - /// operating_saga_id is null for the corresponding record. + /// Transition a RegionSnapshotReplacement record from Running to + /// Completing, setting a unique id at the same time. + pub async fn set_region_snapshot_replacement_completing( + &self, + opctx: &OpContext, + region_snapshot_replacement_id: Uuid, + operating_saga_id: Uuid, + ) -> Result<(), Error> { + use db::schema::region_snapshot_replacement::dsl; + let updated = diesel::update(dsl::region_snapshot_replacement) + .filter(dsl::id.eq(region_snapshot_replacement_id)) + .filter( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Running), + ) + .filter(dsl::operating_saga_id.is_null()) + .set(( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Completing), + dsl::operating_saga_id.eq(operating_saga_id), + )) + .check_if_exists::( + region_snapshot_replacement_id, + ) + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) + .await; + + match updated { + Ok(result) => match result.status { + UpdateStatus::Updated => Ok(()), + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.operating_saga_id == Some(operating_saga_id) + && record.replacement_state + == RegionSnapshotReplacementState::Completing + { + Ok(()) + } else { + Err(Error::conflict(format!( + "region snapshot replacement {} set to {:?} \ + (operating saga id {:?})", + region_snapshot_replacement_id, + record.replacement_state, + record.operating_saga_id, + ))) + } + } + }, + + Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)), + } + } + + /// Transition a RegionReplacement record from Completing to Running, + /// clearing the operating saga id. 
+ pub async fn undo_set_region_snapshot_replacement_completing( + &self, + opctx: &OpContext, + region_snapshot_replacement_id: Uuid, + operating_saga_id: Uuid, + ) -> Result<(), Error> { + use db::schema::region_snapshot_replacement::dsl; + let updated = diesel::update(dsl::region_snapshot_replacement) + .filter(dsl::id.eq(region_snapshot_replacement_id)) + .filter( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Completing), + ) + .filter(dsl::operating_saga_id.eq(operating_saga_id)) + .set(( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Running), + dsl::operating_saga_id.eq(Option::::None), + )) + .check_if_exists::( + region_snapshot_replacement_id, + ) + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) + .await; + + match updated { + Ok(result) => match result.status { + UpdateStatus::Updated => Ok(()), + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.operating_saga_id == None + && record.replacement_state + == RegionSnapshotReplacementState::Running + { + Ok(()) + } else { + Err(Error::conflict(format!( + "region snapshot replacement {} set to {:?} \ + (operating saga id {:?})", + region_snapshot_replacement_id, + record.replacement_state, + record.operating_saga_id, + ))) + } + } + }, + + Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)), + } + } + + /// Transition a RegionSnapshotReplacement record from Completing to + /// Complete. Also removes the `volume_repair` record that is taking a + /// "lock" on the Volume. pub async fn set_region_snapshot_replacement_complete( &self, opctx: &OpContext, region_snapshot_replacement_id: Uuid, + operating_saga_id: Uuid, + ) -> Result<(), Error> { + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; + + self.transaction_retry_wrapper( + "set_region_snapshot_replacement_complete", + ) + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + use db::schema::volume_repair::dsl as volume_repair_dsl; + + diesel::delete( + volume_repair_dsl::volume_repair.filter( + volume_repair_dsl::repair_id + .eq(region_snapshot_replacement_id), + ), + ) + .execute_async(&conn) + .await?; + + use db::schema::region_snapshot_replacement::dsl; + + let result = diesel::update(dsl::region_snapshot_replacement) + .filter(dsl::id.eq(region_snapshot_replacement_id)) + .filter( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Completing), + ) + .filter(dsl::operating_saga_id.eq(operating_saga_id)) + .set(( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Complete), + dsl::operating_saga_id.eq(Option::::None), + )) + .check_if_exists::( + region_snapshot_replacement_id, + ) + .execute_and_check(&conn) + .await?; + + match result.status { + UpdateStatus::Updated => Ok(()), + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.replacement_state + == RegionSnapshotReplacementState::Complete + { + Ok(()) + } else { + Err(err.bail(Error::conflict(format!( + "region snapshot replacement {} set to {:?} \ + (operating saga id {:?})", + region_snapshot_replacement_id, + record.replacement_state, + record.operating_saga_id, + )))) + } + } + } + } + }) + .await + .map_err(|e| match err.take() { + Some(error) => error, + None => public_error_from_diesel(e, ErrorHandler::Server), + }) + } + + /// Transition a RegionSnapshotReplacement record from Requested to Complete + /// - this is required when the region snapshot is hard-deleted, which means + /// that all volume 
references are gone and no replacement is required. Also + /// removes the `volume_repair` record that is taking a "lock" on the + /// Volume. + pub async fn set_region_snapshot_replacement_complete_from_requested( + &self, + opctx: &OpContext, + region_snapshot_replacement_id: Uuid, ) -> Result<(), Error> { type TxnError = TransactionError; @@ -577,6 +777,7 @@ impl DataStore { let err = err.clone(); async move { use db::schema::volume_repair::dsl as volume_repair_dsl; + use db::schema::region_snapshot_replacement::dsl; diesel::delete( volume_repair_dsl::volume_repair.filter( @@ -587,17 +788,16 @@ impl DataStore { .execute_async(&conn) .await?; - use db::schema::region_snapshot_replacement::dsl; - let result = diesel::update(dsl::region_snapshot_replacement) .filter(dsl::id.eq(region_snapshot_replacement_id)) .filter( dsl::replacement_state - .eq(RegionSnapshotReplacementState::Running), + .eq(RegionSnapshotReplacementState::Requested), ) .filter(dsl::operating_saga_id.is_null()) - .set((dsl::replacement_state - .eq(RegionSnapshotReplacementState::Complete),)) + .filter(dsl::new_region_volume_id.is_null()) + .set(dsl::replacement_state + .eq(RegionSnapshotReplacementState::Complete)) .check_if_exists::( region_snapshot_replacement_id, ) @@ -621,7 +821,7 @@ impl DataStore { region_snapshot_replacement_id, record.replacement_state, record.operating_saga_id, - ), + ) )))) } } @@ -651,6 +851,7 @@ impl DataStore { opctx: &OpContext, request: RegionSnapshotReplacementStep, ) -> Result { + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper( @@ -658,10 +859,10 @@ impl DataStore { ) .transaction(&conn, |conn| { let request = request.clone(); + let err = err.clone(); async move { use db::schema::region_snapshot_replacement_step::dsl; - use db::schema::volume_repair::dsl as volume_repair_dsl; // Skip inserting this new record if we found another region // snapshot replacement step with this volume in the step's @@ -714,13 +915,13 @@ impl DataStore { // volume replacement: create an associated volume repair // record. - diesel::insert_into(volume_repair_dsl::volume_repair) - .values(VolumeRepair { - volume_id: request.volume_id, - repair_id: request.id, - }) - .execute_async(&conn) - .await?; + Self::volume_repair_insert_in_txn( + &conn, + err, + request.volume_id, + request.id, + ) + .await?; let request_id = request.id; @@ -733,7 +934,13 @@ impl DataStore { } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + if let Some(err) = err.take() { + err + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn get_region_snapshot_replacement_step_by_id( @@ -1073,6 +1280,87 @@ impl DataStore { Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)), } } + + /// Transition from Requested to VolumeDeleted, and remove the associated + /// `volume_repair` record. This occurs when the associated snapshot's + /// volume is deleted. Note this doesn't occur from a saga context, and + /// therefore 1) doesn't accept an operating saga id parameter, and 2) + /// checks that operating_saga_id is null for the corresponding record. 
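Taken together, the datastore methods in this file add the following transitions; a consolidated sketch, with state and method names exactly as they appear in the surrounding hunks:

    // RegionSnapshotReplacement:
    //   Running    -> Completing   set_region_snapshot_replacement_completing
    //                              (the finish saga claims the record via operating_saga_id)
    //   Completing -> Running      undo_set_region_snapshot_replacement_completing (saga unwind)
    //   Completing -> Complete     set_region_snapshot_replacement_complete
    //                              (also drops the volume_repair "lock" record)
    //   Requested  -> Complete     set_region_snapshot_replacement_complete_from_requested
    //                              (region snapshot hard-deleted; no replacement needed)
    //
    // RegionSnapshotReplacementStep:
    //   Requested  -> VolumeDeleted  set_region_snapshot_replacement_step_volume_deleted_from_requested
    //                                (the step's volume was deleted, so no step work remains)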
+ pub async fn set_region_snapshot_replacement_step_volume_deleted_from_requested( + &self, + opctx: &OpContext, + region_snapshot_replacement_step: RegionSnapshotReplacementStep, + ) -> Result<(), Error> { + let conn = self.pool_connection_authorized(opctx).await?; + let err = OptionalError::new(); + + self.transaction_retry_wrapper( + "set_region_snapshot_replacement_complete", + ) + .transaction(&conn, |conn| { + let err = err.clone(); + + async move { + use db::schema::volume_repair::dsl as volume_repair_dsl; + + diesel::delete( + volume_repair_dsl::volume_repair.filter( + volume_repair_dsl::repair_id + .eq(region_snapshot_replacement_step.id), + ), + ) + .execute_async(&conn) + .await?; + + use db::schema::region_snapshot_replacement_step::dsl; + let result = + diesel::update(dsl::region_snapshot_replacement_step) + .filter(dsl::id.eq(region_snapshot_replacement_step.id)) + .filter(dsl::operating_saga_id.is_null()) + .filter(dsl::old_snapshot_volume_id.is_null()) + .filter( + dsl::replacement_state.eq( + RegionSnapshotReplacementStepState::Requested, + ), + ) + .set(dsl::replacement_state.eq( + RegionSnapshotReplacementStepState::VolumeDeleted, + )) + .check_if_exists::( + region_snapshot_replacement_step.id, + ) + .execute_and_check(&conn) + .await?; + + match result.status { + UpdateStatus::Updated => Ok(()), + + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.replacement_state + == RegionSnapshotReplacementStepState::VolumeDeleted + { + Ok(()) + } else { + Err(err.bail(Error::conflict(format!( + "region snapshot replacement step {} set \ + to {:?} (operating saga id {:?})", + region_snapshot_replacement_step.id, + record.replacement_state, + record.operating_saga_id, + )))) + } + } + } + } + }) + .await + .map_err(|e| match err.take() { + Some(error) => error, + None => public_error_from_diesel(e, ErrorHandler::Server), + }) + } } #[cfg(test)] @@ -1083,6 +1371,7 @@ mod test { use crate::db::pub_test_utils::TestDatabase; use omicron_test_utils::dev; use omicron_uuid_kinds::DatasetUuid; + use sled_agent_client::types::VolumeConstructionRequest; #[tokio::test] async fn test_one_replacement_per_volume() { @@ -1100,6 +1389,20 @@ mod test { let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request_1 = RegionSnapshotReplacement::new( dataset_1_id, region_1_id, @@ -1146,6 +1449,20 @@ mod test { let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request_1 = RegionSnapshotReplacement::new( dataset_1_id, region_1_id, @@ -1182,6 +1499,20 @@ mod test { let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = RegionSnapshotReplacement::new(dataset_id, region_id, snapshot_id); @@ -1214,11 +1545,25 @@ mod test { // Insert some replacement steps, and make sure counting works + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + { - let step = RegionSnapshotReplacementStep::new( - request_id, - Uuid::new_v4(), // volume id - ); + let step = + RegionSnapshotReplacementStep::new(request_id, step_volume_id); let result = datastore .insert_region_snapshot_replacement_step(&opctx, step) @@ -1247,11 +1592,25 @@ mod test { 1, ); + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + { - let mut step = RegionSnapshotReplacementStep::new( - request_id, - Uuid::new_v4(), // volume id - ); + let mut step = + RegionSnapshotReplacementStep::new(request_id, step_volume_id); step.replacement_state = RegionSnapshotReplacementStepState::Running; @@ -1283,11 +1642,25 @@ mod test { 1, ); + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + { - let mut step = RegionSnapshotReplacementStep::new( - request_id, - Uuid::new_v4(), // volume id - ); + let mut step = + RegionSnapshotReplacementStep::new(request_id, step_volume_id); // VolumeDeleted does not count as "in-progress" step.replacement_state = @@ -1337,6 +1710,20 @@ mod test { let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let step = RegionSnapshotReplacementStep::new(Uuid::new_v4(), volume_id); let first_request_id = step.id; @@ -1431,6 +1818,22 @@ mod test { let db = TestDatabase::new_with_datastore(&logctx.log).await; let (opctx, datastore) = (db.opctx(), db.datastore()); + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut request = RegionSnapshotReplacement::new( DatasetUuid::new_v4(), Uuid::new_v4(), @@ -1442,9 +1845,7 @@ mod test { datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); @@ -1457,8 +1858,24 @@ mod test { .unwrap() .is_empty()); + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut step = - RegionSnapshotReplacementStep::new(request_id, Uuid::new_v4()); + RegionSnapshotReplacementStep::new(request_id, step_volume_id); step.replacement_state = RegionSnapshotReplacementStepState::Complete; let result = datastore @@ -1468,8 +1885,24 @@ mod test { assert!(matches!(result, InsertStepResult::Inserted { .. })); + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut step = - RegionSnapshotReplacementStep::new(request_id, Uuid::new_v4()); + RegionSnapshotReplacementStep::new(request_id, step_volume_id); step.replacement_state = RegionSnapshotReplacementStepState::Complete; let result = datastore @@ -1509,6 +1942,34 @@ mod test { let volume_id = Uuid::new_v4(); let old_snapshot_volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + + datastore + .volume_create(nexus_db_model::Volume::new( + old_snapshot_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut step = RegionSnapshotReplacementStep::new(request_id, volume_id); step.replacement_state = RegionSnapshotReplacementStepState::Complete; @@ -1558,6 +2019,20 @@ mod test { let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = RegionReplacement::new(Uuid::new_v4(), volume_id); datastore diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index 4e0d1ccac1..505533da50 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -59,6 +59,7 @@ use serde::Deserialize; use serde::Deserializer; use serde::Serialize; use sled_agent_client::types::VolumeConstructionRequest; +use std::collections::HashSet; use std::collections::VecDeque; use std::net::AddrParseError; use std::net::SocketAddr; @@ -179,6 +180,23 @@ enum ReplaceSnapshotError { MultipleResourceUsageRecords(String), } +/// Crucible resources freed by previous volume deletes +#[derive(Debug, Serialize, Deserialize)] +pub struct FreedCrucibleResources { + /// Regions that previously could not be deleted (often due to region + /// snaphots) that were freed by a volume delete + pub datasets_and_regions: Vec<(Dataset, Region)>, + + /// Previously soft-deleted volumes that can now be hard-deleted + pub volumes: Vec, +} + +impl FreedCrucibleResources { + pub fn is_empty(&self) -> bool { + self.datasets_and_regions.is_empty() && self.volumes.is_empty() + } +} + impl DataStore { async fn volume_create_in_txn( conn: &async_bb8_diesel::Connection, @@ -412,7 +430,7 @@ impl DataStore { }) } - async fn volume_get_impl( + pub(super) async fn volume_get_impl( conn: &async_bb8_diesel::Connection, volume_id: Uuid, ) -> Result, diesel::result::Error> { @@ -1115,12 +1133,12 @@ impl DataStore { .await } - /// Find regions for deleted volumes that do not have associated region - /// snapshots and are not being used by any other non-deleted volumes, and - /// return them for garbage collection + /// Find read/write regions for deleted volumes that do not have associated + /// region snapshots and are not being used by any other non-deleted + /// volumes, and return them for garbage collection pub async fn find_deleted_volume_regions( &self, - ) -> ListResultVec<(Dataset, Region, Option)> { + ) -> LookupResult { let conn = self.pool_connection_unauthorized().await?; self.transaction_retry_wrapper("find_deleted_volume_regions") .transaction(&conn, |conn| async move { @@ -1132,8 +1150,7 @@ impl DataStore { async fn find_deleted_volume_regions_in_txn( conn: &async_bb8_diesel::Connection, - ) -> Result)>, diesel::result::Error> - { + ) -> Result { use db::schema::dataset::dsl as dataset_dsl; use db::schema::region::dsl as region_dsl; use db::schema::region_snapshot::dsl; @@ -1179,6 +1196,9 @@ impl DataStore { let mut deleted_regions = Vec::with_capacity(unfiltered_deleted_regions.len()); + let mut volume_set: HashSet = + HashSet::with_capacity(unfiltered_deleted_regions.len()); + for (dataset, region, region_snapshot, volume) in unfiltered_deleted_regions { @@ -1212,10 +1232,61 @@ impl DataStore { continue; } + if let Some(volume) = &volume { + 
volume_set.insert(volume.id()); + } + deleted_regions.push((dataset, region, volume)); } - Ok(deleted_regions) + let regions_for_deletion: HashSet = + deleted_regions.iter().map(|(_, region, _)| region.id()).collect(); + + let mut volumes = Vec::with_capacity(deleted_regions.len()); + + for volume_id in volume_set { + // Do not return a volume hard-deletion if there are still lingering + // read/write regions, unless all those lingering read/write regions + // will be deleted from the result of returning from this function. + let allocated_rw_regions: HashSet = + Self::get_allocated_regions_query(volume_id) + .get_results_async::<(Dataset, Region)>(conn) + .await? + .into_iter() + .filter_map(|(_, region)| { + if !region.read_only() { + Some(region.id()) + } else { + None + } + }) + .collect(); + + if allocated_rw_regions.is_subset(®ions_for_deletion) { + // If all the allocated rw regions for this volume are in the + // set of regions being returned for deletion, then we can + // hard-delete this volume. Read-only region accounting should + // have already been updated by soft-deleting this volume. + // + // Note: we'll be in this branch if allocated_rw_regions is + // empty. I believe the only time we'll hit this empty case is + // when the volume is fully populated with read-only resources + // (read-only regions and region snapshots). + volumes.push(volume_id); + } else { + // Not all r/w regions allocated to this volume are being + // deleted here, so we can't hard-delete the volume yet. + } + } + + Ok(FreedCrucibleResources { + datasets_and_regions: deleted_regions + .into_iter() + .map(|(d, r, _)| (d, r)) + .collect(), + + volumes, + }) } pub async fn read_only_resources_associated_with_volume( @@ -2621,8 +2692,11 @@ pub enum VolumeReplaceResult { // this call performed the replacement Done, - // the "existing" volume was deleted - ExistingVolumeDeleted, + // the "existing" volume was soft deleted + ExistingVolumeSoftDeleted, + + // the "existing" volume was hard deleted + ExistingVolumeHardDeleted, } impl DataStore { @@ -2747,14 +2821,14 @@ impl DataStore { // perform the region replacement now, and this will short-circuit // the rest of the process. - return Ok(VolumeReplaceResult::ExistingVolumeDeleted); + return Ok(VolumeReplaceResult::ExistingVolumeHardDeleted); }; if old_volume.time_deleted.is_some() { // Existing volume was soft-deleted, so return here for the same // reason: the region replacement process should be short-circuited // now. - return Ok(VolumeReplaceResult::ExistingVolumeDeleted); + return Ok(VolumeReplaceResult::ExistingVolumeSoftDeleted); } let old_vcr: VolumeConstructionRequest = @@ -3001,14 +3075,14 @@ impl DataStore { // perform the region replacement now, and this will short-circuit // the rest of the process. - return Ok(VolumeReplaceResult::ExistingVolumeDeleted); + return Ok(VolumeReplaceResult::ExistingVolumeHardDeleted); }; if old_volume.time_deleted.is_some() { // Existing volume was soft-deleted, so return here for the same // reason: the region replacement process should be short-circuited // now. 
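With `ExistingVolumeDeleted` split into soft- and hard-deleted variants above, callers can tell the two cases apart before the short-circuit returns just below; an illustrative, hypothetical caller-side match (not taken from this change):

    match volume_replace_result {
        VolumeReplaceResult::Done => {
            // this call performed the replacement
        }
        VolumeReplaceResult::ExistingVolumeSoftDeleted
        | VolumeReplaceResult::ExistingVolumeHardDeleted => {
            // the existing volume is gone or going away; short-circuit the
            // rest of the replacement process
        }
        _ => {
            // any variants not visible in this hunk
        }
    }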
- return Ok(VolumeReplaceResult::ExistingVolumeDeleted); + return Ok(VolumeReplaceResult::ExistingVolumeSoftDeleted); } let old_vcr: VolumeConstructionRequest = @@ -3682,7 +3756,15 @@ impl DataStore { let mut paginator = Paginator::new(SQL_BATCH_SIZE); let conn = self.pool_connection_authorized(opctx).await?; - let needle = address.to_string(); + let needle = match address { + SocketAddr::V4(_) => { + return Err(Error::internal_error(&format!( + "find_volumes_referencing_socket_addr not ipv6: {address}" + ))); + } + + SocketAddr::V6(addr) => addr, + }; while let Some(p) = paginator.next() { use db::schema::volume::dsl; @@ -3699,7 +3781,23 @@ impl DataStore { paginator = p.found_batch(&haystack, &|r| r.id()); for volume in haystack { - if volume.data().contains(&needle) { + let vcr: VolumeConstructionRequest = + match serde_json::from_str(&volume.data()) { + Ok(vcr) => vcr, + Err(e) => { + return Err(Error::internal_error(&format!( + "cannot deserialize volume data for {}: {e}", + volume.id(), + ))); + } + }; + + let rw_reference = region_in_vcr(&vcr, &needle) + .map_err(|e| Error::internal_error(&e.to_string()))?; + let ro_reference = read_only_target_in_vcr(&vcr, &needle) + .map_err(|e| Error::internal_error(&e.to_string()))?; + + if rw_reference || ro_reference { volumes.push(volume); } } diff --git a/nexus/db-queries/src/db/datastore/volume_repair.rs b/nexus/db-queries/src/db/datastore/volume_repair.rs index 7ea88c8542..598d9d77a2 100644 --- a/nexus/db-queries/src/db/datastore/volume_repair.rs +++ b/nexus/db-queries/src/db/datastore/volume_repair.rs @@ -11,6 +11,8 @@ use crate::db::datastore::RunnableQuery; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::VolumeRepair; +use crate::db::DbConnection; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use diesel::result::DatabaseErrorKind; @@ -19,14 +21,75 @@ use omicron_common::api::external::Error; use uuid::Uuid; impl DataStore { - pub(super) fn volume_repair_insert_query( + /// Insert a volume repair record, taking a "lock" on the volume pointed to + /// by volume id with some repair id. + /// + /// If there exists a record that has a matching volume id and repair id, + /// return Ok(()). + /// + /// If there is no volume that matches the given volume id, return an error: + /// it should not be possible to lock a volume that does not exist! Note + /// that it is possible to lock a soft-deleted volume. + /// + /// If there is already an existing record that has a matching volume id but + /// a different repair id, then this function returns an Error::conflict. + pub(super) async fn volume_repair_insert_in_txn( + conn: &async_bb8_diesel::Connection, + err: OptionalError, volume_id: Uuid, repair_id: Uuid, - ) -> impl RunnableQuery { + ) -> Result<(), diesel::result::Error> { use db::schema::volume_repair::dsl; - diesel::insert_into(dsl::volume_repair) + // If a lock that matches the arguments exists already, return Ok + // + // Note: if rerunning this function (for example if a saga node was + // rerun), the volume could have existed when this lock was inserted the + // first time, but have been deleted now. 
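The helper implemented just below has three outcomes, surfaced to callers through the same `OptionalError` pattern used throughout this change; a condensed sketch using the names from this file:

    // Outcomes of volume_repair_insert_in_txn:
    //   * a volume_repair row with this (volume_id, repair_id) already exists -> Ok(())
    //     (idempotent re-lock, e.g. a rerun saga node)
    //   * the volume was never created or was hard-deleted -> err.bail(Error::invalid_request(..))
    //   * another repair id holds the lock (volume_repair_pkey violation) -> err.bail(Error::conflict(..))
    //
    // Callers (see volume_repair_lock below) let bailed typed errors win over the
    // generic Diesel mapping:
    //
    //   .map_err(|e| match err.take() {
    //       Some(error) => error,
    //       None => public_error_from_diesel(e, ErrorHandler::Server),
    //   })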
+ let maybe_lock = dsl::volume_repair + .filter(dsl::repair_id.eq(repair_id)) + .filter(dsl::volume_id.eq(volume_id)) + .first_async::(conn) + .await + .optional()?; + + if maybe_lock.is_some() { + return Ok(()); + } + + // Do not allow a volume repair record to be created if the volume does + // not exist, or was hard-deleted! + let maybe_volume = Self::volume_get_impl(conn, volume_id).await?; + + if maybe_volume.is_none() { + return Err(err.bail(Error::invalid_request(format!( + "cannot create record: volume {volume_id} does not exist" + )))); + } + + // Do not check for soft-deletion here: We may want to request locks for + // soft-deleted volumes. + + match diesel::insert_into(dsl::volume_repair) .values(VolumeRepair { volume_id, repair_id }) + .execute_async(conn) + .await + { + Ok(_) => Ok(()), + + Err(e) => match e { + DieselError::DatabaseError( + DatabaseErrorKind::UniqueViolation, + ref error_information, + ) if error_information.constraint_name() + == Some("volume_repair_pkey") => + { + Err(err.bail(Error::conflict("volume repair lock"))) + } + + _ => Err(e), + }, + } } pub async fn volume_repair_lock( @@ -36,21 +99,25 @@ impl DataStore { repair_id: Uuid, ) -> Result<(), Error> { let conn = self.pool_connection_authorized(opctx).await?; - Self::volume_repair_insert_query(volume_id, repair_id) - .execute_async(&*conn) + let err = OptionalError::new(); + + self.transaction_retry_wrapper("volume_repair_lock") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + Self::volume_repair_insert_in_txn( + &conn, err, volume_id, repair_id, + ) + .await + } + }) .await - .map(|_| ()) - .map_err(|e| match e { - DieselError::DatabaseError( - DatabaseErrorKind::UniqueViolation, - ref error_information, - ) if error_information.constraint_name() - == Some("volume_repair_pkey") => - { - Error::conflict("volume repair lock") + .map_err(|e| { + if let Some(err) = err.take() { + err + } else { + public_error_from_diesel(e, ErrorHandler::Server) } - - _ => public_error_from_diesel(e, ErrorHandler::Server), }) } @@ -102,6 +169,7 @@ mod test { use crate::db::pub_test_utils::TestDatabase; use omicron_test_utils::dev; + use sled_agent_client::types::VolumeConstructionRequest; #[tokio::test] async fn volume_lock_conflict_error_returned() { @@ -113,6 +181,20 @@ mod test { let lock_2 = Uuid::new_v4(); let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore.volume_repair_lock(&opctx, volume_id, lock_1).await.unwrap(); let err = datastore @@ -125,4 +207,55 @@ mod test { db.terminate().await; logctx.cleanup_successful(); } + + /// Assert that you can't take a volume repair lock if the volume does not + /// exist yet! 
+ #[tokio::test] + async fn volume_lock_should_fail_without_volume() { + let logctx = + dev::test_setup_log("volume_lock_should_fail_without_volume"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let lock_1 = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + + datastore + .volume_repair_lock(&opctx, volume_id, lock_1) + .await + .unwrap_err(); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn volume_lock_relock_allowed() { + let logctx = dev::test_setup_log("volume_lock_relock_allowed"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let lock_id = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + + datastore.volume_repair_lock(&opctx, volume_id, lock_id).await.unwrap(); + datastore.volume_repair_lock(&opctx, volume_id, lock_id).await.unwrap(); + + db.terminate().await; + logctx.cleanup_successful(); + } } diff --git a/nexus/reconfigurator/blippy/Cargo.toml b/nexus/reconfigurator/blippy/Cargo.toml new file mode 100644 index 0000000000..e7f7208871 --- /dev/null +++ b/nexus/reconfigurator/blippy/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "nexus-reconfigurator-blippy" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[dependencies] +nexus-sled-agent-shared.workspace = true +nexus-types.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true + +omicron-workspace-hack.workspace = true + +[dev-dependencies] +nexus-reconfigurator-planning.workspace = true +omicron-test-utils.workspace = true diff --git a/nexus/reconfigurator/blippy/src/blippy.rs b/nexus/reconfigurator/blippy/src/blippy.rs new file mode 100644 index 0000000000..9e9cc84b32 --- /dev/null +++ b/nexus/reconfigurator/blippy/src/blippy.rs @@ -0,0 +1,428 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::checks; +use crate::report::BlippyReport; +use crate::report::BlippyReportSortKey; +use core::fmt; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintDatasetConfig; +use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::inventory::ZpoolName; +use omicron_common::address::DnsSubnet; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::SLED_PREFIX; +use omicron_common::api::external::MacAddr; +use omicron_common::disk::DatasetKind; +use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; +use std::collections::BTreeSet; +use std::net::IpAddr; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Note { + pub severity: Severity, + pub kind: Kind, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Severity { + /// Indicator of a serious problem that means the blueprint is invalid. 
+ Fatal, +} + +impl fmt::Display for Severity { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Severity::Fatal => write!(f, "FATAL"), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Kind { + Sled { sled_id: SledUuid, kind: SledKind }, +} + +impl Kind { + pub fn display_component(&self) -> impl fmt::Display + '_ { + enum Component<'a> { + Sled(&'a SledUuid), + } + + impl fmt::Display for Component<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Component::Sled(id) => write!(f, "sled {id}"), + } + } + } + + match self { + Kind::Sled { sled_id, .. } => Component::Sled(sled_id), + } + } + + pub fn display_subkind(&self) -> impl fmt::Display + '_ { + enum Subkind<'a> { + Sled(&'a SledKind), + } + + impl fmt::Display for Subkind<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Subkind::Sled(kind) => write!(f, "{kind}"), + } + } + } + + match self { + Kind::Sled { kind, .. } => Subkind::Sled(kind), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum SledKind { + /// Two running zones have the same underlay IP address. + DuplicateUnderlayIp { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + }, + /// A sled has two zones that are not members of the same sled subnet. + SledWithMixedUnderlaySubnets { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + }, + /// Two sleds are using the same sled subnet. + ConflictingSledSubnets { + other_sled: SledUuid, + subnet: Ipv6Subnet, + }, + /// An internal DNS zone has an IP that is not one of the expected rack DNS + /// subnets. + InternalDnsZoneBadSubnet { + zone: BlueprintZoneConfig, + rack_dns_subnets: BTreeSet, + }, + /// Two running zones have the same external IP address. + DuplicateExternalIp { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + ip: IpAddr, + }, + /// Two running zones' NICs have the same IP address. + DuplicateNicIp { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + ip: IpAddr, + }, + /// Two running zones' NICs have the same MAC address. + DuplicateNicMac { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + mac: MacAddr, + }, + /// Two zones with the same durable dataset kind are on the same zpool. + ZoneDurableDatasetCollision { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + zpool: ZpoolName, + }, + /// Two zones with the same filesystem dataset kind are on the same zpool. + ZoneFilesystemDatasetCollision { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + zpool: ZpoolName, + }, + /// One zpool has two datasets of the same kind. + ZpoolWithDuplicateDatasetKinds { + dataset1: BlueprintDatasetConfig, + dataset2: BlueprintDatasetConfig, + zpool: ZpoolUuid, + }, + /// A zpool is missing its Debug dataset. + ZpoolMissingDebugDataset { zpool: ZpoolUuid }, + /// A zpool is missing its Zone Root dataset. + ZpoolMissingZoneRootDataset { zpool: ZpoolUuid }, + /// A zone's filesystem dataset is missing from `blueprint_datasets`. + ZoneMissingFilesystemDataset { zone: BlueprintZoneConfig }, + /// A zone's durable dataset is missing from `blueprint_datasets`. + ZoneMissingDurableDataset { zone: BlueprintZoneConfig }, + /// A zone's durable dataset and transient root dataset are on different + /// zpools. + ZoneWithDatasetsOnDifferentZpools { + zone: BlueprintZoneConfig, + durable_zpool: ZpoolName, + transient_zpool: ZpoolName, + }, + /// A sled is missing entries in `Blueprint::blueprint_datasets`. 
+ /// + /// `why` indicates why we expected this sled to have an entry. + SledMissingDatasets { why: &'static str }, + /// A sled is missing entries in `Blueprint::blueprint_disks`. + /// + /// `why` indicates why we expected this sled to have an entry. + SledMissingDisks { why: &'static str }, + /// A dataset is present but not referenced by any in-service zone or disk. + OrphanedDataset { dataset: BlueprintDatasetConfig }, + /// A dataset claims to be on a zpool that does not exist. + DatasetOnNonexistentZpool { dataset: BlueprintDatasetConfig }, +} + +impl fmt::Display for SledKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SledKind::DuplicateUnderlayIp { zone1, zone2 } => { + write!( + f, + "duplicate underlay IP {} ({:?} {} and {:?} {})", + zone1.underlay_ip(), + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::SledWithMixedUnderlaySubnets { zone1, zone2 } => { + write!( + f, + "zones have underlay IPs on two different sled subnets: \ + {:?} {} ({}) and {:?} {} ({})", + zone1.zone_type.kind(), + zone1.id, + zone1.underlay_ip(), + zone2.zone_type.kind(), + zone2.id, + zone2.underlay_ip(), + ) + } + SledKind::ConflictingSledSubnets { other_sled, subnet } => { + write!( + f, + "duplicate sled subnet {} with sled {other_sled}", + subnet.net() + ) + } + SledKind::InternalDnsZoneBadSubnet { zone, rack_dns_subnets } => { + write!( + f, + "internal DNS zone {} underlay IP {} is not \ + one of the reserved rack DNS subnets ({:?})", + zone.id, + zone.underlay_ip(), + rack_dns_subnets + ) + } + SledKind::DuplicateExternalIp { zone1, zone2, ip } => { + write!( + f, + "duplicate external IP {ip} ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::DuplicateNicIp { zone1, zone2, ip } => { + write!( + f, + "duplicate NIC IP {ip} ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::DuplicateNicMac { zone1, zone2, mac } => { + write!( + f, + "duplicate NIC MAC {mac} ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::ZoneDurableDatasetCollision { zone1, zone2, zpool } => { + write!( + f, + "zpool {zpool} has two zone datasets of the same kind \ + ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::ZoneFilesystemDatasetCollision { + zone1, + zone2, + zpool, + } => { + write!( + f, + "zpool {zpool} has two zone filesystems of the same kind \ + ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::ZpoolWithDuplicateDatasetKinds { + dataset1, + dataset2, + zpool, + } => { + write!( + f, + "two datasets of the same kind on zpool {zpool} \ + ({:?} {} and {:?} {})", + dataset1.kind, dataset1.id, dataset2.kind, dataset2.id, + ) + } + SledKind::ZpoolMissingDebugDataset { zpool } => { + write!(f, "zpool {zpool} is missing its Debug dataset") + } + SledKind::ZpoolMissingZoneRootDataset { zpool } => { + write!(f, "zpool {zpool} is missing its Zone Root dataset") + } + SledKind::ZoneMissingFilesystemDataset { zone } => { + write!( + f, + "in-service zone's filesytem dataset is missing: {:?} {}", + zone.zone_type.kind(), + zone.id, + ) + } + SledKind::ZoneMissingDurableDataset { zone } => { + write!( + f, + "in-service zone's durable dataset is missing: {:?} {}", + zone.zone_type.kind(), + zone.id, + ) + 
} + SledKind::ZoneWithDatasetsOnDifferentZpools { + zone, + durable_zpool, + transient_zpool, + } => { + write!( + f, + "zone {:?} {} has its durable dataset on \ + zpool {durable_zpool} but its root dataset on \ + zpool {transient_zpool}", + zone.zone_type.kind(), + zone.id, + ) + } + SledKind::SledMissingDatasets { why } => { + write!(f, "missing entry in blueprint_datasets ({why})") + } + SledKind::SledMissingDisks { why } => { + write!(f, "missing entry in blueprint_disks ({why})") + } + SledKind::OrphanedDataset { dataset } => { + let parent = match dataset.kind { + DatasetKind::Cockroach + | DatasetKind::Crucible + | DatasetKind::Clickhouse + | DatasetKind::ClickhouseKeeper + | DatasetKind::ClickhouseServer + | DatasetKind::ExternalDns + | DatasetKind::InternalDns + | DatasetKind::TransientZone { .. } => "zone", + DatasetKind::TransientZoneRoot + | DatasetKind::Debug + | DatasetKind::Update => "disk", + }; + write!( + f, + "in-service dataset ({:?} {}) with no associated {parent}", + dataset.kind, dataset.id + ) + } + SledKind::DatasetOnNonexistentZpool { dataset } => { + write!( + f, + "in-service dataset ({:?} {}) on non-existent zpool {}", + dataset.kind, dataset.id, dataset.pool + ) + } + } + } +} + +impl Note { + pub fn display(&self, sort_key: BlippyReportSortKey) -> NoteDisplay<'_> { + NoteDisplay { note: self, sort_key } + } +} + +#[derive(Debug)] +pub struct NoteDisplay<'a> { + note: &'a Note, + sort_key: BlippyReportSortKey, +} + +impl fmt::Display for NoteDisplay<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.sort_key { + BlippyReportSortKey::Kind => { + write!( + f, + "{}: {} note: {}", + self.note.kind.display_component(), + self.note.severity, + self.note.kind.display_subkind(), + ) + } + BlippyReportSortKey::Severity => { + write!( + f, + "{} note: {}: {}", + self.note.severity, + self.note.kind.display_component(), + self.note.kind.display_subkind(), + ) + } + } + } +} + +#[derive(Debug)] +pub struct Blippy<'a> { + blueprint: &'a Blueprint, + notes: Vec, +} + +impl<'a> Blippy<'a> { + pub fn new(blueprint: &'a Blueprint) -> Self { + let mut slf = Self { blueprint, notes: Vec::new() }; + checks::perform_all_blueprint_only_checks(&mut slf); + slf + } + + pub fn blueprint(&self) -> &'a Blueprint { + self.blueprint + } + + pub(crate) fn push_sled_note( + &mut self, + sled_id: SledUuid, + severity: Severity, + kind: SledKind, + ) { + self.notes.push(Note { severity, kind: Kind::Sled { sled_id, kind } }); + } + + pub fn into_report( + self, + sort_key: BlippyReportSortKey, + ) -> BlippyReport<'a> { + BlippyReport::new(self.blueprint, self.notes, sort_key) + } +} diff --git a/nexus/reconfigurator/blippy/src/checks.rs b/nexus/reconfigurator/blippy/src/checks.rs new file mode 100644 index 0000000000..f5673cb77c --- /dev/null +++ b/nexus/reconfigurator/blippy/src/checks.rs @@ -0,0 +1,1564 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
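With the `Blippy` builder defined above, the expected entry point is roughly the following; a sketch only, since building a `Blueprint` and the `BlippyReport` accessors (report.rs) sit outside this hunk:

    // Assumed consumer usage: run every blueprint-only check and collect the report.
    let report = Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind);
    // Individual notes render through the sort-key-aware adapter, e.g.:
    //   println!("{}", note.display(BlippyReportSortKey::Kind));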
+ +use crate::blippy::Blippy; +use crate::blippy::Severity; +use crate::blippy::SledKind; +use nexus_sled_agent_shared::inventory::ZoneKind; +use nexus_types::deployment::BlueprintDatasetConfig; +use nexus_types::deployment::BlueprintDatasetFilter; +use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::OmicronZoneExternalIp; +use omicron_common::address::DnsSubnet; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::SLED_PREFIX; +use omicron_common::disk::DatasetKind; +use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::net::Ipv6Addr; + +pub(crate) fn perform_all_blueprint_only_checks(blippy: &mut Blippy<'_>) { + check_underlay_ips(blippy); + check_external_networking(blippy); + check_dataset_zpool_uniqueness(blippy); + check_datasets(blippy); +} + +fn check_underlay_ips(blippy: &mut Blippy<'_>) { + let mut underlay_ips: BTreeMap = + BTreeMap::new(); + let mut inferred_sled_subnets_by_sled: BTreeMap< + SledUuid, + (Ipv6Subnet, &BlueprintZoneConfig), + > = BTreeMap::new(); + let mut inferred_sled_subnets_by_subnet: BTreeMap< + Ipv6Subnet, + SledUuid, + > = BTreeMap::new(); + let mut rack_dns_subnets: BTreeSet = BTreeSet::new(); + + for (sled_id, zone) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + { + let ip = zone.underlay_ip(); + + // There should be no duplicate underlay IPs. + if let Some(previous) = underlay_ips.insert(ip, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateUnderlayIp { + zone1: previous.clone(), + zone2: zone.clone(), + }, + ); + } + + if zone.zone_type.is_internal_dns() { + // Internal DNS zones should have IPs coming from the reserved rack + // DNS subnets. + let subnet = DnsSubnet::from_addr(ip); + if rack_dns_subnets.is_empty() { + // The blueprint doesn't store the rack subnet explicitly, so we + // infer it based on the first internal DNS zone we see. + rack_dns_subnets.extend(subnet.rack_subnet().get_dns_subnets()); + } + if !rack_dns_subnets.contains(&subnet) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::InternalDnsZoneBadSubnet { + zone: zone.clone(), + rack_dns_subnets: rack_dns_subnets.clone(), + }, + ); + } + } else { + let subnet = Ipv6Subnet::new(ip); + + // Any given subnet should be used by at most one sled. + match inferred_sled_subnets_by_subnet.entry(subnet) { + Entry::Vacant(slot) => { + slot.insert(sled_id); + } + Entry::Occupied(prev) => { + if *prev.get() != sled_id { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ConflictingSledSubnets { + other_sled: *prev.get(), + subnet, + }, + ); + } + } + } + + // Any given sled should have IPs within at most one subnet. + // + // The blueprint doesn't store sled subnets explicitly, so we can't + // check that each sled is using the subnet it's supposed to. The + // best we can do is check that the sleds are internally consistent. 
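As a concrete illustration of that consistency check (hypothetical addresses; assumes the usual /64 sled prefix behind SLED_PREFIX):

    use omicron_common::address::{Ipv6Subnet, SLED_PREFIX};
    use std::net::Ipv6Addr;

    fn main() {
        // Two zones on one sled whose underlay IPs land in different sled
        // subnets would be reported as SledWithMixedUnderlaySubnets.
        let ip1: Ipv6Addr = "fd00:1122:3344:101::5".parse().unwrap();
        let ip2: Ipv6Addr = "fd00:1122:3344:102::7".parse().unwrap();
        let subnet1: Ipv6Subnet<SLED_PREFIX> = Ipv6Subnet::new(ip1);
        let subnet2: Ipv6Subnet<SLED_PREFIX> = Ipv6Subnet::new(ip2);
        assert_ne!(subnet1, subnet2);
    }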
+ match inferred_sled_subnets_by_sled.entry(sled_id) { + Entry::Vacant(slot) => { + slot.insert((subnet, zone)); + } + Entry::Occupied(prev) => { + if prev.get().0 != subnet { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::SledWithMixedUnderlaySubnets { + zone1: prev.get().1.clone(), + zone2: zone.clone(), + }, + ); + } + } + } + } + } +} + +fn check_external_networking(blippy: &mut Blippy<'_>) { + let mut used_external_ips = BTreeMap::new(); + let mut used_external_floating_ips = BTreeMap::new(); + let mut used_external_snat_ips = BTreeMap::new(); + + let mut used_nic_ips = BTreeMap::new(); + let mut used_nic_macs = BTreeMap::new(); + + for (sled_id, zone, external_ip, nic) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + .filter_map(|(sled_id, zone)| { + zone.zone_type + .external_networking() + .map(|(external_ip, nic)| (sled_id, zone, external_ip, nic)) + }) + { + // There should be no duplicate external IPs. + if let Some(prev_zone) = used_external_ips.insert(external_ip, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateExternalIp { + zone1: prev_zone.clone(), + zone2: zone.clone(), + ip: external_ip.ip(), + }, + ); + } + + // See the loop below; we build up separate maps to check for + // Floating/SNAT overlap that wouldn't be caught by the exact + // `used_external_ips` map above. + match external_ip { + OmicronZoneExternalIp::Floating(floating) => { + used_external_floating_ips.insert(floating.ip, zone); + } + OmicronZoneExternalIp::Snat(snat) => { + used_external_snat_ips + .insert(snat.snat_cfg.ip, (sled_id, zone)); + } + } + + // There should be no duplicate NIC IPs or MACs. + if let Some(prev_zone) = used_nic_ips.insert(nic.ip, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateNicIp { + zone1: prev_zone.clone(), + zone2: zone.clone(), + ip: nic.ip, + }, + ); + } + if let Some(prev_zone) = used_nic_macs.insert(nic.mac, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateNicMac { + zone1: prev_zone.clone(), + zone2: zone.clone(), + mac: nic.mac, + }, + ); + } + } + + // The loop above noted any exact duplicates; we should also check for any + // SNAT / Floating overlaps. For each SNAT IP, ensure we don't have a + // floating IP at the same address. + for (ip, (sled_id, zone2)) in used_external_snat_ips { + if let Some(&zone1) = used_external_floating_ips.get(&ip) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateExternalIp { + zone1: zone1.clone(), + zone2: zone2.clone(), + ip, + }, + ); + } + } +} + +fn check_dataset_zpool_uniqueness(blippy: &mut Blippy<'_>) { + let mut durable_kinds_by_zpool: BTreeMap> = + BTreeMap::new(); + let mut transient_kinds_by_zpool: BTreeMap< + ZpoolUuid, + BTreeMap, + > = BTreeMap::new(); + + // On any given zpool, we should have at most one zone of any given + // kind. + for (sled_id, zone) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + { + // Check "one kind per zpool" for durable datasets... 
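// The bookkeeping below (and in `DatasetsBySled` further down) leans on the
// fact that `BTreeMap::insert` returns the previously stored value: the first
// zone seen for a (zpool, kind) pair wins, and any later insert for the same
// pair hands back the earlier zone, which is exactly the collision to report.
// A minimal std-only sketch of that pattern (names are illustrative):
//
//     let mut kinds_by_zpool: BTreeMap<u8, BTreeMap<&str, &str>> = BTreeMap::new();
//     assert!(kinds_by_zpool.entry(1).or_default().insert("crucible", "zone-a").is_none());
//     assert_eq!(
//         kinds_by_zpool.entry(1).or_default().insert("crucible", "zone-b"),
//         Some("zone-a"), // second crucible zone on zpool 1 => collision
//     );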
+ if let Some(dataset) = zone.zone_type.durable_dataset() { + let kind = zone.zone_type.kind(); + if let Some(previous) = durable_kinds_by_zpool + .entry(dataset.dataset.pool_name.id()) + .or_default() + .insert(kind, zone) + { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneDurableDatasetCollision { + zone1: previous.clone(), + zone2: zone.clone(), + zpool: dataset.dataset.pool_name.clone(), + }, + ); + } + } + + // ... and transient datasets. + if let Some(dataset) = zone.filesystem_dataset() { + let kind = zone.zone_type.kind(); + if let Some(previous) = transient_kinds_by_zpool + .entry(dataset.pool().id()) + .or_default() + .insert(kind, zone) + { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneFilesystemDatasetCollision { + zone1: previous.clone(), + zone2: zone.clone(), + zpool: dataset.into_parts().0, + }, + ); + } + } + + // If a zone has both durable and transient datasets, they should be on + // the same pool. + match (zone.zone_type.durable_zpool(), zone.filesystem_pool.as_ref()) { + (Some(durable), Some(transient)) if durable != transient => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneWithDatasetsOnDifferentZpools { + zone: zone.clone(), + durable_zpool: durable.clone(), + transient_zpool: transient.clone(), + }, + ); + } + _ => (), + } + } +} + +type DatasetByKind<'a> = BTreeMap; +type DatasetsByZpool<'a> = BTreeMap>; + +#[derive(Debug)] +struct DatasetsBySled<'a> { + by_sled: BTreeMap>, + noted_sleds_missing_datasets: BTreeSet, +} + +impl<'a> DatasetsBySled<'a> { + fn new(blippy: &mut Blippy<'a>) -> Self { + let mut by_sled = BTreeMap::new(); + + for (&sled_id, config) in &blippy.blueprint().blueprint_datasets { + let by_zpool: &mut BTreeMap<_, _> = + by_sled.entry(sled_id).or_default(); + + for dataset in config.datasets.values() { + let by_kind: &mut BTreeMap<_, _> = + by_zpool.entry(dataset.pool.id()).or_default(); + + match by_kind.entry(dataset.kind.clone()) { + Entry::Vacant(slot) => { + slot.insert(dataset); + } + Entry::Occupied(prev) => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZpoolWithDuplicateDatasetKinds { + dataset1: (*prev.get()).clone(), + dataset2: dataset.clone(), + zpool: dataset.pool.id(), + }, + ); + } + } + } + } + + Self { by_sled, noted_sleds_missing_datasets: BTreeSet::new() } + } + + // Get the datasets for each zpool on a given sled, or add a fatal note to + // `blippy` that the sled is missing an entry in `blueprint_datasets` for + // the specified reason `why`. + fn get_sled_or_note_missing( + &mut self, + blippy: &mut Blippy<'_>, + sled_id: SledUuid, + why: &'static str, + ) -> Option<&DatasetsByZpool<'a>> { + let maybe_datasets = self.by_sled.get(&sled_id); + if maybe_datasets.is_none() + && self.noted_sleds_missing_datasets.insert(sled_id) + { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::SledMissingDatasets { why }, + ); + } + maybe_datasets + } +} + +fn check_datasets(blippy: &mut Blippy<'_>) { + let mut datasets = DatasetsBySled::new(blippy); + + // As we loop through all the datasets we expect to see, mark them down. + // Afterwards, we'll check for any datasets present that we _didn't_ expect + // to see. + let mut expected_datasets = BTreeSet::new(); + + // All disks should have debug and zone root datasets. + // + // TODO-correctness We currently only include in-service disks in the + // blueprint; once we include expunged or decommissioned disks too, we + // should filter here to only in-service. 
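// Concretely, for a disk backing zpool `oxp_<uuid>` the loop below expects to
// find at least these two dataset records on that zpool (dataset names are
// illustrative, keyed by the DatasetKind variants used here):
//
//     oxp_<uuid>/crypt/debug    -> DatasetKind::Debug
//     oxp_<uuid>/crypt/zone     -> DatasetKind::TransientZoneRoot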
+ for (&sled_id, disk_config) in &blippy.blueprint().blueprint_disks { + let Some(sled_datasets) = datasets.get_sled_or_note_missing( + blippy, + sled_id, + "sled has an entry in blueprint_disks", + ) else { + continue; + }; + + for disk in &disk_config.disks { + let sled_datasets = sled_datasets.get(&disk.pool_id); + + match sled_datasets + .and_then(|by_zpool| by_zpool.get(&DatasetKind::Debug)) + { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZpoolMissingDebugDataset { + zpool: disk.pool_id, + }, + ); + } + } + + match sled_datasets.and_then(|by_zpool| { + by_zpool.get(&DatasetKind::TransientZoneRoot) + }) { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZpoolMissingZoneRootDataset { + zpool: disk.pool_id, + }, + ); + } + } + } + } + + // There should be a dataset for every dataset referenced by a running zone + // (filesystem or durable). + for (sled_id, zone_config) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + { + let Some(sled_datasets) = datasets.get_sled_or_note_missing( + blippy, + sled_id, + "sled has running zones", + ) else { + continue; + }; + + match &zone_config.filesystem_dataset() { + Some(dataset) => { + match sled_datasets + .get(&dataset.pool().id()) + .and_then(|by_zpool| by_zpool.get(dataset.dataset())) + { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneMissingFilesystemDataset { + zone: zone_config.clone(), + }, + ); + } + } + } + None => { + // TODO-john Add a Severity::BackwardsCompatibility and note the + // missing filesystem pool + } + } + + if let Some(dataset) = zone_config.zone_type.durable_dataset() { + match sled_datasets + .get(&dataset.dataset.pool_name.id()) + .and_then(|by_zpool| by_zpool.get(&dataset.kind)) + { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneMissingDurableDataset { + zone: zone_config.clone(), + }, + ); + } + } + } + } + + // TODO-correctness We currently only include in-service disks in the + // blueprint; once we include expunged or decommissioned disks too, we + // should filter here to only in-service. + let in_service_sled_zpools = blippy + .blueprint() + .blueprint_disks + .iter() + .map(|(sled_id, disk_config)| { + ( + sled_id, + disk_config + .disks + .iter() + .map(|disk| disk.pool_id) + .collect::>(), + ) + }) + .collect::>(); + let mut noted_sleds_without_disks = BTreeSet::new(); + + // All datasets should be on zpools that have disk records, and all datasets + // should have been referenced by either a zone or a disk above. 
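// The orphan check below reduces to set membership: every dataset id recorded
// in `expected_datasets` while walking disks and zones above is accounted
// for, and any in-service dataset left over gets an `OrphanedDataset` note.
// A std-only sketch of that shape:
//
//     let expected: BTreeSet<u32> = [1, 2, 3].into_iter().collect();
//     let in_service = [1, 2, 3, 4];
//     let orphans: Vec<u32> =
//         in_service.into_iter().filter(|id| !expected.contains(id)).collect();
//     assert_eq!(orphans, vec![4]);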
+ for (sled_id, dataset) in blippy + .blueprint() + .all_omicron_datasets(BlueprintDatasetFilter::InService) + { + if !expected_datasets.contains(&dataset.id) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::OrphanedDataset { dataset: dataset.clone() }, + ); + continue; + } + + let Some(sled_zpools) = in_service_sled_zpools.get(&sled_id) else { + if noted_sleds_without_disks.insert(sled_id) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::SledMissingDisks { + why: "sled has in-service datasets", + }, + ); + } + continue; + }; + + if !sled_zpools.contains(&dataset.pool.id()) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DatasetOnNonexistentZpool { + dataset: dataset.clone(), + }, + ); + continue; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::blippy::Kind; + use crate::blippy::Note; + use crate::BlippyReportSortKey; + use nexus_reconfigurator_planning::example::example; + use nexus_reconfigurator_planning::example::ExampleSystemBuilder; + use nexus_types::deployment::blueprint_zone_type; + use nexus_types::deployment::BlueprintZoneType; + use omicron_test_utils::dev::test_setup_log; + + // The tests below all take the example blueprint, mutate in some invalid + // way, and confirm that blippy reports the invalidity. This test confirms + // the unmutated blueprint has no blippy notes. + #[test] + fn test_example_blueprint_is_blippy_clean() { + static TEST_NAME: &str = "test_example_blueprint_is_blippy_clean"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, blueprint) = example(&logctx.log, TEST_NAME); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + if !report.notes().is_empty() { + eprintln!("{}", report.display()); + panic!("example blueprint should have no blippy notes"); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_underlay_ips() { + static TEST_NAME: &str = "test_duplicate_underlay_ips"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the underlay IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_ip = nexus0.underlay_ip(); + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + internal_address, + .. + }) => { + internal_address.set_ip(dup_ip); + } + _ => unreachable!("this is a Nexus zone"), + }; + + // This illegal modification should result in at least three notes: a + // duplicate underlay IP, duplicate sled subnets, and sled1 having mixed + // underlay subnets (the details of which depend on the ordering of + // zones, so we'll sort that out here). 
+ let nexus0 = nexus0.clone(); + let nexus1 = nexus1.clone(); + let (mixed_underlay_zone1, mixed_underlay_zone2) = { + let mut sled1_zones = blueprint + .blueprint_zones + .get(&nexus1_sled_id) + .unwrap() + .zones + .iter(); + let sled1_zone1 = sled1_zones.next().expect("at least one zone"); + let sled1_zone2 = sled1_zones.next().expect("at least two zones"); + if sled1_zone1.id == nexus1.id { + (nexus1.clone(), sled1_zone2.clone()) + } else { + (sled1_zone1.clone(), nexus1.clone()) + } + }; + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateUnderlayIp { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::SledWithMixedUnderlaySubnets { + zone1: mixed_underlay_zone1, + zone2: mixed_underlay_zone2, + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::ConflictingSledSubnets { + other_sled: nexus0_sled_id, + subnet: Ipv6Subnet::new(dup_ip), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_bad_internal_dns_subnet() { + static TEST_NAME: &str = "test_bad_internal_dns_subnet"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Change the second internal DNS zone to be from a different rack + // subnet. + let mut internal_dns_iter = blueprint + .blueprint_zones + .iter_mut() + .flat_map(|(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_internal_dns() { + Some((*sled_id, zone)) + } else { + None + } + }) + }); + let (dns0_sled_id, dns0) = + internal_dns_iter.next().expect("at least one internal DNS zone"); + let (dns1_sled_id, dns1) = + internal_dns_iter.next().expect("at least two internal DNS zones"); + assert_ne!(dns0_sled_id, dns1_sled_id); + + let dns0_ip = dns0.underlay_ip(); + let rack_subnet = DnsSubnet::from_addr(dns0_ip).rack_subnet(); + let different_rack_subnet = { + // Flip the high bit of the existing underlay IP to guarantee a + // different rack subnet + let hi_bit = 1_u128 << 127; + let lo_bits = !hi_bit; + let hi_bit_ip = Ipv6Addr::from(hi_bit); + let lo_bits_ip = Ipv6Addr::from(lo_bits); + // Build XOR out of the operations we have... + let flipped_ip = if hi_bit_ip & dns0_ip == hi_bit_ip { + dns0_ip & lo_bits_ip + } else { + dns0_ip | hi_bit_ip + }; + DnsSubnet::from_addr(flipped_ip).rack_subnet() + }; + let different_dns_subnet = different_rack_subnet.get_dns_subnet(0); + + match &mut dns1.zone_type { + BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + http_address, + dns_address, + .. 
+ }, + ) => { + http_address.set_ip(different_dns_subnet.dns_address()); + dns_address.set_ip(different_dns_subnet.dns_address()); + } + _ => unreachable!("this is an internal DNS zone"), + }; + + let expected_note = Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::InternalDnsZoneBadSubnet { + zone: dns1.clone(), + rack_dns_subnets: rack_subnet + .get_dns_subnets() + .into_iter() + .collect(), + }, + }, + }; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + assert!( + report.notes().contains(&expected_note), + "did not find expected note {expected_note:?}" + ); + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_external_ip() { + static TEST_NAME: &str = "test_duplicate_external_ip"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the external IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_ip = match nexus0 + .zone_type + .external_networking() + .expect("Nexus has external networking") + .0 + { + OmicronZoneExternalIp::Floating(ip) => ip, + OmicronZoneExternalIp::Snat(_) => { + unreachable!("Nexus has a floating IP") + } + }; + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + external_ip, + .. + }) => { + *external_ip = dup_ip; + } + _ => unreachable!("this is a Nexus zone"), + }; + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateExternalIp { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + ip: dup_ip.ip, + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_nic_ip() { + static TEST_NAME: &str = "test_duplicate_nic_ip"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the external IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_ip = nexus0 + .zone_type + .external_networking() + .expect("Nexus has external networking") + .1 + .ip; + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + nic, + .. 
+ }) => { + nic.ip = dup_ip; + } + _ => unreachable!("this is a Nexus zone"), + }; + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateNicIp { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + ip: dup_ip, + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_nic_mac() { + static TEST_NAME: &str = "test_duplicate_nic_mac"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the external IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_mac = nexus0 + .zone_type + .external_networking() + .expect("Nexus has external networking") + .1 + .mac; + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + nic, + .. + }) => { + nic.mac = dup_mac; + } + _ => unreachable!("this is a Nexus zone"), + }; + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateNicMac { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + mac: dup_mac, + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_durable_dataset_collision() { + static TEST_NAME: &str = "test_durable_dataset_collision"; + let logctx = test_setup_log(TEST_NAME); + let (_, mut blueprint) = + ExampleSystemBuilder::new(&logctx.log, TEST_NAME) + .external_dns_count(2) + .unwrap() + .build(); + + // Copy the durable zpool from one external DNS to another. + let mut dns_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_external_dns() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (dns0_sled_id, dns0) = + dns_iter.next().expect("at least one external DNS zone"); + let (dns1_sled_id, dns1) = + dns_iter.next().expect("at least two external DNS zones"); + assert_ne!(dns0_sled_id, dns1_sled_id); + + let dup_zpool = dns0 + .zone_type + .durable_zpool() + .expect("external DNS has a durable zpool") + .clone(); + match &mut dns1.zone_type { + BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { dataset, .. 
}, + ) => { + dataset.pool_name = dup_zpool.clone(); + } + _ => unreachable!("this is an external DNS zone"), + }; + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneDurableDatasetCollision { + zone1: dns0.clone(), + zone2: dns1.clone(), + zpool: dup_zpool.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneWithDatasetsOnDifferentZpools { + zone: dns1.clone(), + durable_zpool: dup_zpool.clone(), + transient_zpool: dns1.filesystem_pool.clone().unwrap(), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_transient_root_dataset_collision() { + static TEST_NAME: &str = "test_transient_root_dataset_collision"; + let logctx = test_setup_log(TEST_NAME); + let (_, mut blueprint) = + ExampleSystemBuilder::new(&logctx.log, TEST_NAME) + .external_dns_count(2) + .unwrap() + .build(); + + // Copy the filesystem zpool from one external DNS to another. + let mut dns_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_external_dns() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (dns0_sled_id, dns0) = + dns_iter.next().expect("at least one external DNS zone"); + let (dns1_sled_id, dns1) = + dns_iter.next().expect("at least two external DNS zones"); + assert_ne!(dns0_sled_id, dns1_sled_id); + + let dup_zpool = dns0 + .filesystem_pool + .as_ref() + .expect("external DNS has a filesystem zpool") + .clone(); + dns1.filesystem_pool = Some(dup_zpool.clone()); + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneFilesystemDatasetCollision { + zone1: dns0.clone(), + zone2: dns1.clone(), + zpool: dup_zpool.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneWithDatasetsOnDifferentZpools { + zone: dns1.clone(), + durable_zpool: dns1 + .zone_type + .durable_zpool() + .unwrap() + .clone(), + transient_zpool: dup_zpool.clone(), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_zpool_with_duplicate_dataset_kinds() { + static TEST_NAME: &str = "test_zpool_with_duplicate_dataset_kinds"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + let mut by_kind = BTreeMap::new(); + + // Loop over the datasets until we find a dataset kind that already + // exists on a different zpool, then copy it over. 
+ let mut found_sled_id = None; + let mut dataset1 = None; + let mut dataset2 = None; + let mut zpool = None; + 'outer: for (sled_id, datasets_config) in + blueprint.blueprint_datasets.iter_mut() + { + for dataset in datasets_config.datasets.values_mut() { + if let Some(prev) = + by_kind.insert(dataset.kind.clone(), dataset.clone()) + { + dataset.pool = prev.pool.clone(); + + found_sled_id = Some(*sled_id); + dataset1 = Some(prev); + dataset2 = Some(dataset.clone()); + zpool = Some(dataset.pool.clone()); + break 'outer; + } + } + } + let sled_id = found_sled_id.expect("found dataset to move"); + let dataset1 = dataset1.expect("found dataset to move"); + let dataset2 = dataset2.expect("found dataset to move"); + let zpool = zpool.expect("found dataset to move"); + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id, + kind: SledKind::ZpoolWithDuplicateDatasetKinds { + dataset1, + dataset2, + zpool: zpool.id(), + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_zpool_missing_default_datasets() { + static TEST_NAME: &str = "test_zpool_missing_default_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Drop the Debug dataset from one zpool and the ZoneRoot dataset from + // another; we should catch both errors. + let (sled_id, datasets_config) = blueprint + .blueprint_datasets + .iter_mut() + .next() + .expect("at least one sled"); + + let mut debug_dataset = None; + let mut zoneroot_dataset = None; + for dataset in &mut datasets_config.datasets.values_mut() { + match &dataset.kind { + DatasetKind::Debug if debug_dataset.is_none() => { + debug_dataset = Some(dataset.clone()); + } + DatasetKind::TransientZoneRoot + if debug_dataset.is_some() + && zoneroot_dataset.is_none() => + { + if Some(&dataset.pool) + != debug_dataset.as_ref().map(|d| &d.pool) + { + zoneroot_dataset = Some(dataset.clone()); + break; + } + } + _ => (), + } + } + let debug_dataset = + debug_dataset.expect("found Debug dataset to prune"); + let zoneroot_dataset = + zoneroot_dataset.expect("found ZoneRoot dataset to prune"); + assert_ne!(debug_dataset.pool, zoneroot_dataset.pool); + + // Actually strip these from the blueprint. 
+ datasets_config.datasets.retain(|&dataset_id, _| { + dataset_id != debug_dataset.id && dataset_id != zoneroot_dataset.id + }); + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZpoolMissingDebugDataset { + zpool: debug_dataset.pool.id(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZpoolMissingZoneRootDataset { + zpool: zoneroot_dataset.pool.id(), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_zone_missing_datasets() { + static TEST_NAME: &str = "test_zone_missing_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + let (sled_id, datasets_config) = blueprint + .blueprint_datasets + .iter_mut() + .next() + .expect("at least one sled"); + let zones_config = blueprint + .blueprint_zones + .get(sled_id) + .expect("got zones for sled with datasets"); + + // Pick a zone with a durable dataset to remove, and a different zone + // with a filesystem_pool dataset to remove. + let mut durable_zone = None; + let mut root_zone = None; + for z in &zones_config.zones { + if durable_zone.is_none() { + if z.zone_type.durable_zpool().is_some() { + durable_zone = Some(z.clone()); + } + } else if root_zone.is_none() { + root_zone = Some(z); + break; + } + } + let durable_zone = + durable_zone.expect("found zone with durable dataset to prune"); + let root_zone = + root_zone.expect("found zone with root dataset to prune"); + assert_ne!(durable_zone.filesystem_pool, root_zone.filesystem_pool); + + // Actually strip these from the blueprint. + datasets_config.datasets.retain(|_, dataset| { + let matches_durable = (dataset.pool + == *durable_zone.zone_type.durable_zpool().unwrap()) + && (dataset.kind + == durable_zone.zone_type.durable_dataset().unwrap().kind); + let root_dataset = root_zone.filesystem_dataset().unwrap(); + let matches_root = (dataset.pool == *root_dataset.pool()) + && (dataset.kind == *root_dataset.dataset()); + !matches_durable && !matches_root + }); + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZoneMissingFilesystemDataset { + zone: root_zone.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZoneMissingDurableDataset { + zone: durable_zone, + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_sled_missing_datasets() { + static TEST_NAME: &str = "test_sled_missing_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Pick one sled and remove its blueprint_datasets entry entirely. 
+ let removed_sled_id = *blueprint + .blueprint_datasets + .keys() + .next() + .expect("at least one sled"); + blueprint + .blueprint_datasets + .retain(|&sled_id, _| sled_id != removed_sled_id); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + let mut found_sled_missing_note = false; + for note in report.notes() { + if note.severity == Severity::Fatal { + match ¬e.kind { + Kind::Sled { + sled_id, + kind: SledKind::SledMissingDatasets { .. }, + } if *sled_id == removed_sled_id => { + found_sled_missing_note = true; + } + _ => (), + } + } + } + assert!( + found_sled_missing_note, + "did not find expected note for missing datasets entry for \ + sled {removed_sled_id}" + ); + + logctx.cleanup_successful(); + } + + #[test] + fn test_sled_missing_disks() { + static TEST_NAME: &str = "test_sled_missing_disks"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Pick one sled and remove its blueprint_disks entry entirely. + let removed_sled_id = *blueprint + .blueprint_disks + .keys() + .next() + .expect("at least one sled"); + blueprint + .blueprint_disks + .retain(|&sled_id, _| sled_id != removed_sled_id); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + let mut found_sled_missing_note = false; + for note in report.notes() { + if note.severity == Severity::Fatal { + match ¬e.kind { + Kind::Sled { + sled_id, + kind: SledKind::SledMissingDisks { .. }, + } if *sled_id == removed_sled_id => { + found_sled_missing_note = true; + } + _ => (), + } + } + } + assert!( + found_sled_missing_note, + "did not find expected note for missing disks entry for \ + sled {removed_sled_id}" + ); + + logctx.cleanup_successful(); + } + + #[test] + fn test_orphaned_datasets() { + static TEST_NAME: &str = "test_orphaned_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Pick two zones (one with a durable dataset and one with a filesystem + // root dataset), and remove both those zones, which should orphan their + // datasets. + let (sled_id, datasets_config) = blueprint + .blueprint_datasets + .iter_mut() + .next() + .expect("at least one sled"); + let zones_config = blueprint + .blueprint_zones + .get_mut(sled_id) + .expect("got zones for sled with datasets"); + let mut durable_zone = None; + let mut root_zone = None; + for z in &zones_config.zones { + if durable_zone.is_none() { + if z.zone_type.durable_zpool().is_some() { + durable_zone = Some(z.clone()); + } + } else if root_zone.is_none() { + root_zone = Some(z.clone()); + break; + } + } + let durable_zone = + durable_zone.expect("found zone with durable dataset to prune"); + let root_zone = + root_zone.expect("found zone with root dataset to prune"); + zones_config + .zones + .retain(|z| z.id != durable_zone.id && z.id != root_zone.id); + + let durable_dataset = durable_zone.zone_type.durable_dataset().unwrap(); + let root_dataset = root_zone.filesystem_dataset().unwrap(); + + // Find the datasets we expect to have been orphaned. 
+ let expected_notes = datasets_config + .datasets + .values() + .filter_map(|dataset| { + if (dataset.pool == durable_dataset.dataset.pool_name + && dataset.kind == durable_dataset.kind) + || (dataset.pool == *root_dataset.pool() + && dataset.kind == *root_dataset.dataset()) + { + Some(Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::OrphanedDataset { + dataset: dataset.clone(), + }, + }, + }) + } else { + None + } + }) + .collect::>(); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_dataset_on_nonexistent_zpool() { + static TEST_NAME: &str = "test_dataset_on_nonexistent_zpool"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Remove one zpool from one sled, then check that all datasets on that + // zpool produce report notes. + let (sled_id, disks_config) = blueprint + .blueprint_disks + .iter_mut() + .next() + .expect("at least one sled"); + let removed_disk = disks_config.disks.remove(0); + eprintln!("removed disk {removed_disk:?}"); + + let expected_notes = blueprint + .blueprint_datasets + .get(sled_id) + .unwrap() + .datasets + .values() + .filter_map(|dataset| { + if dataset.pool.id() != removed_disk.pool_id { + return None; + } + + let note = match dataset.kind { + DatasetKind::Debug | DatasetKind::TransientZoneRoot => { + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::OrphanedDataset { + dataset: dataset.clone(), + }, + }, + } + } + _ => Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::DatasetOnNonexistentZpool { + dataset: dataset.clone(), + }, + }, + }, + }; + Some(note) + }) + .collect::>(); + assert!(!expected_notes.is_empty()); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } +} diff --git a/nexus/reconfigurator/blippy/src/lib.rs b/nexus/reconfigurator/blippy/src/lib.rs new file mode 100644 index 0000000000..283bbfc0d0 --- /dev/null +++ b/nexus/reconfigurator/blippy/src/lib.rs @@ -0,0 +1,24 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Blippy: the blueprint checker +//! +//! [`Blippy`] performs a variety of checks on blueprints to ensure they are +//! internally-consistent (e.g., "every in-service zone that should have one or +//! more datasets do", or "any given external IP address is used by at most one +//! in-service zone"). It emits [`BlippyReport`]s in the form of a list of +//! [`BlippyNote`]s, each of which has an associated severity and parent +//! component (typically a sled). 
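// A minimal end-to-end sketch of the API described above, mirroring how the
// tests in checks.rs drive it (the `Blueprint` value itself is assumed to
// come from elsewhere, e.g. a test fixture):
//
//     use nexus_reconfigurator_blippy::{Blippy, BlippyReportSortKey};
//     use nexus_types::deployment::Blueprint;
//
//     fn print_blippy_report(blueprint: &Blueprint) {
//         let report =
//             Blippy::new(blueprint).into_report(BlippyReportSortKey::Severity);
//         if !report.notes().is_empty() {
//             eprintln!("{}", report.display());
//         }
//     }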
+ +mod blippy; +mod checks; +mod report; + +pub use blippy::Blippy; +pub use blippy::Kind as BlippyKind; +pub use blippy::Note as BlippyNote; +pub use blippy::Severity as BlippySeverity; +pub use blippy::SledKind as BlippySledKind; +pub use report::BlippyReport; +pub use report::BlippyReportSortKey; diff --git a/nexus/reconfigurator/blippy/src/report.rs b/nexus/reconfigurator/blippy/src/report.rs new file mode 100644 index 0000000000..136d3b7538 --- /dev/null +++ b/nexus/reconfigurator/blippy/src/report.rs @@ -0,0 +1,85 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::blippy::Note; +use core::fmt; +use nexus_types::deployment::Blueprint; + +#[derive(Debug, Clone, Copy)] +pub enum BlippyReportSortKey { + Kind, + Severity, +} + +#[derive(Debug)] +pub struct BlippyReport<'a> { + blueprint: &'a Blueprint, + notes: Vec, + sort_key: BlippyReportSortKey, +} + +impl<'a> BlippyReport<'a> { + pub(crate) fn new( + blueprint: &'a Blueprint, + notes: Vec, + sort_key: BlippyReportSortKey, + ) -> Self { + let mut slf = Self { blueprint, notes, sort_key }; + slf.sort_notes_by_key(sort_key); + slf + } + + pub fn sort_notes_by_key(&mut self, key: BlippyReportSortKey) { + match key { + BlippyReportSortKey::Kind => { + self.notes.sort_unstable_by(|a, b| { + let a = (&a.kind, &a.severity); + let b = (&b.kind, &b.severity); + a.cmp(&b) + }); + } + BlippyReportSortKey::Severity => { + self.notes.sort_unstable_by(|a, b| { + let a = (&a.severity, &a.kind); + let b = (&b.severity, &b.kind); + a.cmp(&b) + }); + } + } + self.sort_key = key; + } + + pub fn blueprint(&self) -> &'a Blueprint { + self.blueprint + } + + pub fn notes(&self) -> &[Note] { + &self.notes + } + + pub fn display(&self) -> BlippyReportDisplay<'_> { + BlippyReportDisplay { report: self } + } +} + +#[derive(Debug)] +pub struct BlippyReportDisplay<'a> { + report: &'a BlippyReport<'a>, +} + +impl fmt::Display for BlippyReportDisplay<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let pluralize = if self.report.notes.len() == 1 { "" } else { "s" }; + writeln!( + f, + "blippy report for blueprint {}: {} note{pluralize}", + self.report.blueprint.id, + self.report.notes.len(), + )?; + for note in self.report.notes() { + writeln!(f, " {}", note.display(self.report.sort_key))?; + } + Ok(()) + } +} diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 5ba1665483..543b9bd278 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -400,7 +400,9 @@ fn register_dataset_records_step<'a>( &opctx, datastore, bp_id, - blueprint.all_omicron_datasets(BlueprintDatasetFilter::All), + blueprint + .all_omicron_datasets(BlueprintDatasetFilter::All) + .map(|(_sled_id, dataset)| dataset), ) .await?; diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index 63c5e3e44f..c7d978b387 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -20,6 +20,7 @@ ipnet.workspace = true itertools.workspace = true nexus-config.workspace = true nexus-inventory.workspace = true +nexus-reconfigurator-blippy.workspace = true nexus-sled-agent-shared.workspace = true nexus-types.workspace = true omicron-common.workspace = true diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs 
b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index ad59fb3718..0aaadb624d 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -1834,11 +1834,13 @@ impl<'a> BlueprintBuilder<'a> { let mut skip_zpools = BTreeSet::new(); for zone_config in self .current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning) + .filter(|z| z.zone_type.kind() == zone_kind) { if let Some(zpool) = zone_config.zone_type.durable_zpool() { - if zone_kind == zone_config.zone_type.kind() { - skip_zpools.insert(zpool); - } + skip_zpools.insert(zpool); + } + if let Some(zpool) = &zone_config.filesystem_pool { + skip_zpools.insert(zpool); } } @@ -1901,211 +1903,34 @@ impl<'a> BlueprintBuilder<'a> { #[cfg(test)] pub mod test { use super::*; - use crate::blueprint_builder::external_networking::ExternalIpAllocator; use crate::example::example; use crate::example::ExampleSystemBuilder; use crate::example::SimRngState; use crate::system::SledBuilder; use nexus_inventory::CollectionBuilder; - use nexus_types::deployment::BlueprintDatasetConfig; + use nexus_reconfigurator_blippy::Blippy; + use nexus_reconfigurator_blippy::BlippyReportSortKey; use nexus_types::deployment::BlueprintDatasetDisposition; - use nexus_types::deployment::BlueprintDatasetFilter; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::OmicronZoneNetworkResources; use nexus_types::external_api::views::SledPolicy; use omicron_common::address::IpRange; - use omicron_common::disk::DatasetKind; use omicron_test_utils::dev::test_setup_log; - use omicron_uuid_kinds::DatasetUuid; use std::collections::BTreeSet; use std::mem; pub const DEFAULT_N_SLEDS: usize = 3; - fn datasets_for_sled( - blueprint: &Blueprint, - sled_id: SledUuid, - ) -> &BTreeMap { - &blueprint - .blueprint_datasets - .get(&sled_id) - .unwrap_or_else(|| { - panic!("Cannot find datasets on missing sled: {sled_id}") - }) - .datasets - } - - fn find_dataset<'a>( - datasets: &'a BTreeMap, - zpool: &ZpoolName, - kind: DatasetKind, - ) -> &'a BlueprintDatasetConfig { - datasets.values().find(|dataset| { - &dataset.pool == zpool && - dataset.kind == kind - }).unwrap_or_else(|| { - let kinds = datasets.values().map(|d| (&d.id, &d.pool, &d.kind)).collect::>(); - panic!("Cannot find dataset of type {kind}\nFound the following: {kinds:#?}") - }) - } - /// Checks various conditions that should be true for all blueprints #[track_caller] pub fn verify_blueprint(blueprint: &Blueprint) { - // There should be no duplicate underlay IPs. - let mut underlay_ips: BTreeMap = - BTreeMap::new(); - for (_, zone) in - blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) - { - if let Some(previous) = - underlay_ips.insert(zone.underlay_ip(), zone) - { - panic!( - "found duplicate underlay IP {} in zones {} and {}\ - \n\n\ - blueprint: {}", - zone.underlay_ip(), - zone.id, - previous.id, - blueprint.display(), - ); - } - } - - // There should be no duplicate external IPs. - // - // Checking this is slightly complicated due to SNAT IPs, so we'll - // delegate to an `ExternalIpAllocator`, which already contains the - // logic for dup checking. (`mark_ip_used` fails if the IP is _already_ - // marked as used.) - // - // We create this with an empty set of service IP pool ranges; those are - // used for allocation, which we don't do, and aren't needed for - // duplicate checking. 
- let mut ip_allocator = ExternalIpAllocator::new(&[]); - for (external_ip, _nic) in blueprint - .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) - .filter_map(|(_, zone)| zone.zone_type.external_networking()) - { - ip_allocator - .mark_ip_used(&external_ip) - .expect("no duplicate external IPs in running zones"); - } - - // On any given zpool, we should have at most one zone of any given - // kind. - // - // TODO: we may want a similar check for non-durable datasets? - let mut kinds_by_zpool: BTreeMap< - ZpoolUuid, - BTreeMap, - > = BTreeMap::new(); - for (_, zone) in blueprint.all_omicron_zones(BlueprintZoneFilter::All) { - if let Some(dataset) = zone.zone_type.durable_dataset() { - let kind = zone.zone_type.kind(); - if let Some(previous) = kinds_by_zpool - .entry(dataset.dataset.pool_name.id()) - .or_default() - .insert(kind, zone.id) - { - panic!( - "zpool {} has two zones of kind {kind:?}: {} and {}\ - \n\n\ - blueprint: {}", - dataset.dataset.pool_name, - zone.id, - previous, - blueprint.display(), - ); - } - } - } - - // All disks should have debug and zone root datasets. - for (sled_id, disk_config) in &blueprint.blueprint_disks { - for disk in &disk_config.disks { - eprintln!( - "checking datasets for sled {sled_id} disk {}", - disk.id - ); - let zpool = ZpoolName::new_external(disk.pool_id); - let datasets = datasets_for_sled(&blueprint, *sled_id); - - let dataset = - find_dataset(&datasets, &zpool, DatasetKind::Debug); - assert_eq!( - dataset.disposition, - BlueprintDatasetDisposition::InService - ); - let dataset = find_dataset( - &datasets, - &zpool, - DatasetKind::TransientZoneRoot, - ); - assert_eq!( - dataset.disposition, - BlueprintDatasetDisposition::InService - ); - } - } - - // All zones should have dataset records. - for (sled_id, zone_config) in - blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) - { - match blueprint.sled_state.get(&sled_id) { - // Decommissioned sleds don't keep dataset state around. - // - // Normally we wouldn't observe zones from decommissioned sleds - // anyway, but that's the responsibility of the Planner, not the - // BlueprintBuilder. - None | Some(SledState::Decommissioned) => continue, - Some(SledState::Active) => (), - } - let datasets = datasets_for_sled(&blueprint, sled_id); - - let (zpool, kind) = - zone_config.filesystem_dataset().unwrap().into_parts(); - let dataset = find_dataset(&datasets, &zpool, kind); - assert_eq!( - dataset.disposition, - BlueprintDatasetDisposition::InService - ); - - if let Some(durable_dataset) = - zone_config.zone_type.durable_dataset() - { - let zpool = &durable_dataset.dataset.pool_name; - let dataset = - find_dataset(&datasets, &zpool, durable_dataset.kind); - assert_eq!( - dataset.disposition, - BlueprintDatasetDisposition::InService - ); - } - } - - // All datasets should be on zpools that have disk records. 
- for (sled_id, datasets) in &blueprint.blueprint_datasets { - let sled_disk_zpools = blueprint - .blueprint_disks - .get(&sled_id) - .expect("no disks for sled") - .disks - .iter() - .map(|disk| disk.pool_id) - .collect::>(); - - for dataset in datasets.datasets.values().filter(|dataset| { - dataset.disposition.matches(BlueprintDatasetFilter::InService) - }) { - assert!( - sled_disk_zpools.contains(&dataset.pool.id()), - "sled {sled_id} has dataset {dataset:?}, \ - which references a zpool without an associated disk", - ); - } + let blippy_report = + Blippy::new(blueprint).into_report(BlippyReportSortKey::Kind); + if !blippy_report.notes().is_empty() { + eprintln!("{}", blueprint.display()); + eprintln!("---"); + eprintln!("{}", blippy_report.display()); + panic!("expected blippy report for blueprint to have no notes"); } } @@ -2314,6 +2139,20 @@ pub mod test { *blueprint1.sled_state.get_mut(&decommision_sled_id).unwrap() = SledState::Decommissioned; + // We're going under the hood of the blueprint here; a sled can only get + // to the decommissioned state if all its disks/datasets/zones have been + // expunged, so do that too. + for zone in &mut blueprint1 + .blueprint_zones + .get_mut(&decommision_sled_id) + .expect("has zones") + .zones + { + zone.disposition = BlueprintZoneDisposition::Expunged; + } + blueprint1.blueprint_datasets.remove(&decommision_sled_id); + blueprint1.blueprint_disks.remove(&decommision_sled_id); + // Change the input to note that the sled is expunged, but still active. let mut builder = input.into_builder(); builder.sleds_mut().get_mut(&decommision_sled_id).unwrap().policy = diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs b/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs index 56fa39374d..4222481149 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs @@ -102,6 +102,7 @@ pub mod test { use crate::blueprint_builder::test::verify_blueprint; use crate::example::ExampleSystemBuilder; use nexus_types::deployment::BlueprintZoneFilter; + use omicron_common::disk::DatasetKind; use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; use omicron_test_utils::dev::test_setup_log; @@ -128,6 +129,24 @@ pub mod test { let npruned = blueprint1.blueprint_zones.len() - 1; assert!(npruned > 0); + // Also prune out the zones' datasets, or we're left with an invalid + // blueprint. + for (_, dataset_config) in + blueprint1.blueprint_datasets.iter_mut().skip(1) + { + dataset_config.datasets.retain(|_id, dataset| { + // This is gross; once zone configs know explicit dataset IDs, + // we should retain by ID instead. + match &dataset.kind { + DatasetKind::InternalDns => false, + DatasetKind::TransientZone { name } => { + !name.starts_with("oxz_internal_dns") + } + _ => true, + } + }); + } + verify_blueprint(&blueprint1); // Create an allocator. 
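// Aside (sketch only, not something this change introduces): the retain
// closure above is repeated in the planner.rs hunk below; if the "gross"
// matching-by-name ever needs a third copy, a small shared predicate would
// capture it:
//
//     fn is_internal_dns_dataset(kind: &DatasetKind) -> bool {
//         match kind {
//             DatasetKind::InternalDns => true,
//             DatasetKind::TransientZone { name } => {
//                 name.starts_with("oxz_internal_dns")
//             }
//             _ => false,
//         }
//     }
//
// with call sites pruning via `datasets.retain(|_id, d| !is_internal_dns_dataset(&d.kind))`.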
diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index a18bb6e3d3..ce1a1ae960 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -1253,6 +1253,21 @@ mod test { for (_sled_id, zones) in blueprint1.blueprint_zones.iter_mut().take(2) { zones.zones.retain(|z| !z.zone_type.is_internal_dns()); } + for (_, dataset_config) in + blueprint1.blueprint_datasets.iter_mut().take(2) + { + dataset_config.datasets.retain(|_id, dataset| { + // This is gross; once zone configs know explicit dataset IDs, + // we should retain by ID instead. + match &dataset.kind { + DatasetKind::InternalDns => false, + DatasetKind::TransientZone { name } => { + !name.starts_with("oxz_internal_dns") + } + _ => true, + } + }); + } let blueprint2 = Planner::new_based_on( logctx.log.clone(), diff --git a/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt index f2d8334027..45d7feb667 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt @@ -61,8 +61,8 @@ to: blueprint fe13be30-94c2-4fa6-aad5-ae3c5028f6bb oxp_f843fb62-0f04-4c7d-a56f-62531104dc77/crypt/zone/oxz_crucible_fc4f1769-9611-42d3-b8c1-f2be9b5359f6 35fa6ec8-6b58-4fcc-a5a2-36e66736e9c1 none none off oxp_96569b61-9e0c-4ee7-bd11-a5e0c541ca99/crypt/zone/oxz_crucible_fff71a84-09c2-4dab-bc18-8f4570f278bb 00abfe99-288d-4a63-abea-adfa62e74524 none none off oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_crucible_pantry_197067bc-9a21-444e-9794-6051d9f78a00 19736dbd-1d01-41e9-a800-ffc450464c2d none none off - oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_crucible_pantry_350fba7f-b754-429e-a21d-e91d139713f2 8be4aa2f-1612-4bdf-a0f6-7458b151308f none none off - oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_crucible_pantry_504963cb-3077-477c-b4e5-2d69bf9caa0c 7fd439f9-dcef-4cfb-b1a1-d298be9d2e3b none none off + oxp_5192ef62-5a12-4a0c-829d-a409da87909c/crypt/zone/oxz_crucible_pantry_350fba7f-b754-429e-a21d-e91d139713f2 8be4aa2f-1612-4bdf-a0f6-7458b151308f none none off + oxp_8778bcc5-dddf-4345-9fdf-5c46a36497b0/crypt/zone/oxz_crucible_pantry_504963cb-3077-477c-b4e5-2d69bf9caa0c 7fd439f9-dcef-4cfb-b1a1-d298be9d2e3b none none off oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_internal_dns_1e9422ca-a3d9-4435-bb17-39d5ad22b4ba 5651c4fb-d146-4270-8794-6ed7ceb6f130 none none off oxp_8778bcc5-dddf-4345-9fdf-5c46a36497b0/crypt/zone/oxz_internal_dns_4a0ec9f6-6ce6-4456-831e-5f8df7b57332 d2b9f103-8bf1-4603-873d-cec130430ba7 none none off oxp_5192ef62-5a12-4a0c-829d-a409da87909c/crypt/zone/oxz_internal_dns_efecb8a2-ce0b-416f-958b-de1fad1bef02 158e226c-e44e-427f-93af-ee96d2cfb9be none none off diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt index ef93527fa3..6414749fce 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt @@ -177,8 +177,8 @@ to: blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e oxp_9d833141-18a1-4f24-8a34-6076c026aa87/crypt/debug f631d6a1-9db5-4fc7-978b-9ace485dfe16 100 GiB none gzip-9 
oxp_a279461f-a7b9-413f-a79f-cb4dab4c3fce/crypt/debug 1b9c97d6-c90d-4109-b99c-9ab799b3c3b9 100 GiB none gzip-9 oxp_ff7e002b-3ad8-4d45-b03a-c46ef0ac8e59/crypt/debug 9427caff-29ec-4cd1-981b-26d4a7900052 100 GiB none gzip-9 -+ oxp_1e2ec79e-9c11-4133-ac77-e0b994a507d5/crypt/zone/oxz_crucible_pantry_ff9ce09c-afbf-425b-bbfa-3d8fb254f98e 7d47e5d6-a1a5-451a-b4b4-3a9747f8154a none none off -+ oxp_1e2ec79e-9c11-4133-ac77-e0b994a507d5/crypt/zone/oxz_nexus_845869e9-ecb2-4ec3-b6b8-2a836e459243 a759d2f3-003c-4fb8-b06b-f985e213b273 none none off ++ oxp_440ae69d-5e2e-4539-91d0-e2930bdd7203/crypt/zone/oxz_crucible_pantry_ff9ce09c-afbf-425b-bbfa-3d8fb254f98e 7d47e5d6-a1a5-451a-b4b4-3a9747f8154a none none off ++ oxp_440ae69d-5e2e-4539-91d0-e2930bdd7203/crypt/zone/oxz_nexus_845869e9-ecb2-4ec3-b6b8-2a836e459243 a759d2f3-003c-4fb8-b06b-f985e213b273 none none off omicron zones generation 2 -> 3: diff --git a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt index 803d4ea2a1..39b3398b2e 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt @@ -288,8 +288,8 @@ to: blueprint 74f2e7fd-687e-4c9e-b5d8-e474a5bb8e7c oxp_fe379ac6-1938-4cc2-93a9-43b1447229ae/crypt/debug 3a49dd24-8ead-4196-b453-8aa3273b77d1 100 GiB none gzip-9 + oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/clickhouse 410eca9c-8eee-4a98-aea2-a363697974f7 none none off + oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_clickhouse_fa97835a-aabc-4fe9-9e85-3e50f207129c 08f15d4b-91dc-445d-88f4-cb9fa585444b none none off -+ oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off -+ oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off ++ oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off ++ oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off omicron zones generation 3 -> 4: diff --git a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt index 93dc17c180..2c90c981fb 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt @@ -188,9 +188,9 @@ to: blueprint df68d4d4-5af4-4b56-95bb-1654a6957d4f oxp_427b2ccd-998f-4085-af21-e600604cf21e/crypt/zone/oxz_crucible_befe73dd-5970-49a4-9adf-7b4f453c45cf 95d72ef9-e070-49e4-a57b-2c392def6025 none none off oxp_2fa34d8e-13d9-42d3-b8ba-ca9d74ac496a/crypt/zone/oxz_crucible_d9106a19-f267-48db-a82b-004e643feb49 9b9fb14e-cd17-4a7a-a74a-bfd9c7682831 none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_crucible_pantry_6c7f6a84-78b3-4dd9-878e-51bedfda471f aa190e01-9a4e-4131-9fcf-240532108c7f none none off - oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off + 
oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_internal_dns_0c42ad01-b854-4e7d-bd6c-25fdc3eddef4 1de9cde7-6c1e-4865-bd3d-378e22f62fb8 none none off - oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off + oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_nexus_7e763480-0f4f-43cb-ab9a-52b667d8fda5 5773e3b1-dde0-4b54-bc13-3c3bf816015e none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_ntp_f34f8d36-7137-48d3-9d13-6a46c4edcef4 c8c03dec-65d4-4c97-87c3-a43a8363c97c none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/debug f015e445-2e52-45c9-9f0a-49cb5ceae245 100 GiB none gzip-9 diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt index f4f63ad96a..8bb7635d75 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt @@ -353,9 +353,9 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cf32a1ce-2c9e-49f5-b1cf-4af7f2a28901/crypt/debug 2dfc5c53-6618-4352-b754-86ef6463c20a 100 GiB none gzip-9 oxp_e405da11-cb6b-4ebc-bac1-9bc997352e10/crypt/debug 61a653cf-44a6-43c0-90e1-bec539511703 100 GiB none gzip-9 oxp_f4d7f914-ec73-4b65-8696-5068591d9065/crypt/debug b803d901-7e43-42fa-8372-43c3c5b3c1a9 100 GiB none gzip-9 -+ oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off -+ oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off -+ oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off ++ oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off ++ oxp_55196665-ed61-4b23-9a74-0711bf2eaf90/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off ++ oxp_6b2a719a-35eb-469f-aa54-114a1f21f37d/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off omicron zones generation 2 -> 3: @@ -443,9 +443,9 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cd62306a-aedf-47e8-93d5-92a358d64c7b/crypt/debug 73674f4b-1d93-404a-bc9c-8395efac97fd 100 GiB none gzip-9 oxp_f1693454-aac1-4265-b8a0-4e9f3f41c7b3/crypt/debug 938737fb-b72f-4727-8833-9697c518ca37 100 GiB none gzip-9 oxp_fe4fdfba-3b6d-47d3-8612-1fb2390b650a/crypt/debug 8e58b91f-9ce2-4256-8dec-5f90f31a73fa 100 GiB none gzip-9 -+ oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off -+ oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off -+ 
oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off ++ oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off ++ oxp_60131a33-1f12-4dbb-9435-bdd368db1f51/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off ++ oxp_4fbd2fe0-2eac-41b8-8e8d-4fa46c3e8b6c/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off omicron zones generation 2 -> 3: diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt index 72750bc8f0..9fe4b5218b 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt @@ -55,10 +55,10 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cf32a1ce-2c9e-49f5-b1cf-4af7f2a28901/crypt/zone/oxz_crucible_c60379ba-4e30-4628-a79a-0ae509aef4c5 a6fcf496-70a1-49bf-a951-62fcec8dd5e2 none none off oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/zone/oxz_crucible_f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 c596346d-4040-4103-b036-8fafdbaada00 none none off oxp_6b2a719a-35eb-469f-aa54-114a1f21f37d/crypt/zone/oxz_crucible_f1a7b9a7-fc6a-4b23-b829-045ff33117ff c864de0d-9859-4ad1-a30b-f5ac45ba03ed none none off - oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off - oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off + oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off + oxp_55196665-ed61-4b23-9a74-0711bf2eaf90/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_a732c489-d29a-4f75-b900-5966385943af db6c139b-9028-4d8e-92c7-6cc1e9aa0131 none none off - oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off + oxp_6b2a719a-35eb-469f-aa54-114a1f21f37d/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_ntp_621509d6-3772-4009-aca1-35eefd1098fb 3b5822d2-9918-4bd6-8b75-2f52bdd73189 none none off oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/debug bf9b39db-5a6a-4b45-b2da-c37425271014 100 GiB none gzip-9 oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/debug 1b4e8d9e-e447-4df1-8e0b-57edc318e8ad 100 GiB none gzip-9 @@ -145,10 +145,10 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cd62306a-aedf-47e8-93d5-92a358d64c7b/crypt/zone/oxz_crucible_be920398-024a-4655-8c49-69b5ac48dfff 87f757d6-fa4c-4423-995c-1eab5e7d09a2 none none off oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/zone/oxz_crucible_d47f4996-fac0-4657-bcea-01b1fee6404d c1af262a-2595-4236-98c8-21c5b63c80c3 none none off oxp_789d607d-d196-428e-a988-f7886a327859/crypt/zone/oxz_crucible_e001fea0-6594-4ece-97e3-6198c293e931 5e27b9bc-e69f-4258-83f2-5f9a1109a625 none none off - 
oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off - oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off + oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off + oxp_60131a33-1f12-4dbb-9435-bdd368db1f51/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf 45d32c13-cbbb-4382-a0ed-dc6574b827b7 none none off - oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off + oxp_4fbd2fe0-2eac-41b8-8e8d-4fa46c3e8b6c/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_ntp_bf79a56a-97af-4cc4-94a5-8b20d64c2cda a410308c-e2cb-4e4d-9da6-1879336f93f2 none none off oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/debug 755e24a8-67cc-44b1-8c25-2dcb3acd988f 100 GiB none gzip-9 oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/debug c834f8cd-25ee-4c62-af03-49cef53fc4c1 100 GiB none gzip-9 diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index ad39777054..f107f2af71 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -825,7 +825,7 @@ impl BackgroundTasksInitializer { done", period: config.region_snapshot_replacement_finish.period_secs, task_impl: Box::new(RegionSnapshotReplacementFinishDetector::new( - datastore, + datastore, sagas, )), opctx: opctx.child(BTreeMap::new()), watchers: vec![], diff --git a/nexus/src/app/background/tasks/region_replacement.rs b/nexus/src/app/background/tasks/region_replacement.rs index f86ba8eb8f..caa262bc8c 100644 --- a/nexus/src/app/background/tasks/region_replacement.rs +++ b/nexus/src/app/background/tasks/region_replacement.rs @@ -23,6 +23,7 @@ use nexus_db_model::RegionReplacement; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::internal_api::background::RegionReplacementStatus; +use omicron_common::api::external::Error; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::TypedUuid; use serde_json::json; @@ -42,7 +43,7 @@ impl RegionReplacementDetector { &self, serialized_authn: authn::saga::Serialized, request: RegionReplacement, - ) -> Result<(), omicron_common::api::external::Error> { + ) -> Result<(), Error> { let params = sagas::region_replacement_start::Params { serialized_authn, request, @@ -68,17 +69,18 @@ impl BackgroundTask for RegionReplacementDetector { let mut status = RegionReplacementStatus::default(); - // Find regions on expunged physical disks + // Find read/write regions on expunged physical disks let regions_to_be_replaced = match self .datastore - .find_regions_on_expunged_physical_disks(opctx) + .find_read_write_regions_on_expunged_physical_disks(opctx) .await { Ok(regions) => regions, Err(e) => { let s = format!( - "find_regions_on_expunged_physical_disks failed: {e}" + "find_read_write_regions_on_expunged_physical_disks \ + failed: {e}" ); error!(&log, "{s}"); status.errors.push(s); @@ -134,15 +136,31 @@ impl BackgroundTask for RegionReplacementDetector { } Err(e) => { - let s = format!( - 
"error adding region replacement request for \ - region {} volume id {}: {e}", - region.id(), - region.volume_id(), - ); - error!(&log, "{s}"); + match e { + Error::Conflict { message } + if message.external_message() + == "volume repair lock" => + { + // This is not a fatal error! If there are + // competing region replacement and region + // snapshot replacements, then they are both + // attempting to lock volumes. + } + + _ => { + let s = format!( + "error adding region replacement \ + request for region {} volume id {}: \ + {e}", + region.id(), + region.volume_id(), + ); + error!(&log, "{s}"); + + status.errors.push(s); + } + } - status.errors.push(s); continue; } } @@ -173,7 +191,9 @@ impl BackgroundTask for RegionReplacementDetector { // If the replacement request is in the `requested` state and // the request's volume was soft-deleted or hard-deleted, avoid // sending the start request and instead transition the request - // to completed + // to completed. Note the saga will do the right thing if the + // volume is deleted, but this avoids the overhead of starting + // it. let volume_deleted = match self .datastore @@ -314,6 +334,21 @@ mod test { // Add a region replacement request for a fake region let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = RegionReplacement::new(Uuid::new_v4(), volume_id); let request_id = request.id; diff --git a/nexus/src/app/background/tasks/region_replacement_driver.rs b/nexus/src/app/background/tasks/region_replacement_driver.rs index e7fe0d6338..6cc28f9dfd 100644 --- a/nexus/src/app/background/tasks/region_replacement_driver.rs +++ b/nexus/src/app/background/tasks/region_replacement_driver.rs @@ -258,6 +258,7 @@ mod test { use omicron_uuid_kinds::UpstairsKind; use omicron_uuid_kinds::UpstairsRepairKind; use omicron_uuid_kinds::UpstairsSessionKind; + use sled_agent_client::types::VolumeConstructionRequest; use uuid::Uuid; type ControlPlaneTestContext = @@ -288,6 +289,20 @@ mod test { let new_region_id = Uuid::new_v4(); let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = { let mut request = RegionReplacement::new(region_id, volume_id); request.replacement_state = RegionReplacementState::Running; @@ -382,6 +397,20 @@ mod test { .unwrap(); } + datastore + .volume_create(nexus_db_model::Volume::new( + old_region.volume_id(), + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: old_region.volume_id(), + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + // Add a region replacement request for that region, and change it to // state ReplacementDone. Set the new_region_id to the region created // above. 
@@ -481,6 +510,20 @@ mod test { .unwrap(); } + datastore + .volume_create(nexus_db_model::Volume::new( + old_region.volume_id(), + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: old_region.volume_id(), + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + // Add a region replacement request for that region, and change it to // state Running. Set the new_region_id to the region created above. let request = { @@ -630,6 +673,20 @@ mod test { .unwrap(); } + datastore + .volume_create(nexus_db_model::Volume::new( + old_region.volume_id(), + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: old_region.volume_id(), + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + // Add a region replacement request for that region, and change it to // state Running. Set the new_region_id to the region created above. let request = { diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs index 0eebd37fb7..61a84c579d 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs @@ -8,9 +8,15 @@ //! Once all related region snapshot replacement steps are done, the region //! snapshot replacement can be completed. +use crate::app::authn; use crate::app::background::BackgroundTask; +use crate::app::saga::StartSaga; +use crate::app::sagas; +use crate::app::sagas::region_snapshot_replacement_finish::*; +use crate::app::sagas::NexusSaga; use futures::future::BoxFuture; use futures::FutureExt; +use nexus_db_model::RegionSnapshotReplacement; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::internal_api::background::RegionSnapshotReplacementFinishStatus; @@ -19,11 +25,31 @@ use std::sync::Arc; pub struct RegionSnapshotReplacementFinishDetector { datastore: Arc, + sagas: Arc, } impl RegionSnapshotReplacementFinishDetector { - pub fn new(datastore: Arc) -> Self { - RegionSnapshotReplacementFinishDetector { datastore } + pub fn new(datastore: Arc, sagas: Arc) -> Self { + RegionSnapshotReplacementFinishDetector { datastore, sagas } + } + + async fn send_finish_request( + &self, + opctx: &OpContext, + request: RegionSnapshotReplacement, + ) -> Result<(), omicron_common::api::external::Error> { + let params = sagas::region_snapshot_replacement_finish::Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + request, + }; + + let saga_dag = SagaRegionSnapshotReplacementFinish::prepare(¶ms)?; + + // We only care that the saga was started, and don't wish to wait for it + // to complete, so use `StartSaga::saga_start`, rather than `saga_run`. 
+ self.sagas.saga_start(saga_dag).await?; + + Ok(()) } async fn transition_requests_to_done( @@ -120,21 +146,23 @@ impl RegionSnapshotReplacementFinishDetector { } }; - // Transition region snapshot replacement to Complete - match self - .datastore - .set_region_snapshot_replacement_complete(opctx, request.id) - .await - { + let request_id = request.id; + + match self.send_finish_request(opctx, request).await { Ok(()) => { - let s = format!("set request {} to done", request.id); + let s = format!( + "region snapshot replacement finish invoked ok for \ + {request_id}" + ); + info!(&log, "{s}"); - status.records_set_to_done.push(s); + status.finish_invoked_ok.push(s); } Err(e) => { let s = format!( - "marking snapshot replacement as done failed: {e}" + "invoking region snapshot replacement finish for \ + {request_id} failed: {e}", ); error!(&log, "{s}"); status.errors.push(s); @@ -170,6 +198,7 @@ mod test { use nexus_db_queries::db::datastore::region_snapshot_replacement; use nexus_test_utils_macros::nexus_test; use omicron_uuid_kinds::DatasetUuid; + use sled_agent_client::types::VolumeConstructionRequest; use uuid::Uuid; type ControlPlaneTestContext = @@ -186,8 +215,10 @@ mod test { datastore.clone(), ); - let mut task = - RegionSnapshotReplacementFinishDetector::new(datastore.clone()); + let mut task = RegionSnapshotReplacementFinishDetector::new( + datastore.clone(), + nexus.sagas.clone(), + ); // Noop test let result: RegionSnapshotReplacementFinishStatus = @@ -208,11 +239,25 @@ mod test { let request_id = request.id; + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); @@ -232,6 +277,7 @@ mod test { .unwrap(); let new_region_id = Uuid::new_v4(); + let new_region_volume_id = Uuid::new_v4(); let old_snapshot_volume_id = Uuid::new_v4(); datastore @@ -240,6 +286,7 @@ mod test { request_id, operating_saga_id, new_region_id, + new_region_volume_id, old_snapshot_volume_id, ) .await @@ -267,14 +314,44 @@ mod test { let operating_saga_id = Uuid::new_v4(); + let step_volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut step_1 = - RegionSnapshotReplacementStep::new(request_id, Uuid::new_v4()); + RegionSnapshotReplacementStep::new(request_id, step_volume_id); step_1.replacement_state = RegionSnapshotReplacementStepState::Complete; step_1.operating_saga_id = Some(operating_saga_id); let step_1_id = step_1.id; + let step_volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut step_2 = - RegionSnapshotReplacementStep::new(request_id, Uuid::new_v4()); + RegionSnapshotReplacementStep::new(request_id, step_volume_id); step_2.replacement_state = RegionSnapshotReplacementStepState::Complete; step_2.operating_saga_id = Some(operating_saga_id); let step_2_id = step_2.id; @@ -335,8 +412,9 @@ mod test { assert_eq!( result, RegionSnapshotReplacementFinishStatus { - records_set_to_done: vec![format!( - "set request {request_id} to done" + finish_invoked_ok: vec![format!( + "region snapshot replacement finish invoked ok for \ + {request_id}" )], errors: vec![], }, diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs index db30e1d074..57bbf3741c 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs @@ -155,6 +155,7 @@ mod test { use nexus_db_model::RegionSnapshotReplacementState; use nexus_test_utils_macros::nexus_test; use omicron_uuid_kinds::DatasetUuid; + use sled_agent_client::types::VolumeConstructionRequest; use uuid::Uuid; type ControlPlaneTestContext = @@ -199,11 +200,25 @@ mod test { let request_1_id = request.id; + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); @@ -219,11 +234,25 @@ mod test { let request_2_id = request.id; + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs index 04d86a1268..f2b82a3943 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs @@ -23,6 +23,7 @@ use nexus_db_model::RegionSnapshotReplacement; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus; +use omicron_common::api::external::Error; use serde_json::json; use std::sync::Arc; @@ -40,7 +41,7 @@ impl RegionSnapshotReplacementDetector { &self, serialized_authn: authn::saga::Serialized, request: RegionSnapshotReplacement, - ) -> Result<(), omicron_common::api::external::Error> { + ) -> Result<(), Error> { let params = sagas::region_snapshot_replacement_start::Params { serialized_authn, request, @@ -138,17 +139,33 @@ impl RegionSnapshotReplacementDetector { } Err(e) => { - let s = - format!("error creating replacement request: {e}"); - - error!( - &log, - "{s}"; - "snapshot_id" => %region_snapshot.snapshot_id, - "region_id" => %region_snapshot.region_id, - "dataset_id" => %region_snapshot.dataset_id, - ); - status.errors.push(s); + match e { + Error::Conflict { message } + if message.external_message() + == "volume repair lock" => + { + // This is not a fatal error! If there are + // competing region replacement and region + // snapshot replacements, then they are both + // attempting to lock volumes. + } + + _ => { + let s = format!( + "error creating replacement request: {e}" + ); + + error!( + &log, + "{s}"; + "snapshot_id" => %region_snapshot.snapshot_id, + "region_id" => %region_snapshot.region_id, + "dataset_id" => %region_snapshot.dataset_id, + ); + + status.errors.push(s); + } + } } } } @@ -185,6 +202,67 @@ impl RegionSnapshotReplacementDetector { for request in requests { let request_id = request.id; + // If the region snapshot is gone, then there are no more references + // in any volume, and the whole region snapshot replacement can be + // fast-tracked to Complete. 
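The fast-track check described in the comment above is implemented just below. As an aside on the test changes in this diff: they repeat the same empty-volume setup before each insert of a replacement request or step. A hypothetical helper could consolidate that boilerplate; the name is made up, but every call in it already appears in the tests above:

```rust
use nexus_db_queries::db::DataStore;
use sled_agent_client::types::VolumeConstructionRequest;
use uuid::Uuid;

/// Hypothetical test helper (not part of the diff): create the empty volume
/// record that the replacement-request inserts now require, returning its id.
async fn create_blank_test_volume(datastore: &DataStore) -> Uuid {
    let volume_id = Uuid::new_v4();
    datastore
        .volume_create(nexus_db_model::Volume::new(
            volume_id,
            serde_json::to_string(&VolumeConstructionRequest::Volume {
                id: volume_id, // not required to match the record id
                block_size: 512,
                sub_volumes: vec![], // nothing needed here
                read_only_parent: None,
            })
            .unwrap(),
        ))
        .await
        .unwrap();
    volume_id
}
```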
+ + let maybe_region_snapshot = match self + .datastore + .region_snapshot_get( + request.old_dataset_id.into(), + request.old_region_id, + request.old_snapshot_id, + ) + .await + { + Ok(maybe_region_snapshot) => maybe_region_snapshot, + + Err(e) => { + let s = format!("query for region snapshot failed: {e}"); + + error!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.errors.push(s); + return; + } + }; + + if maybe_region_snapshot.is_none() { + match self + .datastore + .set_region_snapshot_replacement_complete_from_requested( + &opctx, request.id, + ) + .await + { + Ok(()) => { + let s = format!( + "region snapshot replacement {request_id} \ + completed ok" + ); + info!(&log, "{s}"); + status.requests_completed_ok.push(s); + } + + Err(e) => { + let s = format!( + "query to set region snapshot request state \ + to complete failed: {e}" + ); + + error!(&log, "{s}"; "request.id" => %request_id); + status.errors.push(s); + } + } + + continue; + } + let result = self .send_start_request( authn::saga::Serialized::for_opctx(opctx), @@ -269,6 +347,7 @@ mod test { use nexus_db_model::Snapshot; use nexus_db_model::SnapshotIdentity; use nexus_db_model::SnapshotState; + use nexus_db_model::VolumeResourceUsage; use nexus_db_queries::authz; use nexus_db_queries::db::lookup::LookupPath; use nexus_test_utils::resource_helpers::create_project; @@ -276,6 +355,8 @@ mod test { use omicron_common::api::external; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; + use sled_agent_client::types::CrucibleOpts; + use sled_agent_client::types::VolumeConstructionRequest; use std::collections::BTreeMap; use uuid::Uuid; @@ -309,19 +390,43 @@ mod test { // Add a region snapshot replacement request for a fake region snapshot - let request = RegionSnapshotReplacement::new( - DatasetUuid::new_v4(), // dataset id - Uuid::new_v4(), // region id - Uuid::new_v4(), // snapshot id + let dataset_id = DatasetUuid::new_v4(); + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + + let region_snapshot = RegionSnapshot::new( + dataset_id, + region_id, + snapshot_id, + "[::]:12345".to_string(), ); + datastore.region_snapshot_create(region_snapshot).await.unwrap(); + + let request = + RegionSnapshotReplacement::new(dataset_id, region_id, snapshot_id); + let request_id = request.id; + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); @@ -339,6 +444,7 @@ mod test { "region snapshot replacement start invoked ok for \ {request_id}" )], + requests_completed_ok: vec![], errors: vec![], }, ); @@ -407,6 +513,22 @@ mod test { .await .unwrap(); + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .project_ensure_snapshot( &opctx, @@ -426,7 +548,7 @@ mod test { project_id, disk_id: Uuid::new_v4(), - volume_id: Uuid::new_v4(), + volume_id, destination_volume_id: Uuid::new_v4(), gen: Generation::new(), @@ -508,4 +630,194 @@ mod test { dataset_to_zpool.get(&first_zpool.id.to_string()).unwrap(); assert_eq!(&request.old_dataset_id.to_string(), dataset_id); } + + #[nexus_test(server = crate::Server)] + async fn test_delete_region_snapshot_replacement_volume_causes_complete( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementDetector::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementStartStatus::default()); + assert_eq!(starter.count_reset(), 0); + + // The volume reference counting machinery needs a fake dataset to exist + // (region snapshots are joined with the dataset table when creating the + // CrucibleResources object) + + let disk_test = DiskTest::new(cptestctx).await; + + let dataset_id = disk_test.zpools().next().unwrap().datasets[0].id; + + // Add a region snapshot replacement request for a fake region snapshot + + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + + let region_snapshot = RegionSnapshot::new( + dataset_id, + region_id, + snapshot_id, + "[::1]:12345".to_string(), + ); + + datastore + .region_snapshot_create(region_snapshot.clone()) + .await + .unwrap(); + + let request = + RegionSnapshotReplacement::new(dataset_id, region_id, snapshot_id); + + let request_id = request.id; + + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 1, + extent_count: 1, + gen: 1, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![ + // the region snapshot + String::from("[::1]:12345"), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }) + .unwrap(), + )) + .await + .unwrap(); + + // Assert usage + + let records = datastore + .volume_usage_records_for_resource( + VolumeResourceUsage::RegionSnapshot { + dataset_id: region_snapshot.dataset_id.into(), + region_id: region_snapshot.region_id, + snapshot_id: region_snapshot.snapshot_id, + }, + ) + .await + .unwrap(); + + assert!(!records.is_empty()); + assert_eq!(records[0].volume_id, volume_id); + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, request, volume_id, + ) + .await + .unwrap(); + + // Before the task starts, soft-delete the volume, and delete the + // region snapshot (like the volume delete saga would do). + + let crucible_resources = + datastore.soft_delete_volume(volume_id).await.unwrap(); + + // Assert no more usage + + let records = datastore + .volume_usage_records_for_resource( + VolumeResourceUsage::RegionSnapshot { + dataset_id: region_snapshot.dataset_id.into(), + region_id: region_snapshot.region_id, + snapshot_id: region_snapshot.snapshot_id, + }, + ) + .await + .unwrap(); + + assert!(records.is_empty()); + + // The region snapshot should have been returned for deletion + + let datasets_and_snapshots = + datastore.snapshots_to_delete(&crucible_resources).await.unwrap(); + + assert!(!datasets_and_snapshots.is_empty()); + + let region_snapshot_to_delete = &datasets_and_snapshots[0].1; + + assert_eq!( + region_snapshot_to_delete.dataset_id, + region_snapshot.dataset_id, + ); + assert_eq!( + region_snapshot_to_delete.region_id, + region_snapshot.region_id, + ); + assert_eq!( + region_snapshot_to_delete.snapshot_id, + region_snapshot.snapshot_id, + ); + + // So delete it! 
+ + datastore + .region_snapshot_remove( + region_snapshot_to_delete.dataset_id.into(), + region_snapshot_to_delete.region_id, + region_snapshot_to_delete.snapshot_id, + ) + .await + .unwrap(); + + // Activate the task - it should pick the request up but not attempt to + // run the start saga + + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + assert_eq!( + result, + RegionSnapshotReplacementStartStatus { + requests_created_ok: vec![], + start_invoked_ok: vec![], + requests_completed_ok: vec![format!( + "region snapshot replacement {request_id} completed ok" + )], + errors: vec![], + }, + ); + + // Assert start saga not invoked + assert_eq!(starter.count_reset(), 0); + } } diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs index ac259ecba8..f481126312 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs @@ -32,10 +32,11 @@ use futures::future::BoxFuture; use futures::FutureExt; use nexus_db_model::RegionSnapshotReplacementStep; use nexus_db_queries::context::OpContext; -use nexus_db_queries::db::datastore::region_snapshot_replacement; +use nexus_db_queries::db::datastore::region_snapshot_replacement::*; use nexus_db_queries::db::DataStore; use nexus_types::identity::Asset; use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus; +use omicron_common::api::external::Error; use serde_json::json; use std::sync::Arc; @@ -53,7 +54,7 @@ impl RegionSnapshotReplacementFindAffected { &self, opctx: &OpContext, request: RegionSnapshotReplacementStep, - ) -> Result<(), omicron_common::api::external::Error> { + ) -> Result<(), Error> { let params = sagas::region_snapshot_replacement_step::Params { serialized_authn: authn::saga::Serialized::for_opctx(opctx), request, @@ -70,7 +71,7 @@ impl RegionSnapshotReplacementFindAffected { &self, opctx: &OpContext, request: RegionSnapshotReplacementStep, - ) -> Result<(), omicron_common::api::external::Error> { + ) -> Result<(), Error> { let Some(old_snapshot_volume_id) = request.old_snapshot_volume_id else { // This state is illegal! @@ -79,9 +80,7 @@ impl RegionSnapshotReplacementFindAffected { request.id, ); - return Err(omicron_common::api::external::Error::internal_error( - &s, - )); + return Err(Error::internal_error(&s)); }; let params = @@ -315,6 +314,21 @@ impl RegionSnapshotReplacementFindAffected { // functions execute), an indefinite amount of work would be // created, continually "moving" the snapshot_addr from // temporary volume to temporary volume. + // + // If the volume was soft deleted, then skip making a step for + // it. + + if volume.time_deleted.is_some() { + info!( + log, + "volume was soft-deleted, skipping creating a step for \ + it"; + "request id" => ?request.id, + "volume id" => ?volume.id(), + ); + + continue; + } match self .datastore @@ -326,7 +340,7 @@ impl RegionSnapshotReplacementFindAffected { .await { Ok(insertion_result) => match insertion_result { - region_snapshot_replacement::InsertStepResult::Inserted { step_id } => { + InsertStepResult::Inserted { step_id } => { let s = format!("created {step_id}"); info!( log, @@ -337,7 +351,7 @@ impl RegionSnapshotReplacementFindAffected { status.step_records_created_ok.push(s); } - region_snapshot_replacement::InsertStepResult::AlreadyHandled { .. } => { + InsertStepResult::AlreadyHandled { .. 
} => { info!( log, "step already exists for volume id"; @@ -345,17 +359,32 @@ impl RegionSnapshotReplacementFindAffected { "volume id" => ?volume.id(), ); } - } + }, Err(e) => { let s = format!("error creating step request: {e}"); - error!( + warn!( log, "{s}"; "request id" => ?request.id, "volume id" => ?volume.id(), ); - status.errors.push(s); + + match e { + Error::Conflict { message } + if message.external_message() + == "volume repair lock" => + { + // This is not a fatal error! If there are + // competing region replacement and region + // snapshot replacements, then they are both + // attempting to lock volumes. + } + + _ => { + status.errors.push(s); + } + } } } } @@ -392,13 +421,81 @@ impl RegionSnapshotReplacementFindAffected { }; for request in step_requests { - let request_id = request.id; + let request_step_id = request.id; + + // Check if the volume was deleted _after_ the replacement step was + // created. Avoid launching the region snapshot replacement step + // saga if it was deleted: the saga will do the right thing if it is + // deleted, but this avoids the overhead of starting it. + + let volume_deleted = + match self.datastore.volume_deleted(request.volume_id).await { + Ok(volume_deleted) => volume_deleted, + + Err(e) => { + let s = format!( + "error checking if volume id {} was \ + deleted: {e}", + request.volume_id, + ); + error!(&log, "{s}"); + + status.errors.push(s); + continue; + } + }; + + if volume_deleted { + // Volume was soft or hard deleted, so proceed with clean up, + // which if this is in state Requested there won't be any + // additional associated state, so transition the record to + // Completed. + + info!( + &log, + "request {} step {} volume {} was soft or hard deleted!", + request.request_id, + request_step_id, + request.volume_id, + ); + + let result = self + .datastore + .set_region_snapshot_replacement_step_volume_deleted_from_requested( + opctx, request, + ) + .await; + + match result { + Ok(()) => { + let s = format!( + "request step {request_step_id} transitioned from \ + requested to volume_deleted" + ); + + info!(&log, "{s}"); + status.step_set_volume_deleted_ok.push(s); + } + + Err(e) => { + let s = format!( + "error transitioning {request_step_id} from \ + requested to complete: {e}" + ); + + error!(&log, "{s}"); + status.errors.push(s); + } + } + + continue; + } match self.send_start_request(opctx, request.clone()).await { Ok(()) => { let s = format!( "region snapshot replacement step saga invoked ok for \ - {request_id}" + {request_step_id}" ); info!( @@ -413,7 +510,7 @@ impl RegionSnapshotReplacementFindAffected { Err(e) => { let s = format!( "invoking region snapshot replacement step saga for \ - {request_id} failed: {e}" + {request_step_id} failed: {e}" ); error!( @@ -575,11 +672,25 @@ mod test { let request_id = request.id; + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); @@ -599,6 +710,7 @@ mod test { .unwrap(); let new_region_id = Uuid::new_v4(); + let new_region_volume_id = Uuid::new_v4(); let old_snapshot_volume_id = Uuid::new_v4(); datastore @@ -607,6 +719,7 @@ mod test { request_id, operating_saga_id, new_region_id, + new_region_volume_id, old_snapshot_volume_id, ) .await @@ -731,11 +844,27 @@ mod test { // Now, add some Complete records and make sure the garbage collection // saga is invoked. + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let result = datastore .insert_region_snapshot_replacement_step(&opctx, { let mut record = RegionSnapshotReplacementStep::new( Uuid::new_v4(), - Uuid::new_v4(), + volume_id, ); record.replacement_state = @@ -747,16 +876,29 @@ mod test { .await .unwrap(); - assert!(matches!( - result, - region_snapshot_replacement::InsertStepResult::Inserted { .. } - )); + assert!(matches!(result, InsertStepResult::Inserted { .. })); + + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); let result = datastore .insert_region_snapshot_replacement_step(&opctx, { let mut record = RegionSnapshotReplacementStep::new( Uuid::new_v4(), - Uuid::new_v4(), + volume_id, ); record.replacement_state = @@ -768,10 +910,7 @@ mod test { .await .unwrap(); - assert!(matches!( - result, - region_snapshot_replacement::InsertStepResult::Inserted { .. } - )); + assert!(matches!(result, InsertStepResult::Inserted { .. })); // Activate the task - it should pick the complete steps up and try to // run the region snapshot replacement step garbage collect saga diff --git a/nexus/src/app/external_endpoints.rs b/nexus/src/app/external_endpoints.rs index f837edc4fb..b93b692465 100644 --- a/nexus/src/app/external_endpoints.rs +++ b/nexus/src/app/external_endpoints.rs @@ -33,15 +33,17 @@ use anyhow::Context; use nexus_db_model::AuthenticationMode; use nexus_db_model::Certificate; use nexus_db_model::DnsGroup; +use nexus_db_model::DnsZone; +use nexus_db_model::Silo; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::datastore::Discoverability; use nexus_db_queries::db::model::ServiceKind; +use nexus_db_queries::db::pagination::Paginator; use nexus_db_queries::db::DataStore; use nexus_types::identity::Resource; use nexus_types::silo::silo_dns_name; use nexus_types::silo::DEFAULT_SILO_ID; use omicron_common::api::external::http_pagination::PaginatedBy; -use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::bail_unless; use openssl::pkey::PKey; @@ -488,69 +490,61 @@ pub(crate) async fn read_all_endpoints( datastore: &DataStore, opctx: &OpContext, ) -> Result { - // We will not look for more than this number of external DNS zones, Silos, - // or certificates. 
We do not expect very many of any of these objects. - const MAX: u32 = 200; - let pagparams_id = DataPageParams { - marker: None, - limit: NonZeroU32::new(MAX).unwrap(), - direction: dropshot::PaginationOrder::Ascending, - }; - let pagbyid = PaginatedBy::Id(pagparams_id); - let pagparams_name = DataPageParams { - marker: None, - limit: NonZeroU32::new(MAX).unwrap(), - direction: dropshot::PaginationOrder::Ascending, - }; - - let silos = - datastore.silos_list(opctx, &pagbyid, Discoverability::All).await?; - let external_dns_zones = datastore - .dns_zones_list(opctx, DnsGroup::External, &pagparams_name) - .await?; + // The batch size here is pretty arbitrary. On the vast majority of + // systems, there will only ever be a handful of any of these objects. Some + // systems are known to have a few dozen silos and a few hundred TLS + // certificates. This code path is not particularly latency-sensitive. Our + // purpose in limiting the batch size is just to avoid unbounded-size + // database transactions. + // + // unwrap(): safe because 200 is non-zero. + let batch_size = NonZeroU32::new(200).unwrap(); + + // Fetch all silos. + let mut silos = Vec::new(); + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = datastore + .silos_list( + opctx, + &PaginatedBy::Id(p.current_pagparams()), + Discoverability::All, + ) + .await?; + paginator = p.found_batch(&batch, &|s: &Silo| s.id()); + silos.extend(batch.into_iter()); + } + + // Fetch all external DNS zones. We should really only ever have one, but + // we may as well paginate this. + let mut external_dns_zones = Vec::new(); + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = datastore + .dns_zones_list(opctx, DnsGroup::External, &p.current_pagparams()) + .await?; + paginator = p.found_batch(&batch, &|z: &DnsZone| z.zone_name.clone()); + external_dns_zones.extend(batch.into_iter()); + } bail_unless!( !external_dns_zones.is_empty(), "expected at least one external DNS zone" ); - let certs = datastore - .certificate_list_for(opctx, Some(ServiceKind::Nexus), &pagbyid, false) - .await?; - - // If we found too many of any of these things, complain as loudly as we - // can. Our results will be wrong. But we still don't want to fail if we - // can avoid it because we want to be able to serve as many endpoints as we - // can. - // TODO-reliability we should prevent people from creating more than this - // maximum number of Silos and certificates. - let max = usize::try_from(MAX).unwrap(); - if silos.len() >= max { - error!( - &opctx.log, - "reading endpoints: expected at most {} silos, but found at \ - least {}. TLS may not work on some Silos' external endpoints.", - MAX, - silos.len(), - ); - } - if external_dns_zones.len() >= max { - error!( - &opctx.log, - "reading endpoints: expected at most {} external DNS zones, but \ - found at least {}. TLS may not work on some Silos' external \ - endpoints.", - MAX, - external_dns_zones.len(), - ); - } - if certs.len() >= max { - error!( - &opctx.log, - "reading endpoints: expected at most {} certificates, but \ - found at least {}. TLS may not work on some Silos' external \ - endpoints.", - MAX, - certs.len(), - ); + + // Fetch all TLS certificates. 
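The rewritten `read_all_endpoints` above swaps the fixed 200-item queries for `Paginator`-driven batches, and the certificate fetch just below repeats the same shape. A self-contained toy of the marker-based batching idea, using no omicron types (the data set and `list_page` function are purely illustrative):

```rust
/// Toy illustration of marker-based batching (Paginator in the diff
/// encapsulates this bookkeeping): each page starts after the last key of the
/// previous page, and a short page signals the end.
fn list_page(data: &[(u32, &str)], marker: Option<u32>, batch_size: usize) -> Vec<(u32, &str)> {
    data.iter()
        .filter(|(id, _)| marker.map_or(true, |m| *id > m))
        .take(batch_size)
        .copied()
        .collect()
}

fn main() {
    let data = [(1, "silo-a"), (2, "silo-b"), (3, "silo-c"), (4, "silo-d"), (5, "silo-e")];
    let batch_size = 2;
    let mut all = Vec::new();
    let mut marker = None;

    loop {
        let batch = list_page(&data, marker, batch_size);
        let done = batch.len() < batch_size;
        marker = batch.last().map(|(id, _)| *id);
        all.extend(batch);
        if done {
            break;
        }
    }

    assert_eq!(all.len(), data.len());
    println!("fetched {} items in batches of {}", all.len(), batch_size);
}
```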
+ let mut certs = Vec::new(); + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = datastore + .certificate_list_for( + opctx, + Some(ServiceKind::Nexus), + &PaginatedBy::Id(p.current_pagparams()), + false, + ) + .await?; + paginator = p.found_batch(&batch, &|s: &Certificate| s.id()); + certs.extend(batch); } Ok(ExternalEndpoints::new(silos, certs, external_dns_zones)) diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 405a972976..d8ba6abbdd 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -39,6 +39,7 @@ pub mod project_create; pub mod region_replacement_drive; pub mod region_replacement_finish; pub mod region_replacement_start; +pub mod region_snapshot_replacement_finish; pub mod region_snapshot_replacement_garbage_collect; pub mod region_snapshot_replacement_start; pub mod region_snapshot_replacement_step; @@ -173,7 +174,8 @@ fn make_action_registry() -> ActionRegistry { region_snapshot_replacement_start::SagaRegionSnapshotReplacementStart, region_snapshot_replacement_garbage_collect::SagaRegionSnapshotReplacementGarbageCollect, region_snapshot_replacement_step::SagaRegionSnapshotReplacementStep, - region_snapshot_replacement_step_garbage_collect::SagaRegionSnapshotReplacementStepGarbageCollect + region_snapshot_replacement_step_garbage_collect::SagaRegionSnapshotReplacementStepGarbageCollect, + region_snapshot_replacement_finish::SagaRegionSnapshotReplacementFinish ]; #[cfg(test)] diff --git a/nexus/src/app/sagas/region_replacement_finish.rs b/nexus/src/app/sagas/region_replacement_finish.rs index c7efa2f03f..2212e6fdf3 100644 --- a/nexus/src/app/sagas/region_replacement_finish.rs +++ b/nexus/src/app/sagas/region_replacement_finish.rs @@ -311,6 +311,20 @@ pub(crate) mod test { operating_saga_id: None, }; + datastore + .volume_create(nexus_db_model::Volume::new( + new_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: new_volume_id, + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_replacement_request(&opctx, request.clone()) .await diff --git a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index ce063dc5be..aa9e83c037 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -491,7 +491,7 @@ async fn srrs_get_old_region_address( async fn srrs_replace_region_in_volume( sagactx: NexusActionContext, -) -> Result<(), ActionError> { +) -> Result { let log = sagactx.user_data().log(); let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; @@ -555,8 +555,6 @@ async fn srrs_replace_region_in_volume( .await .map_err(ActionError::action_failed)?; - debug!(log, "replacement returned {:?}", volume_replace_region_result); - match volume_replace_region_result { VolumeReplaceResult::AlreadyHappened | VolumeReplaceResult::Done => { // The replacement was done either by this run of this saga node, or @@ -565,10 +563,11 @@ async fn srrs_replace_region_in_volume( // with the rest of the saga (to properly clean up allocated // resources). - Ok(()) + Ok(volume_replace_region_result) } - VolumeReplaceResult::ExistingVolumeDeleted => { + VolumeReplaceResult::ExistingVolumeSoftDeleted + | VolumeReplaceResult::ExistingVolumeHardDeleted => { // Unwind the saga here to clean up the resources allocated during // this saga. 
The associated background task will transition this // request's state to Completed. diff --git a/nexus/src/app/sagas/region_snapshot_replacement_finish.rs b/nexus/src/app/sagas/region_snapshot_replacement_finish.rs new file mode 100644 index 0000000000..d992f753d6 --- /dev/null +++ b/nexus/src/app/sagas/region_snapshot_replacement_finish.rs @@ -0,0 +1,211 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! After the change to store a "new region volume" in the region snapshot +//! replacement request, that volume requires garbage collection before the +//! region snapshot replacement transitions to Complete. It's this saga's +//! responsibility to ensure that cleanup. This saga handles the following +//! region snapshot replacement request state transitions: +//! +//! ```text +//! Running <-- +//! | +//! | | +//! v | +//! | +//! Completing -- +//! +//! | +//! v +//! +//! Complete +//! ``` +//! +//! The first thing this saga does is set itself as the "operating saga" for the +//! request, and change the state to "Completing". Then, it performs the volume +//! delete sub-saga for the new region volume. Finally, it updates the region +//! snapshot replacement request by clearing the operating saga id and changing +//! the state to "Complete". +//! +//! Any unwind will place the state back into Running. + +use super::{ + ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, + ACTION_GENERATE_ID, +}; +use crate::app::sagas::declare_saga_actions; +use crate::app::sagas::volume_delete; +use crate::app::{authn, db}; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region snapshot replacement finish saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + pub request: db::model::RegionSnapshotReplacement, +} + +// region snapshot replacement finish saga: actions + +declare_saga_actions! { + region_snapshot_replacement_finish; + SET_SAGA_ID -> "unused_1" { + + rsrfs_set_saga_id + - rsrfs_set_saga_id_undo + } + UPDATE_REQUEST_RECORD -> "unused_4" { + + rsrfs_update_request_record + } +} + +// region snapshot replacement finish saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionSnapshotReplacementFinish; +impl NexusSaga for SagaRegionSnapshotReplacementFinish { + const NAME: &'static str = "region-snapshot-replacement-finish"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_snapshot_replacement_finish_register_actions(registry); + } + + fn make_saga_dag( + params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "saga_id", + "GenerateSagaId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(set_saga_id_action()); + + if let Some(new_region_volume_id) = params.request.new_region_volume_id + { + let subsaga_params = volume_delete::Params { + serialized_authn: params.serialized_authn.clone(), + volume_id: new_region_volume_id, + }; + + let subsaga_dag = { + let subsaga_builder = steno::DagBuilder::new( + steno::SagaName::new(volume_delete::SagaVolumeDelete::NAME), + ); + volume_delete::SagaVolumeDelete::make_saga_dag( + &subsaga_params, + subsaga_builder, + )? 
+ }; + + builder.append(Node::constant( + "params_for_volume_delete_subsaga", + serde_json::to_value(&subsaga_params).map_err(|e| { + SagaInitError::SerializeError( + "params_for_volume_delete_subsaga".to_string(), + e, + ) + })?, + )); + + builder.append(Node::subsaga( + "volume_delete_subsaga_no_result", + subsaga_dag, + "params_for_volume_delete_subsaga", + )); + } + + builder.append(update_request_record_action()); + + Ok(builder.build()?) + } +} + +// region snapshot replacement finish saga: action implementations + +async fn rsrfs_set_saga_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Change the request record here to an intermediate "completing" state to + // block out other sagas that will be triggered for the same request. + osagactx + .datastore() + .set_region_snapshot_replacement_completing( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrfs_set_saga_id_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + osagactx + .datastore() + .undo_set_region_snapshot_replacement_completing( + &opctx, + params.request.id, + saga_id, + ) + .await?; + + Ok(()) +} + +async fn rsrfs_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Update the replacement request record to 'Complete' and clear the + // operating saga id. There is no undo step for this, it should succeed + // idempotently. 
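The saga above claims the request via `set_region_snapshot_replacement_completing` and, just below, finishes it with an idempotent `set_region_snapshot_replacement_complete`. A toy, datastore-free model of that operating-saga-id guard (the enum and functions are illustrative, not the Nexus schema; the exact idempotency rules are assumptions drawn from the comments):

```rust
#[derive(Clone, Copy)]
enum State {
    Running,
    Completing { saga_id: u64 },
    Complete,
}

/// Forward action: claim the record, or fail if another saga already holds it.
fn set_completing(state: State, saga_id: u64) -> Result<State, &'static str> {
    match state {
        State::Running => Ok(State::Completing { saga_id }),
        // Replaying our own claim is fine; anyone else's claim blocks us.
        State::Completing { saga_id: holder } if holder == saga_id => {
            Ok(State::Completing { saga_id })
        }
        _ => Err("request is operated on by another saga or already complete"),
    }
}

/// Final action: only the holder completes; repeating it is a no-op, which is
/// why the node needs no undo step.
fn set_complete(state: State, saga_id: u64) -> Result<State, &'static str> {
    match state {
        State::Completing { saga_id: holder } if holder == saga_id => Ok(State::Complete),
        State::Complete => Ok(State::Complete),
        _ => Err("cannot complete: not held by this saga"),
    }
}

fn main() {
    let s = set_completing(State::Running, 1).unwrap();
    let s = set_complete(s, 1).unwrap();
    // Replaying the final transition succeeds idempotently.
    assert!(matches!(set_complete(s, 1), Ok(State::Complete)));
}
```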
+ datastore + .set_region_snapshot_replacement_complete( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} diff --git a/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs index 762182724b..675b2b0cb3 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs @@ -284,11 +284,27 @@ pub(crate) mod test { RegionSnapshotReplacementState::ReplacementDone; request.old_snapshot_volume_id = Some(old_snapshot_volume_id); + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( &opctx, request.clone(), - Uuid::new_v4(), + volume_id, ) .await .unwrap(); diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index 4855f64ac2..b9ed75c288 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -65,6 +65,7 @@ use crate::app::{authn, db}; use nexus_types::identity::Asset; use nexus_types::identity::Resource; use omicron_common::api::external::Error; +use omicron_uuid_kinds::DatasetUuid; use serde::Deserialize; use serde::Serialize; use sled_agent_client::types::CrucibleOpts; @@ -91,6 +92,9 @@ declare_saga_actions! { + rsrss_set_saga_id - rsrss_set_saga_id_undo } + GET_CLONE_SOURCE -> "clone_source" { + + rsrss_get_clone_source + } GET_ALLOC_REGION_PARAMS -> "alloc_region_params" { + rsrss_get_alloc_region_params } @@ -101,9 +105,47 @@ declare_saga_actions! { FIND_NEW_REGION -> "new_dataset_and_region" { + rsrss_find_new_region } + // One of the common sharp edges of sagas is that the compensating action of + // a node does _not_ run if the forward action fails. Said another way, for + // this node: + // + // EXAMPLE -> "output" { + // + forward_action + // - forward_action_undo + // } + // + // If `forward_action` fails, `forward_action_undo` is never executed. + // Forward actions are therefore required to be atomic, in that they either + // fully apply or don't apply at all. + // + // Sagas with nodes that ensure multiple regions exist cannot be atomic + // because they can partially fail (for example: what if only 2 out of 3 + // ensures succeed?). In order for the compensating action to be run, it + // must exist as a separate node that has a no-op forward action: + // + // EXAMPLE_UNDO -> "not_used" { + // + noop + // - forward_action_undo + // } + // EXAMPLE -> "output" { + // + forward_action + // } + // + // This saga will only ever ensure that a single region exists, so you might + // think you could get away with a single node that combines the forward and + // compensating action - you'd be mistaken! The Crucible agent's region + // ensure is not atomic in all cases: if the region fails to create, it + // enters the `failed` state, but is not deleted. Nexus must clean these up. 
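The comment above explains why the region-ensure step must be split into an undo-only node followed by a forward-only node, which the `NEW_REGION_ENSURE_UNDO` / `NEW_REGION_ENSURE` declarations just below implement. A self-contained toy of the unwind rule it relies on, with no steno types (node and action names are made up):

```rust
/// Toy model of the saga-unwind rule: when a node's forward action fails, only
/// the undo actions of nodes that already completed are run; the failing
/// node's own undo is never invoked. Putting the cleanup on a preceding no-op
/// node guarantees it runs even if the ensure partially fails.
struct Node {
    name: &'static str,
    forward: fn() -> Result<(), ()>,
    undo: Option<fn()>,
}

fn run_saga(nodes: &[Node]) {
    let mut completed: Vec<&Node> = Vec::new();
    for node in nodes {
        match (node.forward)() {
            Ok(()) => completed.push(node),
            Err(()) => {
                println!("node '{}' failed; unwinding", node.name);
                for done in completed.iter().rev() {
                    if let Some(undo) = done.undo {
                        undo();
                    }
                }
                return;
            }
        }
    }
    println!("saga completed");
}

fn main() {
    run_saga(&[
        // Undo-only node: its forward action cannot fail.
        Node {
            name: "new_region_ensure_undo",
            forward: || Ok(()),
            undo: Some(|| println!("deleting partially-created region")),
        },
        // Forward-only node: may fail after partially creating the region.
        Node {
            name: "new_region_ensure",
            forward: || Err(()),
            undo: None,
        },
    ]);
}
```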
+ NEW_REGION_ENSURE_UNDO -> "not_used" { + + rsrss_noop + - rsrss_new_region_ensure_undo + } NEW_REGION_ENSURE -> "ensured_dataset_and_region" { + rsrss_new_region_ensure - - rsrss_new_region_ensure_undo + } + NEW_REGION_VOLUME_CREATE -> "new_region_volume" { + + rsrss_new_region_volume_create + - rsrss_new_region_volume_create_undo } GET_OLD_SNAPSHOT_VOLUME_ID -> "old_snapshot_volume_id" { + rsrss_get_old_snapshot_volume_id @@ -149,11 +191,20 @@ impl NexusSaga for SagaRegionSnapshotReplacementStart { ACTION_GENERATE_ID.as_ref(), )); + builder.append(Node::action( + "new_region_volume_id", + "GenerateNewRegionVolumeId", + ACTION_GENERATE_ID.as_ref(), + )); + builder.append(set_saga_id_action()); + builder.append(get_clone_source_action()); builder.append(get_alloc_region_params_action()); builder.append(alloc_new_region_action()); builder.append(find_new_region_action()); + builder.append(new_region_ensure_undo_action()); builder.append(new_region_ensure_action()); + builder.append(new_region_volume_create_action()); builder.append(get_old_snapshot_volume_id_action()); builder.append(create_fake_volume_action()); builder.append(replace_snapshot_in_volume_action()); @@ -219,6 +270,169 @@ async fn rsrss_set_saga_id_undo( Ok(()) } +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] +enum CloneSource { + RegionSnapshot { dataset_id: DatasetUuid, region_id: Uuid }, + Region { region_id: Uuid }, +} + +async fn rsrss_get_clone_source( + sagactx: NexusActionContext, +) -> Result { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let log = osagactx.log(); + + // Find either a region snapshot or a read-only region that is associated + // with the request snapshot that has not been expunged, and return that as + // the source to be used to populate the read-only region that will replace + // the request's region snapshot. + // + // Importantly, determine the clone source before new region alloc step in + // this saga, otherwise the query that searches for read-only region + // candidates will match the newly allocated region (that is not created + // yet!). + // + // Choose a clone source based on the following policy: + // + // - choose a region snapshot associated with the one being replaced + // + // - choose a read-only region from the associated snapshot volume + // + // - choose the region snapshot being replaced (only if it is not expunged! + // if the downstairs being cloned from is on an expunged dataset, we have + // to assume that the clone will never succeed, even if the expunged + // thing is still there) + // + // The policy here prefers to choose a clone source that isn't the region + // snapshot in the request: if it's flaky, it shouldn't be used as a clone + // source! This function does not know _why_ the replacement request was + // created for that region snapshot, and assumes that there may be a problem + // with it and will choose it as a last resort (if no other candidate clone + // source is found and the request's region snapshot is not on an expunged + // dataset, then it has to be chosen as a clone source, as the alternative + // is lost data). The request's region snapshot may also be completely fine, + // for example if a scrub is being requested. + // + // Also, the policy also prefers to choose to clone from a region snapshot + // instead of a read-only region: this is an arbitrary order, there is no + // reason behind this. The region snapshots and read-only regions will have + // identical contents. 
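Editor's note: a compact sketch of the selection policy spelled out in the comment above, written over invented stand-in types (Candidate, SketchCloneSource). The real implementation that follows queries the datastore for candidates; this is only a condensed illustration of the preference order.

    // Stand-in types for the candidates; the real code deals in dataset, region,
    // and snapshot UUIDs pulled from the datastore.
    #[derive(Debug, Clone, Copy, PartialEq)]
    struct Candidate(u32);

    #[derive(Debug, PartialEq)]
    enum SketchCloneSource {
        RegionSnapshot(Candidate),
        Region(Candidate),
    }

    // Preference order: another region snapshot first, then a read-only region of
    // the snapshot volume, and the request's own region snapshot only as a last
    // resort (and only if its dataset is still in service).
    fn pick_clone_source(
        other_region_snapshots: &[Candidate],
        read_only_regions: &[Candidate],
        request_region_snapshot: Candidate,
        request_dataset_in_service: bool,
    ) -> Option<SketchCloneSource> {
        if let Some(&candidate) = other_region_snapshots.first() {
            return Some(SketchCloneSource::RegionSnapshot(candidate));
        }

        if let Some(&candidate) = read_only_regions.first() {
            return Some(SketchCloneSource::Region(candidate));
        }

        if request_dataset_in_service {
            return Some(SketchCloneSource::RegionSnapshot(request_region_snapshot));
        }

        // Every possible source is on an expunged dataset: the data is gone, and
        // no clone source can be selected.
        None
    }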
+ + // First, try to select another region snapshot that's part of this + // snapshot. + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let mut non_expunged_region_snapshots = osagactx + .datastore() + .find_non_expunged_region_snapshots( + &opctx, + params.request.old_snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; + + // Filter out the request's region snapshot - if there are no other + // candidates, this could be chosen later in this function. + + non_expunged_region_snapshots.retain(|rs| { + !(rs.dataset_id == params.request.old_dataset_id + && rs.region_id == params.request.old_region_id + && rs.snapshot_id == params.request.old_snapshot_id) + }); + + if let Some(candidate) = non_expunged_region_snapshots.pop() { + info!( + log, + "found another non-expunged region snapshot"; + "snapshot_id" => %params.request.old_snapshot_id, + "dataset_id" => %candidate.dataset_id, + "region_id" => %candidate.region_id, + ); + + return Ok(CloneSource::RegionSnapshot { + dataset_id: candidate.dataset_id.into(), + region_id: candidate.region_id, + }); + } + + // Next, try to select a read-only region that's associated with the + // snapshot volume + + info!( + log, + "no region snapshot clone source candidates"; + "snapshot_id" => %params.request.old_snapshot_id, + ); + + // Look up the existing snapshot + let maybe_db_snapshot = osagactx + .datastore() + .snapshot_get(&opctx, params.request.old_snapshot_id) + .await + .map_err(ActionError::action_failed)?; + + let Some(db_snapshot) = maybe_db_snapshot else { + return Err(ActionError::action_failed(Error::internal_error( + &format!( + "snapshot {} was hard deleted!", + params.request.old_snapshot_id + ), + ))); + }; + + let mut non_expunged_read_only_regions = osagactx + .datastore() + .find_non_expunged_regions(&opctx, db_snapshot.volume_id) + .await + .map_err(ActionError::action_failed)?; + + if let Some(candidate) = non_expunged_read_only_regions.pop() { + info!( + log, + "found region clone source candidate"; + "snapshot_id" => %params.request.old_snapshot_id, + "dataset_id" => %candidate.dataset_id(), + "region_id" => %candidate.id(), + ); + + return Ok(CloneSource::Region { region_id: candidate.id() }); + } + + // If no other non-expunged region snapshot or read-only region exists, then + // check if the request's region snapshot is non-expunged. This will use the + // region snapshot that is being replaced as a clone source, which may not + // work if there's a problem with that region snapshot that this replacement + // request is meant to fix! + + let request_dataset_on_in_service_physical_disk = osagactx + .datastore() + .dataset_physical_disk_in_service(params.request.old_dataset_id.into()) + .await + .map_err(ActionError::action_failed)?; + + if request_dataset_on_in_service_physical_disk { + // If the request region snapshot's dataset has not been expunged, it + // can be used + return Ok(CloneSource::RegionSnapshot { + dataset_id: params.request.old_dataset_id.into(), + region_id: params.request.old_region_id, + }); + } + + // If all targets of a Volume::Region are on expunged datasets, then the + // user's data is gone, and this code will fail to select a clone source. 
+ + return Err(ActionError::action_failed(format!( + "no clone source candidate for {}!", + params.request.old_snapshot_id, + ))); +} + #[derive(Debug, Deserialize, Serialize)] struct AllocRegionParams { block_size: u64, @@ -380,6 +594,10 @@ async fn rsrss_find_new_region( Ok(dataset_and_region) } +async fn rsrss_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { + Ok(()) +} + async fn rsrss_new_region_ensure( sagactx: NexusActionContext, ) -> Result< @@ -390,55 +608,72 @@ async fn rsrss_new_region_ensure( let osagactx = sagactx.user_data(); let log = osagactx.log(); - // With a list of datasets and regions to ensure, other sagas need to have a - // separate no-op forward step for the undo action to ensure that the undo - // step occurs in the case that the ensure partially fails. Here this is not - // required, there's only one dataset and region. let new_dataset_and_region = sagactx .lookup::<(db::model::Dataset, db::model::Region)>( "new_dataset_and_region", )?; - let region_snapshot = osagactx - .datastore() - .region_snapshot_get( - params.request.old_dataset_id.into(), - params.request.old_region_id, - params.request.old_snapshot_id, - ) - .await - .map_err(ActionError::action_failed)?; + let clone_source = sagactx.lookup::("clone_source")?; + + let mut source_repair_addr: SocketAddrV6 = match clone_source { + CloneSource::RegionSnapshot { dataset_id, region_id } => { + let region_snapshot = osagactx + .datastore() + .region_snapshot_get( + dataset_id, + region_id, + params.request.old_snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; - let Some(region_snapshot) = region_snapshot else { - return Err(ActionError::action_failed(format!( - "region snapshot {} {} {} deleted!", - params.request.old_dataset_id, - params.request.old_region_id, - params.request.old_snapshot_id, - ))); - }; + let Some(region_snapshot) = region_snapshot else { + return Err(ActionError::action_failed(format!( + "region snapshot {} {} {} deleted!", + dataset_id, region_id, params.request.old_snapshot_id, + ))); + }; - let (new_dataset, new_region) = new_dataset_and_region; + match region_snapshot.snapshot_addr.parse() { + Ok(addr) => addr, - // Currently, the repair port is set using a fixed offset above the - // downstairs port. Once this goes away, Nexus will require a way to query - // for the repair port! + Err(e) => { + return Err(ActionError::action_failed(format!( + "error parsing region_snapshot.snapshot_addr: {e}" + ))); + } + } + } - let mut source_repair_addr: SocketAddrV6 = - match region_snapshot.snapshot_addr.parse() { - Ok(addr) => addr, + CloneSource::Region { region_id } => { + let maybe_addr = osagactx + .datastore() + .region_addr(region_id) + .await + .map_err(ActionError::action_failed)?; - Err(e) => { - return Err(ActionError::action_failed(format!( - "error parsing region_snapshot.snapshot_addr: {e}" - ))); + match maybe_addr { + Some(addr) => addr, + + None => { + return Err(ActionError::action_failed(format!( + "region clone source {region_id} has no port!" + ))); + } } - }; + } + }; + + // Currently, the repair port is set using a fixed offset above the + // downstairs port. Once this goes away, Nexus will require a way to query + // for the repair port! 
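Editor's note: for the fixed repair-port offset noted just above, here is a small illustration using only std::net. The offset value below is a placeholder chosen for the example; the real constant is crucible_common::REPAIR_PORT_OFFSET.

    use std::net::SocketAddrV6;

    // Placeholder value for illustration; real code uses
    // crucible_common::REPAIR_PORT_OFFSET.
    const EXAMPLE_REPAIR_PORT_OFFSET: u16 = 4000;

    fn repair_addr(downstairs: SocketAddrV6) -> SocketAddrV6 {
        // The repair service listens on the same IPv6 address as the downstairs,
        // at a fixed port offset above the downstairs port.
        let mut addr = downstairs;
        addr.set_port(addr.port() + EXAMPLE_REPAIR_PORT_OFFSET);
        addr
    }

    // e.g. "[fd00:1122:3344:101::5]:19000" maps to "[fd00:1122:3344:101::5]:23000"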
source_repair_addr.set_port( source_repair_addr.port() + crucible_common::REPAIR_PORT_OFFSET, ); + let (new_dataset, new_region) = new_dataset_and_region; + let ensured_region = osagactx .nexus() .ensure_region_in_dataset( @@ -474,6 +709,94 @@ async fn rsrss_new_region_ensure_undo( Ok(()) } +async fn rsrss_new_region_volume_create( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + let new_region_volume_id = + sagactx.lookup::("new_region_volume_id")?; + + let (new_dataset, ensured_region) = sagactx.lookup::<( + db::model::Dataset, + crucible_agent_client::types::Region, + )>( + "ensured_dataset_and_region", + )?; + + let Some(new_dataset_address) = new_dataset.address() else { + return Err(ActionError::action_failed(format!( + "dataset {} does not have an address!", + new_dataset.id(), + ))); + }; + + let new_region_address = SocketAddrV6::new( + *new_dataset_address.ip(), + ensured_region.port_number, + 0, + 0, + ); + + // Create a volume to inflate the reference count of the newly created + // read-only region. If this is not done it's possible that a user could + // delete the snapshot volume _after_ the new read-only region was swapped + // in, removing the last reference to it and causing garbage collection. + + let volume_construction_request = VolumeConstructionRequest::Volume { + id: new_region_volume_id, + block_size: 0, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: new_region_volume_id, + target: vec![new_region_address.to_string()], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }], + read_only_parent: None, + }; + + let volume_data = serde_json::to_string(&volume_construction_request) + .map_err(|e| { + ActionError::action_failed(Error::internal_error(&e.to_string())) + })?; + + let volume = db::model::Volume::new(new_region_volume_id, volume_data); + + osagactx + .datastore() + .volume_create(volume) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrss_new_region_volume_create_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + + // Delete the volume. + + let new_region_volume_id = + sagactx.lookup::("new_region_volume_id")?; + osagactx.datastore().volume_hard_delete(new_region_volume_id).await?; + + Ok(()) +} + async fn rsrss_get_old_snapshot_volume_id( sagactx: NexusActionContext, ) -> Result { @@ -660,7 +983,7 @@ async fn get_replace_params( async fn rsrss_replace_snapshot_in_volume( sagactx: NexusActionContext, -) -> Result<(), ActionError> { +) -> Result { let log = sagactx.user_data().log(); let osagactx = sagactx.user_data(); @@ -694,10 +1017,11 @@ async fn rsrss_replace_snapshot_in_volume( // if the transaction occurred on the non-deleted volume so proceed // with the rest of the saga. - Ok(()) + Ok(volume_replace_snapshot_result) } - VolumeReplaceResult::ExistingVolumeDeleted => { + VolumeReplaceResult::ExistingVolumeSoftDeleted + | VolumeReplaceResult::ExistingVolumeHardDeleted => { // If the snapshot volume was deleted, we still want to proceed with // replacing the rest of the uses of the region snapshot. Note this // also covers the case where this saga node runs (performing the @@ -706,7 +1030,7 @@ async fn rsrss_replace_snapshot_in_volume( // deleted. 
If this saga unwound here, that would violate the // property of idempotency. - Ok(()) + Ok(volume_replace_snapshot_result) } } } @@ -780,6 +1104,9 @@ async fn rsrss_update_request_record( let old_region_volume_id = sagactx.lookup::("new_volume_id")?; + let new_region_volume_id = + sagactx.lookup::("new_region_volume_id")?; + // Now that the region has been ensured and the construction request has // been updated, update the replacement request record to 'ReplacementDone' // and clear the operating saga id. There is no undo step for this, it @@ -790,6 +1117,7 @@ async fn rsrss_update_request_record( params.request.id, saga_id, new_region_id, + new_region_volume_id, old_region_volume_id, ) .await @@ -806,6 +1134,7 @@ pub(crate) mod test { app::sagas::region_snapshot_replacement_start::*, app::sagas::test_helpers::test_opctx, app::RegionAllocationStrategy, }; + use nexus_db_model::PhysicalDiskPolicy; use nexus_db_model::RegionSnapshotReplacement; use nexus_db_model::RegionSnapshotReplacementState; use nexus_db_model::Volume; @@ -815,9 +1144,11 @@ pub(crate) mod test { use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::create_snapshot; use nexus_test_utils::resource_helpers::DiskTest; + use nexus_test_utils::resource_helpers::DiskTestBuilder; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::views; use nexus_types::identity::Asset; + use omicron_uuid_kinds::GenericUuid; use sled_agent_client::types::VolumeConstructionRequest; type ControlPlaneTestContext = @@ -830,7 +1161,7 @@ pub(crate) mod test { /// Create four zpools, a disk, and a snapshot of that disk async fn prepare_for_test( cptestctx: &ControlPlaneTestContext, - ) -> PrepareResult { + ) -> PrepareResult<'_> { let client = &cptestctx.external_client; let nexus = &cptestctx.server.server_context().nexus; let datastore = nexus.datastore(); @@ -876,20 +1207,21 @@ pub(crate) mod test { panic!("test snapshot {:?} should exist", snapshot_id) }); - PrepareResult { db_disk, snapshot, db_snapshot } + PrepareResult { db_disk, snapshot, db_snapshot, disk_test } } - struct PrepareResult { + struct PrepareResult<'a> { db_disk: nexus_db_model::Disk, snapshot: views::Snapshot, db_snapshot: nexus_db_model::Snapshot, + disk_test: DiskTest<'a, crate::Server>, } #[nexus_test(server = crate::Server)] async fn test_region_snapshot_replacement_start_saga( cptestctx: &ControlPlaneTestContext, ) { - let PrepareResult { db_disk, snapshot, db_snapshot } = + let PrepareResult { db_disk, snapshot, db_snapshot, .. 
} = prepare_for_test(cptestctx).await; let nexus = &cptestctx.server.server_context().nexus; @@ -990,8 +1322,10 @@ pub(crate) mod test { .await .unwrap(); - assert_eq!(volumes.len(), 1); - assert_eq!(volumes[0].id(), db_snapshot.volume_id); + assert!(volumes + .iter() + .map(|v| v.id()) + .any(|vid| vid == db_snapshot.volume_id)); } fn new_test_params( @@ -1009,9 +1343,11 @@ pub(crate) mod test { pub(crate) async fn verify_clean_slate( cptestctx: &ControlPlaneTestContext, + test: &DiskTest<'_, crate::Server>, request: &RegionSnapshotReplacement, affected_volume_original: &Volume, ) { + let sled_agent = &cptestctx.sled_agent.sled_agent; let datastore = cptestctx.server.server_context().nexus.datastore(); crate::app::sagas::test_helpers::assert_no_failed_undo_steps( @@ -1024,6 +1360,10 @@ pub(crate) mod test { // original disk, and three for the (currently unused) snapshot // destination volume assert_eq!(region_allocations(&datastore).await, 6); + + // Assert that only those six provisioned regions are non-destroyed + assert_no_other_ensured_regions(sled_agent, test, &datastore).await; + assert_region_snapshot_replacement_request_untouched( cptestctx, &datastore, &request, ) @@ -1031,11 +1371,12 @@ pub(crate) mod test { assert_volume_untouched(&datastore, &affected_volume_original).await; } - async fn region_allocations(datastore: &DataStore) -> usize { + async fn regions(datastore: &DataStore) -> Vec { use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use async_bb8_diesel::AsyncSimpleConnection; use diesel::QueryDsl; + use diesel::SelectableHelper; use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_db_queries::db::schema::region::dsl; @@ -1047,15 +1388,56 @@ pub(crate) mod test { conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await.unwrap(); dsl::region - .count() - .get_result_async(&conn) + .select(db::model::Region::as_select()) + .get_results_async(&conn) .await - .map(|x: i64| x as usize) }) .await .unwrap() } + async fn region_allocations(datastore: &DataStore) -> usize { + regions(datastore).await.len() + } + + async fn assert_no_other_ensured_regions( + sled_agent: &omicron_sled_agent::sim::SledAgent, + test: &DiskTest<'_, crate::Server>, + datastore: &DataStore, + ) { + let mut non_destroyed_regions_from_agent = vec![]; + + for zpool in test.zpools() { + for dataset in &zpool.datasets { + let crucible_dataset = + sled_agent.get_crucible_dataset(zpool.id, dataset.id).await; + for region in crucible_dataset.list().await { + match region.state { + crucible_agent_client::types::State::Tombstoned + | crucible_agent_client::types::State::Destroyed => { + // ok + } + + _ => { + non_destroyed_regions_from_agent + .push(region.clone()); + } + } + } + } + } + + let db_regions = regions(datastore).await; + let db_region_ids: Vec = + db_regions.iter().map(|x| x.id()).collect(); + + for region in non_destroyed_regions_from_agent { + let region_id = region.id.0.parse().unwrap(); + let contains = db_region_ids.contains(®ion_id); + assert!(contains, "db does not have {:?}", region_id); + } + } + async fn assert_region_snapshot_replacement_request_untouched( cptestctx: &ControlPlaneTestContext, datastore: &DataStore, @@ -1094,11 +1476,78 @@ pub(crate) mod test { assert_eq!(actual, expected); } + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + let PrepareResult { db_disk, snapshot, db_snapshot, disk_test } = + prepare_for_test(cptestctx).await; + + let log = 
&cptestctx.logctx.log; + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_opctx(cptestctx); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + let region: &nexus_db_model::Region = &disk_allocated_regions[0].1; + let snapshot_id = snapshot.identity.id; + + let region_snapshot = datastore + .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id) + .await + .unwrap() + .unwrap(); + + let request = + RegionSnapshotReplacement::for_region_snapshot(®ion_snapshot); + + datastore + .insert_region_snapshot_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + let affected_volume_original = + datastore.volume_get(db_snapshot.volume_id).await.unwrap().unwrap(); + + verify_clean_slate( + &cptestctx, + &disk_test, + &request, + &affected_volume_original, + ) + .await; + + crate::app::sagas::test_helpers::action_failure_can_unwind::< + SagaRegionSnapshotReplacementStart, + _, + _, + >( + nexus, + || Box::pin(async { new_test_params(&opctx, &request) }), + || { + Box::pin(async { + verify_clean_slate( + &cptestctx, + &disk_test, + &request, + &affected_volume_original, + ) + .await; + }) + }, + log, + ) + .await; + } + #[nexus_test(server = crate::Server)] async fn test_action_failure_can_unwind_idempotently( cptestctx: &ControlPlaneTestContext, ) { - let PrepareResult { db_disk, snapshot, db_snapshot } = + let PrepareResult { db_disk, snapshot, db_snapshot, disk_test } = prepare_for_test(cptestctx).await; let log = &cptestctx.logctx.log; @@ -1130,8 +1579,13 @@ pub(crate) mod test { let affected_volume_original = datastore.volume_get(db_snapshot.volume_id).await.unwrap().unwrap(); - verify_clean_slate(&cptestctx, &request, &affected_volume_original) - .await; + verify_clean_slate( + &cptestctx, + &disk_test, + &request, + &affected_volume_original, + ) + .await; crate::app::sagas::test_helpers::action_failure_can_unwind_idempotently::< SagaRegionSnapshotReplacementStart, @@ -1143,6 +1597,7 @@ pub(crate) mod test { || Box::pin(async { verify_clean_slate( &cptestctx, + &disk_test, &request, &affected_volume_original, ).await; @@ -1155,7 +1610,7 @@ pub(crate) mod test { async fn test_actions_succeed_idempotently( cptestctx: &ControlPlaneTestContext, ) { - let PrepareResult { db_disk, snapshot, db_snapshot: _ } = + let PrepareResult { db_disk, snapshot, .. } = prepare_for_test(cptestctx).await; let nexus = &cptestctx.server.server_context().nexus; @@ -1192,4 +1647,384 @@ pub(crate) mod test { ) .await; } + + /// Assert this saga does not leak regions if the replacement read-only + /// region cannot be created. 
+ #[nexus_test(server = crate::Server)] + async fn test_no_leak_region(cptestctx: &ControlPlaneTestContext) { + let PrepareResult { db_disk, snapshot, db_snapshot, disk_test } = + prepare_for_test(cptestctx).await; + + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_opctx(cptestctx); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + let region: &nexus_db_model::Region = &disk_allocated_regions[0].1; + let snapshot_id = snapshot.identity.id; + + let region_snapshot = datastore + .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id) + .await + .unwrap() + .unwrap(); + + let request = + RegionSnapshotReplacement::for_region_snapshot(®ion_snapshot); + + datastore + .insert_region_snapshot_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + let affected_volume_original = + datastore.volume_get(db_snapshot.volume_id).await.unwrap().unwrap(); + + disk_test.set_always_fail_callback().await; + + // Run the region snapshot replacement start saga + let dag = + create_saga_dag::(Params { + serialized_authn: Serialized::for_opctx(&opctx), + request: request.clone(), + allocation_strategy: RegionAllocationStrategy::Random { + seed: None, + }, + }) + .unwrap(); + + let runnable_saga = nexus.sagas.saga_prepare(dag).await.unwrap(); + + // Actually run the saga + runnable_saga.run_to_completion().await.unwrap(); + + verify_clean_slate( + &cptestctx, + &disk_test, + &request, + &affected_volume_original, + ) + .await; + } + + /// Tests that the region snapshot replacement start saga will not choose + /// the request's region snapshot, but instead will choose the other + /// non-expunged one. + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_start_prefer_not_self( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + // Create four zpools, each with one dataset. This is required for + // region and region snapshot replacement to have somewhere to move the + // data, and for this test we're doing one expungements. 
+ let sled_id = cptestctx.first_sled(); + + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(4) + .build() + .await; + + // Any volumes sent to the Pantry for reconciliation should return + // active for this test + + cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .set_auto_activate_volumes() + .await; + + // Create a disk and a snapshot + let client = &cptestctx.external_client; + let _project_id = + create_project(&client, PROJECT_NAME).await.identity.id; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + let snapshot = + create_snapshot(&client, PROJECT_NAME, "disk", "snap").await; + + // Before expunging any physical disk, save some DB models + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert_eq!(snapshot_allocated_regions.len(), 0); + + // Expunge one physical disk + { + let (dataset, _) = &disk_allocated_regions[0]; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + } + + // Request that the second region snapshot be replaced + + let region_snapshot = datastore + .region_snapshot_get( + disk_allocated_regions[1].0.id(), // dataset id + disk_allocated_regions[1].1.id(), // region id + snapshot.identity.id, + ) + .await + .unwrap() + .unwrap(); + + let request_id = datastore + .create_region_snapshot_replacement_request( + &opctx, + ®ion_snapshot, + ) + .await + .unwrap(); + + // Manually invoke the region snapshot replacement start saga + + let saga_outputs = nexus + .sagas + .saga_execute::(Params { + serialized_authn: Serialized::for_opctx(&opctx), + + request: datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, request_id, + ) + .await + .unwrap(), + + allocation_strategy: RegionAllocationStrategy::Random { + seed: None, + }, + }) + .await + .unwrap(); + + // The third region snapshot should have been selected as the clone + // source + + let selected_clone_source = saga_outputs + .lookup_node_output::("clone_source") + .unwrap(); + + assert_eq!( + selected_clone_source, + CloneSource::RegionSnapshot { + dataset_id: disk_allocated_regions[2].0.id(), + region_id: disk_allocated_regions[2].1.id(), + }, + ); + + let snapshot_allocated_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + + assert_eq!(snapshot_allocated_regions.len(), 1); + assert!(snapshot_allocated_regions.iter().all(|(_, r)| r.read_only())); + } + + /// Tests that a region snapshot replacement request can select the region + /// snapshot being replaced as a clone source (but only if it is not + /// expunged!) 
+ #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_start_hail_mary( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + // Create five zpools, each with one dataset. This is required for + // region and region snapshot replacement to have somewhere to move the + // data, and for this test we're doing two expungements. + let sled_id = cptestctx.first_sled(); + + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(5) + .build() + .await; + + // Any volumes sent to the Pantry for reconciliation should return + // active for this test + + cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .set_auto_activate_volumes() + .await; + + // Create a disk and a snapshot + let client = &cptestctx.external_client; + let _project_id = + create_project(&client, PROJECT_NAME).await.identity.id; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + let snapshot = + create_snapshot(&client, PROJECT_NAME, "disk", "snap").await; + + // Before expunging any physical disk, save some DB models + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert_eq!(snapshot_allocated_regions.len(), 0); + + // Expunge two physical disks + for i in [0, 1] { + let (dataset, _) = &disk_allocated_regions[i]; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + } + + // Request that the third region snapshot be replaced + + let region_snapshot = datastore + .region_snapshot_get( + disk_allocated_regions[2].0.id(), // dataset id + disk_allocated_regions[2].1.id(), // region id + snapshot.identity.id, + ) + .await + .unwrap() + .unwrap(); + + let request_id = datastore + .create_region_snapshot_replacement_request( + &opctx, + ®ion_snapshot, + ) + .await + .unwrap(); + + // Manually invoke the region snapshot replacement start saga + + let saga_outputs = nexus + .sagas + .saga_execute::(Params { + serialized_authn: Serialized::for_opctx(&opctx), + + request: datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, request_id, + ) + .await + .unwrap(), + + allocation_strategy: RegionAllocationStrategy::Random { + seed: None, + }, + }) + .await + .unwrap(); + + // This should have chosen the request's region snapshot as a clone + // source, and replaced it with a read-only region + + let selected_clone_source = saga_outputs + .lookup_node_output::("clone_source") + .unwrap(); + + assert_eq!( + selected_clone_source, + CloneSource::RegionSnapshot { + dataset_id: disk_allocated_regions[2].0.id(), + 
region_id: disk_allocated_regions[2].1.id(), + }, + ); + + let snapshot_allocated_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + + assert_eq!(snapshot_allocated_regions.len(), 1); + assert!(snapshot_allocated_regions.iter().all(|(_, r)| r.read_only())); + } } diff --git a/nexus/src/app/sagas/region_snapshot_replacement_step.rs b/nexus/src/app/sagas/region_snapshot_replacement_step.rs index 7b1d598861..a236fcf62c 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_step.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_step.rs @@ -375,7 +375,8 @@ async fn rsrss_replace_snapshot_in_volume( // with the saga. } - VolumeReplaceResult::ExistingVolumeDeleted => { + VolumeReplaceResult::ExistingVolumeSoftDeleted + | VolumeReplaceResult::ExistingVolumeHardDeleted => { // Proceed with the saga but skip the notification step. } } @@ -423,6 +424,20 @@ async fn rsrss_notify_upstairs( let params = sagactx.saga_params::()?; let log = sagactx.user_data().log(); + // If the associated volume was deleted, then skip this notification step as + // there is no Upstairs to talk to. Continue with the saga to transition the + // step request to Complete, and then perform the associated clean up. + + let volume_replace_snapshot_result = sagactx + .lookup::("volume_replace_snapshot_result")?; + if matches!( + volume_replace_snapshot_result, + VolumeReplaceResult::ExistingVolumeSoftDeleted + | VolumeReplaceResult::ExistingVolumeHardDeleted + ) { + return Ok(()); + } + // Make an effort to notify a Propolis if one was booted for this volume. // This is best effort: if there is a failure, this saga will unwind and be // triggered again for the same request. If there is no Propolis booted for diff --git a/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs b/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs index b83f917a70..15c6a39651 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs @@ -187,8 +187,24 @@ pub(crate) mod test { .await .unwrap(); + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut request = - RegionSnapshotReplacementStep::new(Uuid::new_v4(), Uuid::new_v4()); + RegionSnapshotReplacementStep::new(Uuid::new_v4(), step_volume_id); request.replacement_state = RegionSnapshotReplacementStepState::Complete; request.old_snapshot_volume_id = Some(old_snapshot_volume_id); diff --git a/nexus/src/app/sagas/volume_delete.rs b/nexus/src/app/sagas/volume_delete.rs index a9be1f34ee..a8ded4e33c 100644 --- a/nexus/src/app/sagas/volume_delete.rs +++ b/nexus/src/app/sagas/volume_delete.rs @@ -27,12 +27,9 @@ use super::ActionRegistry; use super::NexusActionContext; use super::NexusSaga; use crate::app::sagas::declare_saga_actions; -use nexus_db_model::Dataset; -use nexus_db_model::Region; -use nexus_db_model::Volume; use nexus_db_queries::authn; use nexus_db_queries::db::datastore::CrucibleResources; -use nexus_types::identity::Asset; +use nexus_db_queries::db::datastore::FreedCrucibleResources; use serde::Deserialize; use serde::Serialize; use steno::ActionError; @@ -330,8 +327,6 @@ async fn 
svd_delete_crucible_snapshot_records( Ok(()) } -type FreedCrucibleRegions = Vec<(Dataset, Region, Option)>; - /// Deleting region snapshots in a previous saga node may have freed up regions /// that were deleted in the DB but couldn't be deleted by the Crucible Agent /// because a snapshot existed. Look for those here. These will be a different @@ -417,7 +412,7 @@ type FreedCrucibleRegions = Vec<(Dataset, Region, Option)>; /// another snapshot delete. async fn svd_find_freed_crucible_regions( sagactx: NexusActionContext, -) -> Result { +) -> Result { let osagactx = sagactx.user_data(); // Find regions freed up for deletion by a previous saga node deleting the @@ -432,11 +427,7 @@ async fn svd_find_freed_crucible_regions( }, )?; - // Don't serialize the whole Volume, as the data field contains key material! - Ok(freed_datasets_regions_and_volumes - .into_iter() - .map(|x| (x.0, x.1, x.2.map(|v: Volume| v.id()))) - .collect()) + Ok(freed_datasets_regions_and_volumes) } async fn svd_delete_freed_crucible_regions( @@ -448,9 +439,11 @@ async fn svd_delete_freed_crucible_regions( // Find regions freed up for deletion by a previous saga node deleting the // region snapshots. let freed_datasets_regions_and_volumes = - sagactx.lookup::("freed_crucible_regions")?; + sagactx.lookup::("freed_crucible_regions")?; - for (dataset, region, volume_id) in freed_datasets_regions_and_volumes { + for (dataset, region) in + &freed_datasets_regions_and_volumes.datasets_and_regions + { // Send DELETE calls to the corresponding Crucible agents osagactx .nexus() @@ -477,18 +470,17 @@ async fn svd_delete_freed_crucible_regions( e, )) })?; + } - // Remove volume DB record - if let Some(volume_id) = volume_id { - osagactx.datastore().volume_hard_delete(volume_id).await.map_err( - |e| { - ActionError::action_failed(format!( - "failed to volume_hard_delete {}: {:?}", - volume_id, e, - )) - }, - )?; - } + for volume_id in &freed_datasets_regions_and_volumes.volumes { + osagactx.datastore().volume_hard_delete(*volume_id).await.map_err( + |e| { + ActionError::action_failed(format!( + "failed to volume_hard_delete {}: {:?}", + volume_id, e, + )) + }, + )?; } Ok(()) diff --git a/nexus/test-utils/src/background.rs b/nexus/test-utils/src/background.rs index 32a2f24d9d..7c8857123c 100644 --- a/nexus/test-utils/src/background.rs +++ b/nexus/test-utils/src/background.rs @@ -305,7 +305,7 @@ pub async fn run_region_snapshot_replacement_finish( assert!(status.errors.is_empty()); - status.records_set_to_done.len() + status.finish_invoked_ok.len() } /// Run all replacement related background tasks until they aren't doing diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 5f7e92c393..57dc624187 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -5,9 +5,12 @@ //! 
Tests related to region and region snapshot replacement use dropshot::test_util::ClientTestContext; +use nexus_client::types::LastResult; use nexus_db_model::PhysicalDiskPolicy; use nexus_db_model::RegionReplacementState; +use nexus_db_model::RegionSnapshotReplacementState; use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::datastore::region_snapshot_replacement::*; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::DataStore; use nexus_test_utils::background::*; @@ -15,11 +18,22 @@ use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_disk; +use nexus_test_utils::resource_helpers::create_disk_from_snapshot; use nexus_test_utils::resource_helpers::create_project; +use nexus_test_utils::resource_helpers::create_snapshot; +use nexus_test_utils::resource_helpers::object_create; use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params; +use nexus_types::external_api::views; +use nexus_types::identity::Asset; +use nexus_types::internal_api::background::*; +use omicron_common::api::external; +use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use omicron_uuid_kinds::GenericUuid; use slog::Logger; +use std::collections::HashSet; +use std::net::SocketAddr; use std::sync::Arc; use uuid::Uuid; @@ -40,12 +54,37 @@ fn get_disk_url(disk_name: &str) -> String { format!("/v1/disks/{disk_name}?project={}", PROJECT_NAME) } +fn get_disks_url() -> String { + format!("/v1/disks?project={}", PROJECT_NAME) +} + +fn get_snapshot_url(snapshot_name: &str) -> String { + format!("/v1/snapshots/{snapshot_name}?project={}", PROJECT_NAME) +} + +fn get_snapshots_url() -> String { + format!("/v1/snapshots?project={}", PROJECT_NAME) +} + async fn create_project_and_pool(client: &ClientTestContext) -> Uuid { create_default_ip_pool(client).await; let project = create_project(client, PROJECT_NAME).await; project.identity.id } +async fn collection_list( + client: &ClientTestContext, + list_url: &str, +) -> Vec +where + T: Clone + serde::de::DeserializeOwned, +{ + NexusRequest::iter_collection_authn(client, list_url, "", None) + .await + .expect("failed to list") + .all_items +} + /// Assert that the first part of region replacement does not create a freed /// crucible region (that would be picked up by a volume delete saga) #[nexus_test] @@ -118,280 +157,355 @@ async fn test_region_replacement_does_not_create_freed_region( assert!(datastore.find_deleted_volume_regions().await.unwrap().is_empty()); } -struct RegionReplacementDeletedVolumeTest<'a> { - log: Logger, - datastore: Arc, - disk_test: DiskTest<'a>, - client: ClientTestContext, - internal_client: ClientTestContext, - replacement_request_id: Uuid, -} - -#[derive(Debug)] -struct ExpectedEndState(pub RegionReplacementState); - -#[derive(Debug)] -struct ExpectedIntermediateState(pub RegionReplacementState); - -impl<'a> RegionReplacementDeletedVolumeTest<'a> { - pub async fn new(cptestctx: &'a ControlPlaneTestContext) -> Self { - let nexus = &cptestctx.server.server_context().nexus; - - // Create four zpools, each with one dataset. This is required for - // region and region snapshot replacement to have somewhere to move the - // data. 
- let disk_test = DiskTestBuilder::new(&cptestctx) - .on_specific_sled(cptestctx.first_sled()) - .with_zpool_count(4) - .build() - .await; - - let client = &cptestctx.external_client; - let internal_client = &cptestctx.internal_client; - let datastore = nexus.datastore().clone(); +mod region_replacement { + use super::*; - let opctx = OpContext::for_tests( - cptestctx.logctx.log.new(o!()), - datastore.clone(), - ); + #[derive(Debug)] + struct ExpectedEndState(pub RegionReplacementState); - // Create a disk - let _project_id = create_project_and_pool(client).await; + #[derive(Debug)] + struct ExpectedIntermediateState(pub RegionReplacementState); - let disk = create_disk(&client, PROJECT_NAME, "disk").await; + #[derive(Debug)] + struct ExpectedStartState(pub RegionReplacementState); - // Manually create the region replacement request for the first - // allocated region of that disk - - let (.., db_disk) = LookupPath::new(&opctx, &datastore) - .disk_id(disk.identity.id) - .fetch() - .await - .unwrap(); - - assert_eq!(db_disk.id(), disk.identity.id); - - let disk_allocated_regions = - datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); - let (_, region) = &disk_allocated_regions[0]; - - let replacement_request_id = datastore - .create_region_replacement_request_for_region(&opctx, ®ion) - .await - .unwrap(); - - // Assert the request is in state Requested + pub(super) struct DeletedVolumeTest<'a> { + log: Logger, + datastore: Arc, + disk_test: DiskTest<'a>, + client: ClientTestContext, + internal_client: ClientTestContext, + replacement_request_id: Uuid, + } - let region_replacement = datastore - .get_region_replacement_request_by_id( - &opctx, + impl<'a> DeletedVolumeTest<'a> { + pub async fn new(cptestctx: &'a ControlPlaneTestContext) -> Self { + let nexus = &cptestctx.server.server_context().nexus; + + // Create four zpools, each with one dataset. This is required for + // region and region snapshot replacement to have somewhere to move + // the data. 
+ let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(cptestctx.first_sled()) + .with_zpool_count(4) + .build() + .await; + + let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; + let datastore = nexus.datastore().clone(); + + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + // Create a disk + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + + // Manually create the region replacement request for the first + // allocated region of that disk + + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + assert_eq!(db_disk.id(), disk.identity.id); + + let disk_allocated_regions = datastore + .get_allocated_regions(db_disk.volume_id) + .await + .unwrap(); + let (_, region) = &disk_allocated_regions[0]; + + let replacement_request_id = datastore + .create_region_replacement_request_for_region(&opctx, ®ion) + .await + .unwrap(); + + // Assert the request is in state Requested + + let region_replacement = datastore + .get_region_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + assert_eq!( + region_replacement.replacement_state, + RegionReplacementState::Requested, + ); + + DeletedVolumeTest { + log: cptestctx.logctx.log.new(o!()), + datastore, + disk_test, + client: client.clone(), + internal_client: internal_client.clone(), replacement_request_id, - ) - .await - .unwrap(); - - assert_eq!( - region_replacement.replacement_state, - RegionReplacementState::Requested, - ); - - RegionReplacementDeletedVolumeTest { - log: cptestctx.logctx.log.new(o!()), - datastore, - disk_test, - client: client.clone(), - internal_client: internal_client.clone(), - replacement_request_id, + } } - } - pub fn opctx(&self) -> OpContext { - OpContext::for_tests(self.log.clone(), self.datastore.clone()) - } + pub fn opctx(&self) -> OpContext { + OpContext::for_tests(self.log.clone(), self.datastore.clone()) + } - pub async fn delete_the_disk(&self) { - let disk_url = get_disk_url("disk"); - NexusRequest::object_delete(&self.client, &disk_url) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("failed to delete disk"); - } + pub async fn delete_the_disk(&self) { + let disk_url = get_disk_url("disk"); + NexusRequest::object_delete(&self.client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + } - /// Make sure: - /// - /// - all region replacement related background tasks run to completion - /// - this harness' region replacement request has transitioned to Complete - /// - no Crucible resources are leaked - pub async fn finish_test(&self) { - // Make sure that all the background tasks can run to completion. + /// Make sure: + /// + /// - all region replacement related background tasks run to completion + /// - this harness' region replacement request has transitioned to + /// Complete + /// - no Crucible resources are leaked + pub async fn finish_test(&self) { + // Make sure that all the background tasks can run to completion. 
- run_replacement_tasks_to_completion(&self.internal_client).await; + run_replacement_tasks_to_completion(&self.internal_client).await; - // Assert the request is in state Complete + // Assert the request is in state Complete - let region_replacement = self - .datastore - .get_region_replacement_request_by_id( - &self.opctx(), - self.replacement_request_id, - ) - .await - .unwrap(); + let region_replacement = self + .datastore + .get_region_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); - assert_eq!( - region_replacement.replacement_state, - RegionReplacementState::Complete, - ); + assert_eq!( + region_replacement.replacement_state, + RegionReplacementState::Complete, + ); - // Assert there are no more Crucible resources + // Assert there are no more Crucible resources - assert!(self.disk_test.crucible_resources_deleted().await); - } + assert!(self.disk_test.crucible_resources_deleted().await); + } - async fn wait_for_request_state( - &self, - expected_end_state: ExpectedEndState, - expected_intermediate_state: ExpectedIntermediateState, - ) { - wait_for_condition( - || { - let datastore = self.datastore.clone(); - let opctx = self.opctx(); - let replacement_request_id = self.replacement_request_id; - - async move { - let region_replacement = datastore - .get_region_replacement_request_by_id( - &opctx, - replacement_request_id, - ) - .await - .unwrap(); - - let state = region_replacement.replacement_state; - - if state == expected_end_state.0 { - // The saga transitioned the request ok - Ok(()) - } else if state == expected_intermediate_state.0 { - // The saga is still running - Err(CondCheckError::<()>::NotYet) - } else { - // Any other state is not expected - panic!("unexpected state {state:?}!"); + async fn wait_for_request_state( + &self, + expected_end_state: ExpectedEndState, + expected_intermediate_state: ExpectedIntermediateState, + expected_start_state: ExpectedStartState, + ) { + wait_for_condition( + || { + let datastore = self.datastore.clone(); + let opctx = self.opctx(); + let replacement_request_id = self.replacement_request_id; + + async move { + let region_replacement = datastore + .get_region_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + let state = region_replacement.replacement_state; + + // If the expected start and end state are the same + // (i.e. there's a back edge in the associated request's + // state machine), then it's impossible to determine + // when the saga starts and stops based on the state. + if expected_end_state.0 == expected_start_state.0 { + if state == expected_end_state.0 { + // The saga transitioned the request ok, or + // hasn't started yet. Either way we have to + // return here, and the call site should perform + // an additional check for some associated + // expected result. + Ok(()) + } else if state == expected_intermediate_state.0 { + // The saga is still running + Err(CondCheckError::<()>::NotYet) + } else { + // Any other state is not expected + panic!("unexpected state {state:?}!"); + } + } else { + if state == expected_end_state.0 { + // The saga transitioned the request ok + Ok(()) + } else if state == expected_intermediate_state.0 + || state == expected_start_state.0 + { + // The saga is still running, or hasn't started + // yet. 
+ Err(CondCheckError::<()>::NotYet) + } else { + // Any other state is not expected + panic!("unexpected state {state:?}!"); + } + } } - } - }, - &std::time::Duration::from_millis(500), - &std::time::Duration::from_secs(60), - ) - .await - .expect("request transitioned to expected state"); - - // Assert the request state - - let region_replacement = self - .datastore - .get_region_replacement_request_by_id( - &self.opctx(), - self.replacement_request_id, + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(60), ) .await - .unwrap(); - - assert_eq!(region_replacement.replacement_state, expected_end_state.0); - } - - /// Run the "region replacement" task to transition the request to Running. - pub async fn transition_request_to_running(&self) { - // Activate the "region replacement" background task - - run_region_replacement(&self.internal_client).await; + .expect("request transitioned to expected state"); + + // Assert the request state + + let region_replacement = self + .datastore + .get_region_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); + + assert_eq!( + region_replacement.replacement_state, + expected_end_state.0 + ); + } - // The activation above could only have started the associated saga, so - // wait until the request is in state Running. + /// Run the "region replacement" task to transition the request to + /// Running. + pub async fn transition_request_to_running(&self) { + // Activate the "region replacement" background task - self.wait_for_request_state( - ExpectedEndState(RegionReplacementState::Running), - ExpectedIntermediateState(RegionReplacementState::Allocating), - ) - .await; - } + run_region_replacement(&self.internal_client).await; - /// Call the region replacement drive task to attach the associated volume - /// to the simulated pantry, ostensibly for reconciliation - pub async fn attach_request_volume_to_pantry(&self) { - // Run the "region replacement driver" task to attach the associated - // volume to the simulated pantry. + // The activation above could only have started the associated saga, + // so wait until the request is in state Running. - run_region_replacement_driver(&self.internal_client).await; + self.wait_for_request_state( + ExpectedEndState(RegionReplacementState::Running), + ExpectedIntermediateState(RegionReplacementState::Allocating), + ExpectedStartState(RegionReplacementState::Requested), + ) + .await; + } - // The activation above could only have started the associated saga, so - // wait until the request is in the expected end state. + /// Call the region replacement drive task to attach the associated volume + /// to the simulated pantry, ostensibly for reconciliation + pub async fn attach_request_volume_to_pantry(&self) { + // Run the "region replacement driver" task to attach the associated + // volume to the simulated pantry. - self.wait_for_request_state( - ExpectedEndState(RegionReplacementState::Running), - ExpectedIntermediateState(RegionReplacementState::Driving), - ) - .await; + run_region_replacement_driver(&self.internal_client).await; - // Additionally, assert that the drive saga recorded that it sent the - // attachment request to the simulated pantry + // The activation above could only have started the associated saga, + // so wait until the request is in the expected end state. 
- let most_recent_step = self - .datastore - .current_region_replacement_request_step( - &self.opctx(), - self.replacement_request_id, + self.wait_for_request_state( + ExpectedEndState(RegionReplacementState::Running), + ExpectedIntermediateState(RegionReplacementState::Driving), + ExpectedStartState(RegionReplacementState::Running), ) - .await - .unwrap() - .unwrap(); - - assert!(most_recent_step.pantry_address().is_some()); - } + .await; - /// Manually activate the background attachment for the request volume - pub async fn manually_activate_attached_volume( - &self, - cptestctx: &'a ControlPlaneTestContext, - ) { - let pantry = - cptestctx.sled_agent.pantry_server.as_ref().unwrap().pantry.clone(); - - let region_replacement = self - .datastore - .get_region_replacement_request_by_id( - &self.opctx(), - self.replacement_request_id, + // Additionally, assert that the drive saga recorded that it sent + // the attachment request to the simulated pantry. + // + // If `wait_for_request_state` has the same expected start and end + // state (as it does above), it's possible to exit that function + // having not yet started the saga yet, and this requires an + // additional `wait_for_condition` to wait for the expected recorded + // step. + + let most_recent_step = wait_for_condition( + || { + let datastore = self.datastore.clone(); + let opctx = self.opctx(); + let replacement_request_id = self.replacement_request_id; + + async move { + match datastore + .current_region_replacement_request_step( + &opctx, + replacement_request_id, + ) + .await + .unwrap() + { + Some(step) => Ok(step), + + None => { + // The saga either has not started yet or is + // still running - see the comment before this + // check for more info. + Err(CondCheckError::<()>::NotYet) + } + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(10), ) .await - .unwrap(); + .expect("most recent step"); - pantry - .activate_background_attachment( - region_replacement.volume_id.to_string(), - ) - .await - .unwrap(); - } + assert!(most_recent_step.pantry_address().is_some()); + } - /// Transition request to ReplacementDone via the region replacement drive - /// saga - pub async fn transition_request_to_replacement_done(&self) { - // Run the "region replacement driver" task + /// Manually activate the background attachment for the request volume + pub async fn manually_activate_attached_volume( + &self, + cptestctx: &'a ControlPlaneTestContext, + ) { + let pantry = cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .clone(); + + let region_replacement = self + .datastore + .get_region_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); + + pantry + .activate_background_attachment( + region_replacement.volume_id.to_string(), + ) + .await + .unwrap(); + } - run_region_replacement_driver(&self.internal_client).await; + /// Transition request to ReplacementDone via the region replacement + /// drive saga + pub async fn transition_request_to_replacement_done(&self) { + // Run the "region replacement driver" task - // The activation above could only have started the associated saga, so - // wait until the request is in the expected end state. 
+ run_region_replacement_driver(&self.internal_client).await; - self.wait_for_request_state( - ExpectedEndState(RegionReplacementState::ReplacementDone), - ExpectedIntermediateState(RegionReplacementState::Driving), - ) - .await; + // The activation above could only have started the associated saga, + // so wait until the request is in the expected end state. + + self.wait_for_request_state( + ExpectedEndState(RegionReplacementState::ReplacementDone), + ExpectedIntermediateState(RegionReplacementState::Driving), + ExpectedStartState(RegionReplacementState::Running), + ) + .await; + } } } @@ -401,7 +515,8 @@ impl<'a> RegionReplacementDeletedVolumeTest<'a> { async fn test_delete_volume_region_replacement_state_requested( cptestctx: &ControlPlaneTestContext, ) { - let test_harness = RegionReplacementDeletedVolumeTest::new(cptestctx).await; + let test_harness = + region_replacement::DeletedVolumeTest::new(cptestctx).await; // The request leaves the `new` function in state Requested: delete the // disk, then finish the test. @@ -417,7 +532,8 @@ async fn test_delete_volume_region_replacement_state_requested( async fn test_delete_volume_region_replacement_state_running( cptestctx: &ControlPlaneTestContext, ) { - let test_harness = RegionReplacementDeletedVolumeTest::new(cptestctx).await; + let test_harness = + region_replacement::DeletedVolumeTest::new(cptestctx).await; // The request leaves the `new` function in state Requested: // - transition the request to "Running" @@ -437,7 +553,8 @@ async fn test_delete_volume_region_replacement_state_running( async fn test_delete_volume_region_replacement_state_running_on_pantry( cptestctx: &ControlPlaneTestContext, ) { - let test_harness = RegionReplacementDeletedVolumeTest::new(cptestctx).await; + let test_harness = + region_replacement::DeletedVolumeTest::new(cptestctx).await; // The request leaves the `new` function in state Requested: // - transition the request to "Running" @@ -459,7 +576,8 @@ async fn test_delete_volume_region_replacement_state_running_on_pantry( async fn test_delete_volume_region_replacement_state_replacement_done( cptestctx: &ControlPlaneTestContext, ) { - let test_harness = RegionReplacementDeletedVolumeTest::new(cptestctx).await; + let test_harness = + region_replacement::DeletedVolumeTest::new(cptestctx).await; // The request leaves the `new` function in state Requested: // - transition the request to "Running" @@ -481,3 +599,1478 @@ async fn test_delete_volume_region_replacement_state_replacement_done( test_harness.finish_test().await; } + +/// Assert that the problem experienced in issue 6353 is fixed +#[nexus_test] +async fn test_racing_replacements_for_soft_deleted_disk_volume( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create four zpools, each with one dataset. This is required for region + // and region snapshot replacement to have somewhere to move the data. 
+ let sled_id = cptestctx.first_sled(); + let mut disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(4) + .build() + .await; + + // Create a disk, then a snapshot of that disk + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + + let snapshots_url = format!("/v1/snapshots?project={}", PROJECT_NAME); + + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "snapshot".parse().unwrap(), + description: String::from("a snapshot"), + }, + disk: disk.identity.name.into(), + }, + ) + .await; + + // Before deleting the disk, save the DB model + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + assert_eq!(db_disk.id(), disk.identity.id); + + // Next, expunge a physical disk that contains a region snapshot (which + // means it'll have the region too) + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let (dataset, region) = &disk_allocated_regions[0]; + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + + // Only one region snapshot should be been returned by the following call + // due to the allocation policy. + + let expunged_region_snapshots = datastore + .find_region_snapshots_on_expunged_physical_disks(&opctx) + .await + .unwrap(); + + assert_eq!(expunged_region_snapshots.len(), 1); + + for expunged_region_snapshot in expunged_region_snapshots { + assert_eq!(expunged_region_snapshot.snapshot_id, snapshot.identity.id); + } + + // Either one or two read/write regions will be returned: + // + // - one for the disk, and + // - one for the snapshot destination volume, depending on if it was + // allocated on to the physical disk that was expunged. + + let expunged_regions = datastore + .find_read_write_regions_on_expunged_physical_disks(&opctx) + .await + .unwrap(); + + match expunged_regions.len() { + 1 => { + assert_eq!(expunged_regions[0].id(), region.id()); + } + + 2 => { + assert!(expunged_regions.iter().any(|r| r.id() == region.id())); + + let (.., db_snapshot) = LookupPath::new(&opctx, datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let snapshot_allocated_datasets_and_regions = datastore + .get_allocated_regions(db_snapshot.destination_volume_id) + .await + .unwrap(); + + let snapshot_allocated_regions: Vec = + snapshot_allocated_datasets_and_regions + .into_iter() + .map(|(_, r)| r.id()) + .collect(); + + assert!(expunged_regions.iter().any(|region| { + snapshot_allocated_regions.contains(®ion.id()) + })); + } + + _ => { + panic!("unexpected number of expunged regions!"); + } + } + + // Now, race the region replacement with the region snapshot replacement: + // + // 1) region replacement will allocate a new region and swap it into the + // disk volume. 
+ + let internal_client = &cptestctx.internal_client; + + let _ = + activate_background_task(&internal_client, "region_replacement").await; + + // After that task invocation, there should be one running region + // replacement for the disk's region. Filter out the replacement request for + // the snapshot destination volume if it's there. The above background task + // only starts the associated saga, so wait for it to complete. + + wait_for_condition( + || { + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + async move { + let region_replacements: Vec<_> = datastore + .get_running_region_replacements(&opctx) + .await + .unwrap() + .into_iter() + .filter(|x| x.old_region_id == region.id()) + .collect(); + + if region_replacements.len() == 1 { + // The saga transitioned the request ok + Ok(()) + } else { + // The saga is still running + Err(CondCheckError::<()>::NotYet) + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(20), + ) + .await + .expect("request transitioned to expected state"); + + let region_replacements: Vec<_> = datastore + .get_running_region_replacements(&opctx) + .await + .unwrap() + .into_iter() + .filter(|x| x.old_region_id == region.id()) + .collect(); + + assert_eq!(region_replacements.len(), 1); + + // 2) region snapshot replacement start will replace the region snapshot in + // the snapshot volume + + let _ = activate_background_task( + &internal_client, + "region_snapshot_replacement_start", + ) + .await; + + // After that, there should be one "replacement done" region snapshot + // replacement for the associated region snapshot. The above background task + // only starts the associated saga, so wait for it to complete. + wait_for_condition( + || { + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + async move { + let region_snapshot_replacements = datastore + .get_replacement_done_region_snapshot_replacements(&opctx) + .await + .unwrap(); + + if region_snapshot_replacements.len() == 1 { + // The saga transitioned the request ok + Ok(()) + } else { + // The saga is still running + Err(CondCheckError::<()>::NotYet) + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(20), + ) + .await + .expect("request transitioned to expected state"); + + let region_snapshot_replacements = datastore + .get_replacement_done_region_snapshot_replacements(&opctx) + .await + .unwrap(); + + assert_eq!(region_snapshot_replacements.len(), 1); + assert_eq!( + region_snapshot_replacements[0].old_dataset_id, + dataset.id().into() + ); + assert_eq!(region_snapshot_replacements[0].old_region_id, region.id()); + assert_eq!( + region_snapshot_replacements[0].old_snapshot_id, + snapshot.identity.id + ); + assert_eq!( + region_snapshot_replacements[0].replacement_state, + RegionSnapshotReplacementState::ReplacementDone, + ); + + assert!(datastore.find_deleted_volume_regions().await.unwrap().is_empty()); + + // 3) Delete the disk + let disk_url = get_disk_url("disk"); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // The volume should be soft-deleted now. The region snapshot replacement + // swapped out the region snapshot from the snapshot volume to the temporary + // volume for later deletion, but has not actually deleted that temporary + // volume yet, so the count will not have gone to 0. 
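For readers new to this area: "soft-deleted" in the check that follows means the volume row is still present in the database but its `time_deleted` column has been set; the row is only removed later, once the resource accounting described above reaches zero. A hedged sketch of that predicate as a standalone helper — the helper name is hypothetical and the `DataStore`/ID parameter types are approximate:

    // Hypothetical helper (not part of this change): true if the volume
    // row is still present but marked deleted, i.e. soft-deleted.
    async fn volume_is_soft_deleted(
        datastore: &nexus_db_queries::db::DataStore,
        volume_id: uuid::Uuid,
    ) -> bool {
        match datastore.volume_get(volume_id).await.unwrap() {
            // Row present with `time_deleted` set: soft-deleted.
            Some(volume) => volume.time_deleted.is_some(),
            // Row gone entirely: hard-deleted, not soft-deleted.
            None => false,
        }
    }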
+ + let volume = datastore.volume_get(db_disk.volume_id).await.unwrap(); + assert!(volume.is_some()); + assert!(volume.unwrap().time_deleted.is_some()); + + // 4) region snapshot replacement garbage collect will delete the temporary + // volume with the stashed reference to the region snapshot, bringing the + // reference count to zero. + + let _ = activate_background_task( + &internal_client, + "region_snapshot_replacement_garbage_collection", + ) + .await; + + // Assert the region snapshot was deleted. + wait_for_condition( + || { + let dataset_id = dataset.id(); + let region_id = region.id(); + let snapshot_id = snapshot.identity.id; + + async move { + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap(); + + match region_snapshot { + Some(_) => { + // Region snapshot not garbage collected yet + Err(CondCheckError::<()>::NotYet) + } + + None => { + // Region snapshot garbage collected ok + Ok(()) + } + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(60), + ) + .await + .expect("region snapshot garbage collected"); + + // Assert that the disk's volume is still only soft-deleted, because the two + // other associated region snapshots still exist. + let volume = datastore.volume_get(db_disk.volume_id).await.unwrap(); + assert!(volume.is_some()); + + // Check on the old region id - it should not be deleted + let maybe_region = + datastore.get_region_optional(region.id()).await.unwrap(); + + eprintln!("old_region_id: {:?}", &maybe_region); + assert!(maybe_region.is_some()); + + // But the new region id will be! + let maybe_region = datastore + .get_region_optional(region_replacements[0].new_region_id.unwrap()) + .await + .unwrap(); + + eprintln!("new region id: {:?}", &maybe_region); + assert!(maybe_region.is_none()); + + // The region_replacement drive task should invoke the drive saga now, which + // will skip over all notification steps and transition the request to + // ReplacementDone + + let last_background_task = + activate_background_task(&internal_client, "region_replacement_driver") + .await; + + assert!(match last_background_task.last { + LastResult::Completed(last_result_completed) => { + match serde_json::from_value::( + last_result_completed.details, + ) { + Err(e) => { + eprintln!("{e}"); + false + } + + Ok(v) => !v.drive_invoked_ok.is_empty(), + } + } + + _ => { + false + } + }); + + // wait for the drive saga to complete here + wait_for_condition( + || { + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + let replacement_request_id = region_replacements[0].id; + + async move { + let region_replacement = datastore + .get_region_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + let state = region_replacement.replacement_state; + + if state == RegionReplacementState::ReplacementDone { + // The saga transitioned the request ok + Ok(()) + } else if state == RegionReplacementState::Driving { + // The drive saga is still running + Err(CondCheckError::<()>::NotYet) + } else if state == RegionReplacementState::Running { + // The drive saga hasn't started yet + Err(CondCheckError::<()>::NotYet) + } else if state == RegionReplacementState::Completing { + // The saga transitioned the request ok, and it's now being + // finished by the region replacement finish saga + Ok(()) + } else if state == RegionReplacementState::Complete { + // The saga transitioned the request ok, and it was finished + // by the 
region replacement finish saga + Ok(()) + } else { + // Any other state is not expected + panic!("unexpected state {state:?}!"); + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(60), + ) + .await + .expect("request transitioned to expected state"); + + // After the region snapshot replacement process runs to completion, there + // should be no more crucible resources left. Run the "region snapshot + // replacement step" background task until there's nothing left, then the + // "region snapshot replacement finish", then make sure there are no + // crucible resources left. + + let mut count = 0; + loop { + let actions_taken = + run_region_snapshot_replacement_step(&internal_client).await; + + if actions_taken == 0 { + break; + } + + count += 1; + + if count > 20 { + assert!(false); + } + } + + let _ = activate_background_task( + &internal_client, + "region_snapshot_replacement_finish", + ) + .await; + + // Ensure the region snapshot replacement request went to Complete + + wait_for_condition( + || { + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + let request_id = region_snapshot_replacements[0].id; + + async move { + let region_snapshot_replacement = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, request_id, + ) + .await + .unwrap(); + + let state = region_snapshot_replacement.replacement_state; + + if state == RegionSnapshotReplacementState::Complete { + Ok(()) + } else { + // Any other state is not expected + Err(CondCheckError::<()>::NotYet) + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(60), + ) + .await + .expect("request transitioned to expected state"); + + // Delete the snapshot + + let snapshot_url = get_snapshot_url("snapshot"); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // and now there should be no higher level resources left + + let disks_url = get_disks_url(); + assert_eq!( + collection_list::(&client, &disks_url).await.len(), + 0 + ); + + let snapshots_url = get_snapshots_url(); + assert_eq!( + collection_list::(&client, &snapshots_url).await.len(), + 0 + ); + + // Make sure that all the background tasks can run to completion. + + run_replacement_tasks_to_completion(&internal_client).await; + + // The disk volume should be deleted by the snapshot delete: wait until this + // happens + + wait_for_condition( + || { + let datastore = datastore.clone(); + let volume_id = db_disk.volume_id; + + async move { + let volume = datastore.volume_get(volume_id).await.unwrap(); + if volume.is_none() { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(10), + ) + .await + .expect("disk volume deleted"); + + // There should be no more crucible resources left. Don't just check for + // `crucible_resources_deleted` here! We have set one of the physical disk + // policies to expunged, so Nexus will not attempt to clean up any resources + // on that physical disk. 
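One aside on the step-driving loop earlier in this hunk: it caps the number of "region snapshot replacement step" activations at roughly 20 and fails with a bare `assert!(false)` when the cap is hit. An equivalent phrasing with a bounded `for` loop and a descriptive failure message, shown here as a sketch rather than a proposed change:

    // Drive the "step" task until it reports no work, giving up with a
    // message after 20 activations (roughly mirroring the cap above).
    let mut converged = false;
    for _ in 0..20 {
        let actions_taken =
            run_region_snapshot_replacement_step(&internal_client).await;
        if actions_taken == 0 {
            converged = true;
            break;
        }
    }
    assert!(converged, "region snapshot replacement steps did not converge");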
+ + disk_test.remove_zpool(db_zpool.id()).await; + + // Now, assert that all crucible resources are cleaned up + + assert!(disk_test.crucible_resources_deleted().await); +} + +mod region_snapshot_replacement { + use super::*; + + #[derive(Debug)] + struct ExpectedEndState(pub RegionSnapshotReplacementState); + + #[derive(Debug)] + struct ExpectedIntermediateState(pub RegionSnapshotReplacementState); + + #[derive(Debug)] + struct ExpectedStartState(pub RegionSnapshotReplacementState); + + pub(super) struct DeletedVolumeTest<'a> { + log: Logger, + datastore: Arc, + disk_test: DiskTest<'a>, + client: ClientTestContext, + internal_client: ClientTestContext, + replacement_request_id: Uuid, + snapshot_socket_addr: SocketAddr, + } + + impl<'a> DeletedVolumeTest<'a> { + pub async fn new(cptestctx: &'a ControlPlaneTestContext) -> Self { + let nexus = &cptestctx.server.server_context().nexus; + + // Create four zpools, each with one dataset. This is required for + // region and region snapshot replacement to have somewhere to move + // the data. + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(cptestctx.first_sled()) + .with_zpool_count(4) + .build() + .await; + + let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; + let datastore = nexus.datastore().clone(); + + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + // Create a disk, a snapshot of that disk, and a disk from that + // snapshot + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + + let snapshot = + create_snapshot(&client, PROJECT_NAME, "disk", "snapshot") + .await; + + let disk_from_snapshot = create_disk_from_snapshot( + &client, + PROJECT_NAME, + "disk-from-snapshot", + snapshot.identity.id, + ) + .await; + + // Manually create the region snapshot replacement request for the + // first allocated region of that disk + + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + assert_eq!(db_disk.id(), disk.identity.id); + + let disk_allocated_regions = datastore + .get_allocated_regions(db_disk.volume_id) + .await + .unwrap(); + let (_, region) = &disk_allocated_regions[0]; + + let region_snapshot = datastore + .region_snapshot_get( + region.dataset_id(), + region.id(), + snapshot.identity.id, + ) + .await + .expect("found region snapshot without error") + .unwrap(); + + let replacement_request_id = datastore + .create_region_snapshot_replacement_request( + &opctx, + ®ion_snapshot, + ) + .await + .unwrap(); + + // Assert the request is in state Requested + + let region_snapshot_replacement = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + assert_eq!( + region_snapshot_replacement.replacement_state, + RegionSnapshotReplacementState::Requested, + ); + + // Assert two volumes reference the snapshot addr + + let snapshot_socket_addr = + region_snapshot.snapshot_addr.parse().unwrap(); + + let volumes = datastore + .find_volumes_referencing_socket_addr( + &opctx, + snapshot_socket_addr, + ) + .await + .unwrap(); + + assert_eq!(volumes.len(), 2); + + // Validate that they are snapshot and disk from snapshot + + let volumes_set: HashSet = + volumes.into_iter().map(|v| v.id()).collect(); + + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + 
let (.., db_disk_from_snapshot) = + LookupPath::new(&opctx, &datastore) + .disk_id(disk_from_snapshot.identity.id) + .fetch() + .await + .unwrap(); + + assert!(volumes_set.contains(&db_snapshot.volume_id)); + assert!(volumes_set.contains(&db_disk_from_snapshot.volume_id)); + + DeletedVolumeTest { + log: cptestctx.logctx.log.new(o!()), + datastore, + disk_test, + client: client.clone(), + internal_client: internal_client.clone(), + replacement_request_id, + snapshot_socket_addr, + } + } + + pub fn opctx(&self) -> OpContext { + OpContext::for_tests(self.log.clone(), self.datastore.clone()) + } + + pub async fn delete_the_disk(&self) { + let disk_url = get_disk_url("disk"); + NexusRequest::object_delete(&self.client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + } + + pub async fn delete_the_snapshot(&self) { + let snapshot_url = get_snapshot_url("snapshot"); + NexusRequest::object_delete(&self.client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + } + + pub async fn delete_the_disk_from_snapshot(&self) { + let disk_url = get_disk_url("disk-from-snapshot"); + NexusRequest::object_delete(&self.client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk-from-snapshot"); + } + + /// Make sure: + /// + /// - all region snapshot replacement related background tasks run to + /// completion + /// - this harness' region snapshot replacement request has transitioned + /// to Complete + /// - there are no more volumes that reference the request's region + /// snapshot + pub async fn finish_test(&self) { + // Make sure that all the background tasks can run to completion. + + run_replacement_tasks_to_completion(&self.internal_client).await; + + // Assert the request is in state Complete + + wait_for_condition( + || { + let datastore = self.datastore.clone(); + let opctx = self.opctx(); + let replacement_request_id = self.replacement_request_id; + + async move { + let region_replacement = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + let state = region_replacement.replacement_state; + + if state == RegionSnapshotReplacementState::Complete { + // The saga transitioned the request ok + Ok(()) + } else { + // The saga is still running + Err(CondCheckError::<()>::NotYet) + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(10), + ) + .await + .expect("request transitioned to expected state"); + + let region_snapshot_replacement = self + .datastore + .get_region_snapshot_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); + + assert_eq!( + region_snapshot_replacement.replacement_state, + RegionSnapshotReplacementState::Complete, + ); + + // Assert no volumes are referencing the snapshot address + + let volumes = self + .datastore + .find_volumes_referencing_socket_addr( + &self.opctx(), + self.snapshot_socket_addr, + ) + .await + .unwrap(); + + if !volumes.is_empty() { + eprintln!("{:?}", volumes); + } + + assert!(volumes.is_empty()); + } + + /// Assert no Crucible resources are leaked + pub async fn assert_no_crucible_resources_leaked(&self) { + assert!(self.disk_test.crucible_resources_deleted().await); + } + + async fn wait_for_request_state( + &self, + expected_end_state: ExpectedEndState, + expected_intermediate_state: ExpectedIntermediateState, + 
expected_start_state: ExpectedStartState, + ) { + wait_for_condition( + || { + let datastore = self.datastore.clone(); + let opctx = self.opctx(); + let replacement_request_id = self.replacement_request_id; + + async move { + let request = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + let state = request.replacement_state; + + if state == expected_end_state.0 { + // The saga transitioned the request ok + Ok(()) + } else if state == expected_intermediate_state.0 { + // The saga is still running + Err(CondCheckError::<()>::NotYet) + } else if state == expected_start_state.0 { + // The saga hasn't started yet + Err(CondCheckError::<()>::NotYet) + } else { + // Any other state is not expected + panic!("unexpected state {state:?}!"); + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(60), + ) + .await + .expect("request transitioned to expected state"); + + // Assert the request state + + let region_snapshot_replacement = self + .datastore + .get_region_snapshot_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); + + assert_eq!( + region_snapshot_replacement.replacement_state, + expected_end_state.0, + ); + } + + /// Run the "region snapshot replacement" task to transition the request + /// to ReplacementDone. + pub async fn transition_request_to_replacement_done(&self) { + // Activate the "region snapshot replacement start" background task + + run_region_snapshot_replacement_start(&self.internal_client).await; + + // The activation above could only have started the associated saga, + // so wait until the request is in state Running. + + self.wait_for_request_state( + ExpectedEndState( + RegionSnapshotReplacementState::ReplacementDone, + ), + ExpectedIntermediateState( + RegionSnapshotReplacementState::Allocating, + ), + ExpectedStartState(RegionSnapshotReplacementState::Requested), + ) + .await; + } + + /// Run the "region snapshot replacement garbage collection" task to + /// transition the request to Running. + pub async fn transition_request_to_running(&self) { + // Activate the "region snapshot replacement garbage collection" + // background task + + run_region_snapshot_replacement_garbage_collection( + &self.internal_client, + ) + .await; + + // The activation above could only have started the associated saga, + // so wait until the request is in state Running. 
+ + self.wait_for_request_state( + ExpectedEndState(RegionSnapshotReplacementState::Running), + ExpectedIntermediateState( + RegionSnapshotReplacementState::DeletingOldVolume, + ), + ExpectedStartState( + RegionSnapshotReplacementState::ReplacementDone, + ), + ) + .await; + } + + /// Manually create a region snapshot replacement step for the disk + /// created from the snapshot + pub async fn create_manual_region_snapshot_replacement_step(&self) { + let disk_url = get_disk_url("disk-from-snapshot"); + + let disk_from_snapshot: external::Disk = + NexusRequest::object_get(&self.client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + let (.., db_disk_from_snapshot) = + LookupPath::new(&self.opctx(), &self.datastore) + .disk_id(disk_from_snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let result = self + .datastore + .create_region_snapshot_replacement_step( + &self.opctx(), + self.replacement_request_id, + db_disk_from_snapshot.volume_id, + ) + .await + .unwrap(); + + match result { + InsertStepResult::Inserted { .. } => {} + + _ => { + assert!(false, "bad result from create_region_snapshot_replacement_step"); + } + } + } + } +} + +/// Assert that a region snapshot replacement request in state "Requested" can +/// have its snapshot deleted and still transition to Complete +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_state_requested( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state Requested: delete + // the snapshot, then finish the test. + + test_harness.delete_the_snapshot().await; + + test_harness.finish_test().await; + + // Delete all the non-deleted resources + + test_harness.delete_the_disk().await; + test_harness.delete_the_disk_from_snapshot().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Assert that a region snapshot replacement request in state "Requested" can +/// have its snapshot deleted, and the snapshot's source disk can be deleted, +/// and the request will still transition to Complete +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_state_requested_2( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state Requested: + // - delete the snapshot + // - delete the snapshot's source disk + // - the only thing that will remain is the disk-from-snap that was created + // - finally, call finish_test + + test_harness.delete_the_snapshot().await; + test_harness.delete_the_disk().await; + + test_harness.finish_test().await; + + // Delete all the non-deleted resources + + test_harness.delete_the_disk_from_snapshot().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Assert that a region snapshot replacement request in state "Requested" can +/// have everything be deleted, and the request will still transition to +/// Complete +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_state_requested_3( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state 
Requested: + // - delete the snapshot + // - delete the snapshot's source disk + // - delete the disk created from the snapshot + // - finally, call finish_test + + test_harness.delete_the_snapshot().await; + test_harness.delete_the_disk().await; + test_harness.delete_the_disk_from_snapshot().await; + + test_harness.finish_test().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Assert that a region snapshot replacement request in state "ReplacementDone" +/// can have its snapshot deleted, and the request will still transition to +/// Complete +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_state_replacement_done( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state Requested: + // - transition the request to "ReplacementDone" + // - delete the snapshot + // - finally, call finish_test + + test_harness.transition_request_to_replacement_done().await; + + test_harness.delete_the_snapshot().await; + test_harness.delete_the_disk().await; + + test_harness.finish_test().await; + + // Delete all the non-deleted resources + + test_harness.delete_the_disk_from_snapshot().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Assert that a region snapshot replacement request in state "Running" +/// can have its snapshot deleted, and the request will still transition to +/// Complete +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_state_running( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state Requested: + // - transition the request to "ReplacementDone" + // - transition the request to "Running" + // - delete the snapshot + // - finally, call finish_test + + test_harness.transition_request_to_replacement_done().await; + test_harness.transition_request_to_running().await; + + test_harness.delete_the_snapshot().await; + test_harness.delete_the_disk().await; + + test_harness.finish_test().await; + + // Delete all the non-deleted resources + + test_harness.delete_the_disk_from_snapshot().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Assert that a region snapshot replacement step can have its associated +/// volume deleted and still transition to VolumeDeleted +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_step( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state Requested: + // - transition the request to "ReplacementDone" + // - transition the request to "Running" + // - manually create a region snapshot replacement step for the disk created + // from the snapshot + // - delete the disk created from the snapshot + // - finally, call finish_test + + test_harness.transition_request_to_replacement_done().await; + test_harness.transition_request_to_running().await; + + test_harness.create_manual_region_snapshot_replacement_step().await; + test_harness.delete_the_disk_from_snapshot().await; + + test_harness.finish_test().await; + + // Delete all the non-deleted resources + 
+ test_harness.delete_the_disk().await; + test_harness.delete_the_snapshot().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Tests that replacement can occur until completion +#[nexus_test] +async fn test_replacement_sanity(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create four zpools, each with one dataset. This is required for region + // and region snapshot replacement to have somewhere to move the data. + let sled_id = cptestctx.first_sled(); + + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(4) + .build() + .await; + + // Create a disk and a snapshot and a disk from that snapshot + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + let snapshot = create_snapshot(&client, PROJECT_NAME, "disk", "snap").await; + let _disk_from_snapshot = create_disk_from_snapshot( + &client, + PROJECT_NAME, + "disk-from-snap", + snapshot.identity.id, + ) + .await; + + // Before expunging the physical disk, save the DB model + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + assert_eq!(db_disk.id(), disk.identity.id); + + // Next, expunge a physical disk that contains a region + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let (dataset, _) = &disk_allocated_regions[0]; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + + // Any volumes sent to the Pantry for reconciliation should return active + // for this test + + cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .set_auto_activate_volumes() + .await; + + // Now, run all replacement tasks to completion + let internal_client = &cptestctx.internal_client; + run_replacement_tasks_to_completion(&internal_client).await; +} + +/// Tests that multiple replacements can occur until completion +#[nexus_test] +async fn test_region_replacement_triple_sanity( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create five zpools, each with one dataset. This is required for region + // and region snapshot replacement to have somewhere to move the data, and + // for this test we're doing two expungements. 
+ let sled_id = cptestctx.first_sled(); + + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(6) + .build() + .await; + + // Any volumes sent to the Pantry for reconciliation should return active + // for this test + + cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .set_auto_activate_volumes() + .await; + + // Create a disk and a snapshot and a disk from that snapshot + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + let snapshot = create_snapshot(&client, PROJECT_NAME, "disk", "snap").await; + let _disk_from_snapshot = create_disk_from_snapshot( + &client, + PROJECT_NAME, + "disk-from-snap", + snapshot.identity.id, + ) + .await; + + // Before expunging any physical disk, save some DB models + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let internal_client = &cptestctx.internal_client; + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = + datastore.get_allocated_regions(db_snapshot.volume_id).await.unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert_eq!(snapshot_allocated_regions.len(), 0); + + for i in disk_allocated_regions { + let (dataset, _) = &i; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + + // Now, run all replacement tasks to completion + run_replacement_tasks_to_completion(&internal_client).await; + } + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = + datastore.get_allocated_regions(db_snapshot.volume_id).await.unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert!(disk_allocated_regions.iter().all(|(_, r)| !r.read_only())); + + // Assert region snapshots replaced with three read-only regions + assert_eq!(snapshot_allocated_regions.len(), 3); + assert!(snapshot_allocated_regions.iter().all(|(_, r)| r.read_only())); +} + +/// Tests that multiple replacements can occur until completion, after expunging +/// two physical disks before any replacements occur (aka we can lose two +/// physical disks and still recover) +#[nexus_test] +async fn test_region_replacement_triple_sanity_2( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create five zpools, each with one dataset. This is required for region + // and region snapshot replacement to have somewhere to move the data, and + // for this test we're doing two expungements. 
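The lookup-zpool-then-expunge-its-physical-disk block above appears near-verbatim in several of these tests. A hedged sketch of what factoring it into a shared helper might look like — the helper name is hypothetical and the `DiskTest`/`Dataset` parameter types are approximate:

    // Hypothetical helper: expunge the physical disk backing the zpool
    // that holds `dataset`, mirroring the repeated block in the tests
    // above.
    async fn expunge_backing_physical_disk(
        opctx: &OpContext,
        datastore: &DataStore,
        disk_test: &DiskTest<'_>,
        dataset: &Dataset,
    ) {
        let zpool = disk_test
            .zpools()
            .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id)
            .expect("Expected at least one zpool");

        let (_, db_zpool) = LookupPath::new(opctx, datastore)
            .zpool_id(zpool.id.into_untyped_uuid())
            .fetch()
            .await
            .unwrap();

        datastore
            .physical_disk_update_policy(
                opctx,
                db_zpool.physical_disk_id.into(),
                PhysicalDiskPolicy::Expunged,
            )
            .await
            .unwrap();
    }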
+ let sled_id = cptestctx.first_sled(); + + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(6) + .build() + .await; + + // Any volumes sent to the Pantry for reconciliation should return active + // for this test + + cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .set_auto_activate_volumes() + .await; + + // Create a disk and a snapshot and a disk from that snapshot + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + let snapshot = create_snapshot(&client, PROJECT_NAME, "disk", "snap").await; + let _disk_from_snapshot = create_disk_from_snapshot( + &client, + PROJECT_NAME, + "disk-from-snap", + snapshot.identity.id, + ) + .await; + + // Before expunging any physical disk, save some DB models + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let internal_client = &cptestctx.internal_client; + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = + datastore.get_allocated_regions(db_snapshot.volume_id).await.unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert_eq!(snapshot_allocated_regions.len(), 0); + + // Expunge two physical disks before any replacements occur + for i in [0, 1] { + let (dataset, _) = &disk_allocated_regions[i]; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + } + + // Now, run all replacement tasks to completion + run_replacement_tasks_to_completion(&internal_client).await; + + // Expunge the last physical disk + { + let (dataset, _) = &disk_allocated_regions[2]; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + } + + // Now, run all replacement tasks to completion + run_replacement_tasks_to_completion(&internal_client).await; + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = + datastore.get_allocated_regions(db_snapshot.volume_id).await.unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert!(disk_allocated_regions.iter().all(|(_, r)| !r.read_only())); + + // Assert region snapshots replaced with three read-only regions + assert_eq!(snapshot_allocated_regions.len(), 3); + assert!(snapshot_allocated_regions.iter().all(|(_, r)| r.read_only())); +} diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index d9888f9ccd..db16113dd7 100644 --- a/nexus/tests/integration_tests/disks.rs +++ 
b/nexus/tests/integration_tests/disks.rs @@ -769,10 +769,9 @@ async fn test_disk_region_creation_failure( .await .unwrap(); - // After the failed allocation, the disk should be Faulted + // After the failed allocation, the disk creation should have unwound let disks = disks_list(&client, &disks_url).await; - assert_eq!(disks.len(), 1); - assert_eq!(disks[0].state, DiskState::Faulted); + assert_eq!(disks.len(), 0); } // Tests that invalid block sizes are rejected @@ -2578,7 +2577,7 @@ async fn test_disk_expunge(cptestctx: &ControlPlaneTestContext) { // All three regions should be returned let expunged_regions = datastore - .find_regions_on_expunged_physical_disks(&opctx) + .find_read_write_regions_on_expunged_physical_disks(&opctx) .await .unwrap(); diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 6a9ce28389..f0eb294e58 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -3764,12 +3764,13 @@ impl TestReadOnlyRegionReferenceUsage { // read-only regions should never be returned by find_deleted_volume_regions pub async fn region_not_returned_by_find_deleted_volume_regions(&self) { - let deleted_volume_regions = + let freed_crucible_resources = self.datastore.find_deleted_volume_regions().await.unwrap(); - assert!(!deleted_volume_regions + assert!(!freed_crucible_resources + .datasets_and_regions .into_iter() - .any(|(_, r, _)| r.id() == self.region.id())); + .any(|(_, r)| r.id() == self.region.id())); } pub async fn create_first_volume_region_in_rop(&self) { diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index ea41375d78..8915ac8746 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -248,11 +248,13 @@ impl Blueprint { pub fn all_omicron_datasets( &self, filter: BlueprintDatasetFilter, - ) -> impl Iterator { + ) -> impl Iterator { self.blueprint_datasets .iter() - .flat_map(move |(_, datasets)| datasets.datasets.values()) - .filter(move |d| d.disposition.matches(filter)) + .flat_map(move |(sled_id, datasets)| { + datasets.datasets.values().map(|dataset| (*sled_id, dataset)) + }) + .filter(move |(_, d)| d.disposition.matches(filter)) } /// Iterate over the [`BlueprintZoneConfig`] instances in the blueprint @@ -637,7 +639,16 @@ fn zone_sort_key(z: &T) -> impl Ord { /// /// Part of [`BlueprintZonesConfig`]. #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct BlueprintZoneConfig { /// The disposition (desired state) of this zone recorded in the blueprint. @@ -1010,7 +1021,16 @@ impl BlueprintDatasetDisposition { /// Information about a dataset as recorded in a blueprint #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct BlueprintDatasetConfig { // TODO: Display this in diffs - leave for now, for backwards compat diff --git a/nexus/types/src/deployment/network_resources.rs b/nexus/types/src/deployment/network_resources.rs index 358e7c229d..b08e5a2582 100644 --- a/nexus/types/src/deployment/network_resources.rs +++ b/nexus/types/src/deployment/network_resources.rs @@ -148,7 +148,18 @@ impl OmicronZoneNetworkResources { } /// External IP variants possible for Omicron-managed zones. 
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize)] +#[derive( + Debug, + Clone, + Copy, + Hash, + PartialOrd, + Ord, + PartialEq, + Eq, + Serialize, + Deserialize, +)] pub enum OmicronZoneExternalIp { Floating(OmicronZoneExternalFloatingIp), Snat(OmicronZoneExternalSnatIp), @@ -204,6 +215,8 @@ pub enum OmicronZoneExternalIpKey { Clone, Copy, Hash, + PartialOrd, + Ord, PartialEq, Eq, JsonSchema, @@ -223,6 +236,8 @@ pub struct OmicronZoneExternalFloatingIp { Copy, PartialEq, Eq, + PartialOrd, + Ord, JsonSchema, Serialize, Deserialize, @@ -249,6 +264,8 @@ impl OmicronZoneExternalFloatingAddr { Clone, Copy, Hash, + PartialOrd, + Ord, PartialEq, Eq, JsonSchema, diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs index bb5225986c..c1ecd18158 100644 --- a/nexus/types/src/deployment/zone_type.rs +++ b/nexus/types/src/deployment/zone_type.rs @@ -23,7 +23,16 @@ use std::net::Ipv6Addr; use std::net::SocketAddrV6; #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] #[serde(tag = "type", rename_all = "snake_case")] pub enum BlueprintZoneType { @@ -339,7 +348,16 @@ pub mod blueprint_zone_type { use std::net::SocketAddrV6; #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct BoundaryNtp { pub address: SocketAddrV6, @@ -353,7 +371,16 @@ pub mod blueprint_zone_type { /// Used in single-node clickhouse setups #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct Clickhouse { pub address: SocketAddrV6, @@ -361,7 +388,16 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct ClickhouseKeeper { pub address: SocketAddrV6, @@ -370,7 +406,16 @@ pub mod blueprint_zone_type { /// Used in replicated clickhouse setups #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct ClickhouseServer { pub address: SocketAddrV6, @@ -378,7 +423,16 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct CockroachDb { pub address: SocketAddrV6, @@ -386,7 +440,16 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct Crucible { pub address: SocketAddrV6, @@ -394,14 +457,32 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct CruciblePantry { pub address: SocketAddrV6, } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, 
Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct ExternalDns { pub dataset: OmicronZoneDataset, @@ -414,7 +495,16 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct InternalDns { pub dataset: OmicronZoneDataset, @@ -434,14 +524,32 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct InternalNtp { pub address: SocketAddrV6, } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct Nexus { /// The address at which the internal nexus server is reachable. @@ -457,7 +565,16 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct Oximeter { pub address: SocketAddrV6, diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index cf3d652587..bee2f56c34 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -37,6 +37,7 @@ pub struct LookupRegionPortStatus { pub struct RegionSnapshotReplacementStartStatus { pub requests_created_ok: Vec, pub start_invoked_ok: Vec, + pub requests_completed_ok: Vec, pub errors: Vec, } @@ -55,13 +56,14 @@ pub struct RegionSnapshotReplacementStepStatus { pub step_records_created_ok: Vec, pub step_garbage_collect_invoked_ok: Vec, pub step_invoked_ok: Vec, + pub step_set_volume_deleted_ok: Vec, pub errors: Vec, } /// The status of a `region_snapshot_replacement_finish` background task activation #[derive(Serialize, Deserialize, Default, Debug, PartialEq, Eq)] pub struct RegionSnapshotReplacementFinishStatus { - pub records_set_to_done: Vec, + pub finish_invoked_ok: Vec, pub errors: Vec, } diff --git a/openapi/clickhouse-admin-server.json b/openapi/clickhouse-admin-server.json index c82c7c0d8e..50c526569e 100644 --- a/openapi/clickhouse-admin-server.json +++ b/openapi/clickhouse-admin-server.json @@ -616,7 +616,7 @@ "type": "object", "properties": { "time": { - "$ref": "#/components/schemas/Timestamp" + "type": "string" }, "value": { "type": "number", @@ -628,17 +628,6 @@ "value" ] }, - "Timestamp": { - "anyOf": [ - { - "type": "string", - "format": "date-time" - }, - { - "type": "string" - } - ] - }, "SystemTable": { "description": "Available metrics tables in the `system` database", "type": "string", diff --git a/openapi/clickhouse-admin-single.json b/openapi/clickhouse-admin-single.json index b00bf56314..c6b99da245 100644 --- a/openapi/clickhouse-admin-single.json +++ b/openapi/clickhouse-admin-single.json @@ -131,7 +131,7 @@ "type": "object", "properties": { "time": { - "$ref": "#/components/schemas/Timestamp" + "type": "string" }, "value": { "type": "number", @@ -143,17 +143,6 @@ "value" ] }, - "Timestamp": { - "anyOf": [ - { - "type": "string", - "format": "date-time" - }, - { - "type": "string" - } - ] - }, "SystemTable": { "description": "Available 
metrics tables in the `system` database", "type": "string", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 14a4c92692..c7d2b36de4 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -785,9 +785,9 @@ } }, "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}": { - "get": { - "summary": "Fetch a support bundle from a particular dataset", - "operationId": "support_bundle_get", + "post": { + "summary": "Create a support bundle within a particular dataset", + "operationId": "support_bundle_create", "parameters": [ { "in": "path", @@ -815,32 +815,50 @@ "schema": { "$ref": "#/components/schemas/TypedUuidForZpoolKind" } + }, + { + "in": "query", + "name": "hash", + "required": true, + "schema": { + "type": "string", + "format": "hex string (32 bytes)" + } } ], "requestBody": { "content": { - "application/json": { + "application/octet-stream": { "schema": { - "$ref": "#/components/schemas/SupportBundleGetQueryParams" + "type": "string", + "format": "binary" } } }, "required": true }, "responses": { - "default": { - "description": "", + "201": { + "description": "successful creation", "content": { - "*/*": { - "schema": {} + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleMetadata" + } } } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" } } }, - "post": { - "summary": "Create a support bundle within a particular dataset", - "operationId": "support_bundle_create", + "delete": { + "summary": "Delete a support bundle from a particular dataset", + "operationId": "support_bundle_delete", "parameters": [ { "in": "path", @@ -868,50 +886,165 @@ "schema": { "$ref": "#/components/schemas/TypedUuidForZpoolKind" } + } + ], + "responses": { + "204": { + "description": "successful deletion" }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download": { + "get": { + "summary": "Fetch a support bundle from a particular dataset", + "operationId": "support_bundle_download", + "parameters": [ { - "in": "query", - "name": "hash", + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", "required": true, "schema": { - "type": "string", - "format": "hex string (32 bytes)" + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" } } ], - "requestBody": { - "content": { - "application/octet-stream": { - "schema": { - "type": "string", - "format": "binary" + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} } } + } + } + }, + "head": { + "summary": "Fetch metadata about a support bundle from a particular dataset", + "operationId": "support_bundle_head", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } }, - "required": true - }, + { + 
"in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" + } + } + ], "responses": { - "201": { - "description": "successful creation", + "default": { + "description": "", "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SupportBundleMetadata" - } + "*/*": { + "schema": {} } } + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download/{file}": { + "get": { + "summary": "Fetch a file within a support bundle from a particular dataset", + "operationId": "support_bundle_download_file", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } }, - "4XX": { - "$ref": "#/components/responses/Error" + { + "in": "path", + "name": "file", + "description": "The path of the file within the support bundle to query", + "required": true, + "schema": { + "type": "string" + } }, - "5XX": { - "$ref": "#/components/responses/Error" + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } } } }, - "delete": { - "summary": "Delete a support bundle from a particular dataset", - "operationId": "support_bundle_delete", + "head": { + "summary": "Fetch metadata about a file within a support bundle from a particular dataset", + "operationId": "support_bundle_head_file", "parameters": [ { "in": "path", @@ -922,6 +1055,15 @@ "$ref": "#/components/schemas/TypedUuidForDatasetKind" } }, + { + "in": "path", + "name": "file", + "description": "The path of the file within the support bundle to query", + "required": true, + "schema": { + "type": "string" + } + }, { "in": "path", "name": "support_bundle_id", @@ -942,20 +1084,64 @@ } ], "responses": { - "204": { - "description": "successful deletion" + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/index": { + "get": { + "summary": "Fetch the index (list of files within a support bundle)", + "operationId": "support_bundle_index", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } }, - "4XX": { - "$ref": "#/components/responses/Error" + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" + } }, - "5XX": { - "$ref": "#/components/responses/Error" + { + "in": "path", + "name": 
"zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } } } }, "head": { - "summary": "Fetch a support bundle from a particular dataset", - "operationId": "support_bundle_head", + "summary": "Fetch metadata about the list of files within a support bundle", + "operationId": "support_bundle_head_index", "parameters": [ { "in": "path", @@ -985,16 +1171,6 @@ } } ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SupportBundleGetQueryParams" - } - } - }, - "required": true - }, "responses": { "default": { "description": "", @@ -5613,18 +5789,6 @@ "format": "uint8", "minimum": 0 }, - "SupportBundleGetQueryParams": { - "description": "Query parameters for reading the support bundle", - "type": "object", - "properties": { - "query_type": { - "$ref": "#/components/schemas/SupportBundleQueryType" - } - }, - "required": [ - "query_type" - ] - }, "SupportBundleMetadata": { "description": "Metadata about a support bundle", "type": "object", @@ -5641,60 +5805,6 @@ "support_bundle_id" ] }, - "SupportBundleQueryType": { - "description": "Describes the type of access to the support bundle", - "oneOf": [ - { - "description": "Access the whole support bundle", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "whole" - ] - } - }, - "required": [ - "type" - ] - }, - { - "description": "Access the names of all files within the support bundle", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "index" - ] - } - }, - "required": [ - "type" - ] - }, - { - "description": "Access a specific file within the support bundle", - "type": "object", - "properties": { - "file_path": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "path" - ] - } - }, - "required": [ - "file_path", - "type" - ] - } - ] - }, "SupportBundleState": { "type": "string", "enum": [ diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index e924cb2ee3..7e28831fa0 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -745,9 +745,9 @@ mod tests { let count = stats.collections.datum.value() as usize; assert!(count != 0); - assert_eq!( - count, - collection_count.load(Ordering::SeqCst), + let server_count = collection_count.load(Ordering::SeqCst); + assert!( + count == server_count || count - 1 == server_count, "number of collections reported by the collection \ task differs from the number reported by the empty \ producer server itself" @@ -892,9 +892,16 @@ mod tests { assert_eq!(stats.collections.datum.value(), 0); assert!(count != 0); - assert_eq!( - count, - collection_count.load(Ordering::SeqCst), + + // The server may have handled a request that we've not yet recorded on + // our collection task side, so we allow the server count to be greater + // than our own. But since the collection task is single-threaded, it + // cannot ever be more than _one_ greater than our count, since we + // should increment that counter before making another request to the + // server. 
+ let server_count = collection_count.load(Ordering::SeqCst); + assert!( + count == server_count || count - 1 == server_count, "number of collections reported by the collection \ task differs from the number reported by the always-ded \ producer server itself" diff --git a/package-manifest.toml b/package-manifest.toml index 83b1ba8168..b28ac7d59f 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -166,7 +166,8 @@ source.packages = [ "internal-dns-cli.tar.gz", "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", - "zone-network-install.tar.gz" + "zone-network-install.tar.gz", + "clickana.tar.gz" ] output.type = "zone" @@ -197,7 +198,8 @@ source.packages = [ "internal-dns-cli.tar.gz", "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", - "zone-network-install.tar.gz" + "zone-network-install.tar.gz", + "clickana.tar.gz" ] output.type = "zone" @@ -924,3 +926,12 @@ service_name = "probe" source.type = "composite" source.packages = ["thundermuffin.tar.gz"] output.type = "zone" + +[package.clickana] +service_name = "clickana" +only_for_targets.image = "standard" +source.type = "local" +source.rust.binary_names = ["clickana"] +source.rust.release = true +output.type = "zone" +output.intermediate_only = true \ No newline at end of file diff --git a/package/Cargo.toml b/package/Cargo.toml index b63a5ed96f..ccc768bb8e 100644 --- a/package/Cargo.toml +++ b/package/Cargo.toml @@ -25,6 +25,7 @@ reqwest = { workspace = true, features = [ "rustls-tls" ] } ring.workspace = true semver.workspace = true serde.workspace = true +shell-words.workspace = true sled-hardware.workspace = true slog.workspace = true slog-async.workspace = true diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index cc4050cbce..f4bda47e2c 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -4,31 +4,29 @@ //! Utility for bundling target binaries as tarfiles. 
-use anyhow::{anyhow, bail, ensure, Context, Result}; +use anyhow::{anyhow, bail, Context, Result}; use camino::{Utf8Path, Utf8PathBuf}; use clap::{Parser, Subcommand}; use futures::stream::{self, StreamExt, TryStreamExt}; use illumos_utils::{zfs, zone}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; +use omicron_package::cargo_plan::build_cargo_plan; +use omicron_package::config::{Config, ConfigArgs}; use omicron_package::target::KnownTarget; use omicron_package::{parse, BuildCommand, DeployCommand, TargetCommand}; -use omicron_zone_package::config::{Config as PackageConfig, PackageMap}; +use omicron_zone_package::config::Config as PackageConfig; use omicron_zone_package::package::{Package, PackageOutput, PackageSource}; use omicron_zone_package::progress::Progress; use omicron_zone_package::target::Target; use rayon::prelude::*; use ring::digest::{Context as DigestContext, Digest, SHA256}; use sled_hardware::cleanup::cleanup_networking_resources; -use slog::debug; use slog::o; use slog::Drain; use slog::Logger; use slog::{info, warn}; -use std::collections::{BTreeMap, BTreeSet}; use std::env; use std::fs::create_dir_all; -use std::io::Write; -use std::str::FromStr; use std::sync::{Arc, OnceLock}; use std::time::Duration; use tokio::io::{AsyncReadExt, AsyncWriteExt, BufReader}; @@ -43,11 +41,6 @@ enum SubCommand { Deploy(DeployCommand), } -fn parse_duration_ms(arg: &str) -> Result { - let ms = arg.parse()?; - Ok(std::time::Duration::from_millis(ms)) -} - #[derive(Debug, Parser)] #[clap(name = "packaging tool")] struct Args { @@ -63,170 +56,70 @@ struct Args { )] manifest: Utf8PathBuf, - #[clap( - short, - long, - help = "The name of the build target to use for this command", - default_value_t = ACTIVE.to_string(), - )] - target: String, - /// The output directory, where artifacts should be built and staged #[clap(long = "artifacts", default_value = "out/")] - artifact_dir: Utf8PathBuf, - - #[clap( - short, - long, - help = "Skip confirmation prompt for destructive operations", - action, - default_value_t = false - )] - force: bool, - - #[clap( - long, - help = "Number of retries to use when re-attempting failed package downloads", - action, - default_value_t = 10 - )] - retry_count: usize, + pub artifact_dir: Utf8PathBuf, - #[clap( - long, - help = "Duration, in ms, to wait before re-attempting failed package downloads", - action, - value_parser = parse_duration_ms, - default_value = "1000", - )] - retry_duration: std::time::Duration, + #[clap(flatten)] + config_args: ConfigArgs, #[clap(subcommand)] subcommand: SubCommand, } -#[derive(Debug, Default)] -struct CargoPlan<'a> { - command: &'a str, - packages: BTreeSet<&'a String>, - bins: BTreeSet<&'a String>, - features: BTreeSet<&'a String>, - release: bool, -} +async fn do_show_cargo_commands(config: &Config) -> Result<()> { + let metadata = cargo_metadata::MetadataCommand::new().no_deps().exec()?; + let features = config.cargo_features(); + let cargo_plan = + build_cargo_plan(&metadata, config.packages_to_build(), &features)?; -impl<'a> CargoPlan<'a> { - async fn run(&self, log: &Logger) -> Result<()> { - if self.bins.is_empty() { - return Ok(()); - } + let release_command = cargo_plan.release.build_command("build"); + let debug_command = cargo_plan.debug.build_command("build"); - let mut cmd = Command::new("cargo"); - // We rely on the rust-toolchain.toml file for toolchain information, - // rather than specifying one within the packaging tool. 
- cmd.arg(self.command); - // We specify _both_ --package and --bin; --bin does not imply - // --package, and without any --package options Cargo unifies features - // across all workspace default members. See rust-lang/cargo#8157. - for package in &self.packages { - cmd.arg("--package").arg(package); - } - for bin in &self.bins { - cmd.arg("--bin").arg(bin); - } - if !self.features.is_empty() { - cmd.arg("--features").arg(self.features.iter().fold( - String::new(), - |mut acc, s| { - if !acc.is_empty() { - acc.push(' '); - } - acc.push_str(s); - acc - }, - )); - } - if self.release { - cmd.arg("--release"); - } - info!(log, "running: {:?}", cmd.as_std()); - let status = cmd - .status() - .await - .context(format!("Failed to run command: ({:?})", cmd))?; - if !status.success() { - bail!("Failed to build packages"); - } + print!("release command: "); + if let Some(command) = release_command { + println!("{}", command_to_string(&command)); + } else { + println!("(none)"); + } - Ok(()) + print!("debug command: "); + if let Some(command) = debug_command { + println!("{}", command_to_string(&command)); + } else { + println!("(none)"); } + + Ok(()) +} + +fn command_to_string(command: &Command) -> String { + // Use shell-words to join the command and arguments into a single string. + let mut v = vec![command + .as_std() + .get_program() + .to_str() + .expect("program is valid UTF-8")]; + v.extend( + command + .as_std() + .get_args() + .map(|arg| arg.to_str().expect("argument is valid UTF-8")), + ); + + shell_words::join(&v) } async fn do_for_all_rust_packages( config: &Config, command: &str, ) -> Result<()> { - // Collect a map of all of the workspace packages - let workspace = cargo_metadata::MetadataCommand::new().no_deps().exec()?; - let workspace_pkgs = workspace - .packages - .into_iter() - .filter_map(|package| { - workspace - .workspace_members - .contains(&package.id) - .then_some((package.name.clone(), package)) - }) - .collect::>(); - - // Generate a list of all features we might want to request - let features = config - .target - .0 - .iter() - .map(|(name, value)| format!("{name}-{value}")) - .collect::>(); - - // We split the packages to be built into "release" and "debug" lists - let mut release = - CargoPlan { command, release: true, ..Default::default() }; - let mut debug = CargoPlan { command, release: false, ..Default::default() }; - - for (name, pkg) in config.packages_to_build().0 { - // If this is a Rust package, `name` (the map key) is the name of the - // corresponding Rust crate. - if let PackageSource::Local { rust: Some(rust_pkg), .. 
} = &pkg.source { - let plan = if rust_pkg.release { &mut release } else { &mut debug }; - // Add the package name to the plan - plan.packages.insert(name); - // Get the package metadata - let metadata = workspace_pkgs.get(name).with_context(|| { - format!("package '{name}' is not a workspace package") - })?; - // Add the binaries we want to build to the plan - let bins = metadata - .targets - .iter() - .filter_map(|target| target.is_bin().then_some(&target.name)) - .collect::>(); - for bin in &rust_pkg.binary_names { - ensure!( - bins.contains(bin), - "bin target '{bin}' does not belong to package '{name}'" - ); - plan.bins.insert(bin); - } - // Add all features we want to request to the plan - plan.features.extend( - features - .iter() - .filter(|feature| metadata.features.contains_key(*feature)), - ); - } - } + let metadata = cargo_metadata::MetadataCommand::new().no_deps().exec()?; + let features = config.cargo_features(); + let cargo_plan = + build_cargo_plan(&metadata, config.packages_to_build(), &features)?; - release.run(&config.log).await?; - debug.run(&config.log).await?; - Ok(()) + cargo_plan.run(command, config.log()).await } async fn do_check(config: &Config) -> Result<()> { @@ -240,7 +133,7 @@ async fn do_build(config: &Config) -> Result<()> { async fn do_dot(config: &Config) -> Result<()> { println!( "{}", - omicron_package::dot::do_dot(&config.target, &config.package_config)? + omicron_package::dot::do_dot(config.target(), config.package_config())? ); Ok(()) } @@ -262,9 +155,6 @@ async fn do_list_outputs( Ok(()) } -// The name reserved for the currently-in-use build target. -const ACTIVE: &str = "active"; - async fn do_target( artifact_dir: &Utf8Path, name: &str, @@ -302,7 +192,8 @@ async fn do_target( println!("Created new build target '{name}' and set it as active"); } TargetCommand::List => { - let active = tokio::fs::read_link(target_dir.join(ACTIVE)).await?; + let active = + tokio::fs::read_link(target_dir.join(Config::ACTIVE)).await?; let active = Utf8PathBuf::try_from(active)?; for entry in walkdir::WalkDir::new(&target_dir) .max_depth(1) @@ -341,7 +232,7 @@ async fn get_single_target( target_dir: impl AsRef, name: &str, ) -> Result { - if name == ACTIVE { + if name == Config::ACTIVE { bail!( "The name '{name}' is reserved, please try another (e.g. 
'default')\n\ Usage: '{} -t target ...'", @@ -358,7 +249,7 @@ async fn replace_active_link( let src = src.as_ref(); let target_dir = target_dir.as_ref(); - let dst = target_dir.join(ACTIVE); + let dst = target_dir.join(Config::ACTIVE); if !target_dir.join(src).exists() { bail!("Target file {} does not exist", src); } @@ -467,7 +358,7 @@ async fn ensure_package( output_directory: &Utf8Path, disable_cache: bool, ) -> Result<()> { - let target = &config.target; + let target = config.target(); let progress = ui.add_package(package_name.to_string()); match &package.source { PackageSource::Prebuilt { repo, commit, sha256 } => { @@ -484,7 +375,7 @@ async fn ensure_package( }; if should_download { - let mut attempts_left = config.retry_count + 1; + let mut attempts_left = config.retry_count() + 1; loop { match download_prebuilt( &progress, @@ -504,7 +395,7 @@ async fn ensure_package( if attempts_left == 0 { return Err(err); } - tokio::time::sleep(config.retry_duration).await; + tokio::time::sleep(config.retry_duration()).await; progress.reset(); } } @@ -555,7 +446,7 @@ async fn do_package( create_dir_all(&output_directory) .map_err(|err| anyhow!("Cannot create output directory: {}", err))?; - let ui = ProgressUI::new(&config.log); + let ui = ProgressUI::new(config.log()); do_build(&config).await?; @@ -596,8 +487,8 @@ async fn do_stamp( ) -> Result<()> { // Find the package which should be stamped let (_name, package) = config - .package_config - .packages_to_deploy(&config.target) + .package_config() + .packages_to_deploy(config.target()) .0 .into_iter() .find(|(name, _pkg)| name.as_str() == package_name) @@ -620,7 +511,8 @@ async fn do_unpack( })?; // Copy all packages to the install location in parallel. - let packages = config.package_config.packages_to_deploy(&config.target).0; + let packages = + config.package_config().packages_to_deploy(&config.target()).0; packages.par_iter().try_for_each( |(package_name, package)| -> Result<()> { @@ -629,7 +521,7 @@ async fn do_unpack( let dst = package.get_output_path(&package.service_name, install_dir); info!( - &config.log, + config.log(), "Installing service"; "src" => %src, "dst" => %dst, @@ -661,7 +553,7 @@ async fn do_unpack( let tar_path = install_dir.join(format!("{}.tar", service_name)); let service_path = install_dir.join(service_name); info!( - &config.log, + config.log(), "Unpacking service tarball"; "tar_path" => %tar_path, "service_path" => %service_path, @@ -681,14 +573,14 @@ fn do_activate(config: &Config, install_dir: &Utf8Path) -> Result<()> { // Install the bootstrap service, which itself extracts and // installs other services. 
if let Some(package) = - config.package_config.packages.get("omicron-sled-agent") + config.package_config().packages.get("omicron-sled-agent") { let manifest_path = install_dir .join(&package.service_name) .join("pkg") .join("manifest.xml"); info!( - config.log, + config.log(), "Installing bootstrap service from {}", manifest_path ); @@ -722,7 +614,7 @@ async fn uninstall_all_omicron_zones() -> Result<()> { fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { let datasets = match zfs::get_all_omicron_datasets_for_delete() { Err(e) => { - warn!(config.log, "Failed to get omicron datasets: {}", e); + warn!(config.log(), "Failed to get omicron datasets: {}", e); return Err(e); } Ok(datasets) => datasets, @@ -737,7 +629,7 @@ fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { datasets ))?; for dataset in &datasets { - info!(config.log, "Deleting dataset: {dataset}"); + info!(config.log(), "Deleting dataset: {dataset}"); zfs::Zfs::destroy_dataset(dataset)?; } @@ -747,8 +639,8 @@ fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { // Attempts to both disable and delete all requested packages. fn uninstall_all_packages(config: &Config) { for (_, package) in config - .package_config - .packages_to_deploy(&config.target) + .package_config() + .packages_to_deploy(config.target()) .0 .into_iter() .filter(|(_, package)| matches!(package.output, PackageOutput::Tarball)) @@ -812,18 +704,18 @@ fn remove_all_except>( } async fn do_deactivate(config: &Config) -> Result<()> { - info!(&config.log, "Removing all Omicron zones"); + info!(config.log(), "Removing all Omicron zones"); uninstall_all_omicron_zones().await?; - info!(config.log, "Uninstalling all packages"); + info!(config.log(), "Uninstalling all packages"); uninstall_all_packages(config); - info!(config.log, "Removing networking resources"); - cleanup_networking_resources(&config.log).await?; + info!(config.log(), "Removing networking resources"); + cleanup_networking_resources(config.log()).await?; Ok(()) } async fn do_uninstall(config: &Config) -> Result<()> { do_deactivate(config).await?; - info!(config.log, "Removing datasets"); + info!(config.log(), "Removing datasets"); uninstall_all_omicron_datasets(config)?; Ok(()) } @@ -834,7 +726,7 @@ async fn do_clean( install_dir: &Utf8Path, ) -> Result<()> { do_uninstall(&config).await?; - info!(config.log, "Removing artifacts from {}", artifact_dir); + info!(config.log(), "Removing artifacts from {}", artifact_dir); const ARTIFACTS_TO_KEEP: &[&str] = &[ "clickhouse", "cockroachdb", @@ -843,10 +735,10 @@ async fn do_clean( "downloads", "softnpu", ]; - remove_all_except(artifact_dir, ARTIFACTS_TO_KEEP, &config.log)?; - info!(config.log, "Removing installed objects in: {}", install_dir); + remove_all_except(artifact_dir, ARTIFACTS_TO_KEEP, config.log())?; + info!(config.log(), "Removing installed objects in: {}", install_dir); const INSTALLED_OBJECTS_TO_KEEP: &[&str] = &["opte"]; - remove_all_except(install_dir, INSTALLED_OBJECTS_TO_KEEP, &config.log)?; + remove_all_except(install_dir, INSTALLED_OBJECTS_TO_KEEP, config.log())?; Ok(()) } @@ -957,102 +849,6 @@ impl Progress for PackageProgress { } } -struct Config { - log: Logger, - // Description of all possible packages. - package_config: PackageConfig, - // Description of the target we're trying to operate on. - target: Target, - // The list of packages the user wants us to build (all, if empty) - only: Vec, - // True if we should skip confirmations for destructive operations. 
- force: bool, - // Number of times to retry failed downloads. - retry_count: usize, - // Duration to wait before retrying failed downloads. - retry_duration: std::time::Duration, -} - -impl Config { - /// Prompts the user for input before proceeding with an operation. - fn confirm(&self, prompt: &str) -> Result<()> { - if self.force { - return Ok(()); - } - - print!("{prompt}\n[yY to confirm] >> "); - let _ = std::io::stdout().flush(); - - let mut input = String::new(); - std::io::stdin().read_line(&mut input)?; - match input.as_str().trim() { - "y" | "Y" => Ok(()), - _ => bail!("Aborting"), - } - } - - /// Returns target packages to be assembled on the builder machine, limited - /// to those specified in `only` (if set). - fn packages_to_build(&self) -> PackageMap<'_> { - let packages = self.package_config.packages_to_build(&self.target); - if self.only.is_empty() { - return packages; - } - - let mut filtered_packages = PackageMap(BTreeMap::new()); - let mut to_walk = PackageMap(BTreeMap::new()); - // add the requested packages to `to_walk` - for package_name in &self.only { - to_walk.0.insert( - package_name, - packages.0.get(package_name).unwrap_or_else(|| { - panic!( - "Explicitly-requested package '{}' does not exist", - package_name - ) - }), - ); - } - // dependencies are listed by output name, so create a lookup table to - // get a package by its output name. - let lookup_by_output = packages - .0 - .iter() - .map(|(name, package)| { - (package.get_output_file(name), (*name, *package)) - }) - .collect::>(); - // packages yet to be walked are added to `to_walk`. pop each entry and - // add its dependencies to `to_walk`, then add the package we finished - // walking to `filtered_packages`. - while let Some((package_name, package)) = to_walk.0.pop_first() { - if let PackageSource::Composite { packages } = &package.source { - for output in packages { - // find the package by output name - let (dep_name, dep_package) = - lookup_by_output.get(output).unwrap_or_else(|| { - panic!( - "Could not find a package which creates '{}'", - output - ) - }); - if dep_name.as_str() == package_name { - panic!("'{}' depends on itself", package_name); - } - // if we've seen this package already, it will be in - // `filtered_packages`. otherwise, add it to `to_walk`. - if !filtered_packages.0.contains_key(dep_name) { - to_walk.0.insert(dep_name, dep_package); - } - } - } - // we're done looking at this package's deps - filtered_packages.0.insert(package_name, package); - } - filtered_packages - } -} - #[tokio::main] async fn main() -> Result<()> { let args = Args::try_parse()?; @@ -1069,43 +865,13 @@ async fn main() -> Result<()> { let drain = slog_async::Async::new(drain).build().fuse(); let log = Logger::root(drain, o!()); - let target_help_str = || -> String { - format!( - "Try calling: '{} -t default target create' to create a new build target", - env::current_exe().unwrap().display() - ) - }; - let get_config = || -> Result { - let target_path = args.artifact_dir.join("target").join(&args.target); - let raw_target = - std::fs::read_to_string(&target_path).inspect_err(|_| { - eprintln!( - "Failed to read build target: {}\n{}", - target_path, - target_help_str() - ); - })?; - let target: Target = KnownTarget::from_str(&raw_target) - .inspect_err(|_| { - eprintln!( - "Failed to parse {} as target\n{}", - target_path, - target_help_str() - ); - })? 
- .into(); - debug!(log, "target[{}]: {:?}", args.target, target); - - Ok(Config { - log: log.clone(), + Config::get_config( + &log, package_config, - target, - only: Vec::new(), - force: args.force, - retry_count: args.retry_count, - retry_duration: args.retry_duration, - }) + &args.config_args, + &args.artifact_dir, + ) }; // Use a CWD that is the root of the Omicron repository. @@ -1119,7 +885,12 @@ async fn main() -> Result<()> { match args.subcommand { SubCommand::Build(BuildCommand::Target { subcommand }) => { - do_target(&args.artifact_dir, &args.target, &subcommand).await?; + do_target( + &args.artifact_dir, + &args.config_args.target, + &subcommand, + ) + .await?; } SubCommand::Build(BuildCommand::Dot) => { do_dot(&get_config()?).await?; @@ -1130,7 +901,7 @@ async fn main() -> Result<()> { } SubCommand::Build(BuildCommand::Package { disable_cache, only }) => { let mut config = get_config()?; - config.only = only; + config.set_only(only); do_package(&config, &args.artifact_dir, disable_cache).await?; } SubCommand::Build(BuildCommand::Stamp { package_name, version }) => { @@ -1142,6 +913,9 @@ async fn main() -> Result<()> { ) .await?; } + SubCommand::Build(BuildCommand::ShowCargoCommands) => { + do_show_cargo_commands(&get_config()?).await?; + } SubCommand::Build(BuildCommand::Check) => { do_check(&get_config()?).await? } diff --git a/package/src/cargo_plan.rs b/package/src/cargo_plan.rs new file mode 100644 index 0000000000..1a32b199fb --- /dev/null +++ b/package/src/cargo_plan.rs @@ -0,0 +1,172 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::BTreeMap; +use std::collections::BTreeSet; + +use anyhow::bail; +use anyhow::ensure; +use anyhow::Context; +use anyhow::Result; +use cargo_metadata::Metadata; +use omicron_zone_package::config::PackageMap; +use omicron_zone_package::package::PackageSource; +use slog::info; +use slog::Logger; +use tokio::process::Command; + +/// For a configuration, build a plan: the set of packages, binaries, and +/// features to operate on in release and debug modes. +pub fn build_cargo_plan<'a>( + metadata: &Metadata, + package_map: PackageMap<'a>, + features: &'a [String], +) -> Result> { + // Collect a map of all of the workspace packages + let workspace_pkgs = metadata + .packages + .iter() + .filter_map(|package| { + metadata + .workspace_members + .contains(&package.id) + .then_some((package.name.clone(), package)) + }) + .collect::>(); + + let mut release = CargoTargets::new(BuildKind::Release); + let mut debug = CargoTargets::new(BuildKind::Debug); + + for (name, pkg) in package_map.0 { + // If this is a Rust package, `name` (the map key) is the name of the + // corresponding Rust crate. + if let PackageSource::Local { rust: Some(rust_pkg), .. 
} = &pkg.source { + let plan = if rust_pkg.release { &mut release } else { &mut debug }; + // Add the package name to the plan + plan.packages.insert(name); + // Get the package metadata + let metadata = workspace_pkgs.get(name).with_context(|| { + format!("package '{name}' is not a workspace package") + })?; + // Add the binaries we want to build to the plan + let bins = metadata + .targets + .iter() + .filter_map(|target| target.is_bin().then_some(&target.name)) + .collect::>(); + for bin in &rust_pkg.binary_names { + ensure!( + bins.contains(bin), + "bin target '{bin}' does not belong to package '{name}'" + ); + plan.bins.insert(bin); + } + // Add all features we want to request to the plan + plan.features.extend( + features + .iter() + .filter(|feature| metadata.features.contains_key(*feature)), + ); + } + } + + Ok(CargoPlan { release, debug }) +} + +#[derive(Debug)] +pub struct CargoPlan<'a> { + pub release: CargoTargets<'a>, + pub debug: CargoTargets<'a>, +} + +impl CargoPlan<'_> { + pub async fn run(&self, command: &str, log: &Logger) -> Result<()> { + self.release.run(command, log).await?; + self.debug.run(command, log).await?; + Ok(()) + } +} + +/// A set of packages, binaries, and features to operate on. +#[derive(Debug)] +pub struct CargoTargets<'a> { + pub kind: BuildKind, + pub packages: BTreeSet<&'a String>, + pub bins: BTreeSet<&'a String>, + pub features: BTreeSet<&'a String>, +} + +impl CargoTargets<'_> { + fn new(kind: BuildKind) -> Self { + Self { + kind, + packages: BTreeSet::new(), + bins: BTreeSet::new(), + features: BTreeSet::new(), + } + } + + pub fn build_command(&self, command: &str) -> Option { + if self.bins.is_empty() { + return None; + } + + let mut cmd = Command::new("cargo"); + // We rely on the rust-toolchain.toml file for toolchain information, + // rather than specifying one within the packaging tool. + cmd.arg(command); + // We specify _both_ --package and --bin; --bin does not imply + // --package, and without any --package options Cargo unifies features + // across all workspace default members. See rust-lang/cargo#8157. + for package in &self.packages { + cmd.arg("--package").arg(package); + } + for bin in &self.bins { + cmd.arg("--bin").arg(bin); + } + if !self.features.is_empty() { + cmd.arg("--features").arg(self.features.iter().fold( + String::new(), + |mut acc, s| { + if !acc.is_empty() { + acc.push(' '); + } + acc.push_str(s); + acc + }, + )); + } + match self.kind { + BuildKind::Release => { + cmd.arg("--release"); + } + BuildKind::Debug => {} + } + + Some(cmd) + } + + pub async fn run(&self, command: &str, log: &Logger) -> Result<()> { + let Some(mut cmd) = self.build_command(command) else { + return Ok(()); + }; + + info!(log, "running: {:?}", cmd.as_std()); + let status = cmd + .status() + .await + .context(format!("Failed to run command: ({:?})", cmd))?; + if !status.success() { + bail!("Failed to build packages"); + } + + Ok(()) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BuildKind { + Release, + Debug, +} diff --git a/package/src/config.rs b/package/src/config.rs new file mode 100644 index 0000000000..f80bd36057 --- /dev/null +++ b/package/src/config.rs @@ -0,0 +1,246 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
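The `cargo_plan` module introduced above exposes `CargoTargets::build_command`, so the exact invocation it produces can be inspected directly. A hedged sketch of what that looks like for a single release package; the package and feature names here are illustrative only:

```rust
use std::collections::BTreeSet;

use omicron_package::cargo_plan::{BuildKind, CargoTargets};

fn main() {
    // Hypothetical package/bin/feature names, just to show the shape of the
    // generated invocation.
    let package = "clickana".to_string();
    let feature = "switch-asic".to_string();

    let targets = CargoTargets {
        kind: BuildKind::Release,
        packages: BTreeSet::from([&package]),
        bins: BTreeSet::from([&package]),
        features: BTreeSet::from([&feature]),
    };

    // Passing --package alongside --bin matters: --bin alone lets Cargo unify
    // features across all workspace default members (rust-lang/cargo#8157).
    let cmd = targets
        .build_command("build")
        .expect("at least one binary was requested");

    // Roughly: "cargo" "build" "--package" "clickana" "--bin" "clickana"
    //          "--features" "switch-asic" "--release"
    println!("{:?}", cmd.as_std());
}
```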
+ +use anyhow::{bail, Result}; +use camino::Utf8Path; +use clap::Args; +use omicron_zone_package::{ + config::{Config as PackageConfig, PackageMap}, + package::PackageSource, + target::Target, +}; +use slog::{debug, Logger}; +use std::{ + collections::BTreeMap, env, io::Write, str::FromStr, time::Duration, +}; + +use crate::target::KnownTarget; + +#[derive(Debug, Args)] +pub struct ConfigArgs { + /// The name of the build target to use for this command + #[clap( + short, + long, + default_value_t = Config::ACTIVE.to_string(), + )] + pub target: String, + + /// Skip confirmation prompt for destructive operations + #[clap(short, long, action, default_value_t = false)] + pub force: bool, + + /// Number of retries to use when re-attempting failed package downloads + #[clap(long, action, default_value_t = 10)] + pub retry_count: usize, + + /// Duration, in ms, to wait before re-attempting failed package downloads + #[clap( + long, + action, + value_parser = parse_duration_ms, + default_value = "1000", + )] + pub retry_duration: Duration, +} + +fn parse_duration_ms(arg: &str) -> Result { + let ms = arg.parse()?; + Ok(Duration::from_millis(ms)) +} + +#[derive(Debug)] +pub struct Config { + log: Logger, + // Description of all possible packages. + package_config: PackageConfig, + // Description of the target we're trying to operate on. + target: Target, + // The list of packages the user wants us to build (all, if empty) + only: Vec, + // True if we should skip confirmations for destructive operations. + force: bool, + // Number of times to retry failed downloads. + retry_count: usize, + // Duration to wait before retrying failed downloads. + retry_duration: Duration, +} + +impl Config { + /// The name reserved for the currently-in-use build target. + pub const ACTIVE: &str = "active"; + + /// Builds a new configuration. + pub fn get_config( + log: &Logger, + package_config: PackageConfig, + args: &ConfigArgs, + artifact_dir: &Utf8Path, + ) -> Result { + let target_help_str = || -> String { + format!( + "Try calling: '{} -t default target create' to create a new build target", + env::current_exe().unwrap().display() + ) + }; + + let target_path = artifact_dir.join("target").join(&args.target); + let raw_target = + std::fs::read_to_string(&target_path).inspect_err(|_| { + eprintln!( + "Failed to read build target: {}\n{}", + target_path, + target_help_str() + ); + })?; + let target: Target = KnownTarget::from_str(&raw_target) + .inspect_err(|_| { + eprintln!( + "Failed to parse {} as target\n{}", + target_path, + target_help_str() + ); + })? + .into(); + debug!(log, "target[{}]: {:?}", args.target, target); + + Ok(Config { + log: log.clone(), + package_config, + target, + only: Vec::new(), + force: args.force, + retry_count: args.retry_count, + retry_duration: args.retry_duration, + }) + } + + /// Sets the `only` field. + #[inline] + pub fn set_only(&mut self, only: Vec) -> &mut Self { + self.only = only; + self + } + + /// Returns the logger. + #[inline] + pub fn log(&self) -> &Logger { + &self.log + } + + /// Returns the target currently being operated on. + #[inline] + pub fn target(&self) -> &Target { + &self.target + } + + /// Returns the underlying package configuration. + #[inline] + pub fn package_config(&self) -> &PackageConfig { + &self.package_config + } + + /// Returns the retry count. + #[inline] + pub fn retry_count(&self) -> usize { + self.retry_count + } + + /// Returns the retry duration. 
+ #[inline] + pub fn retry_duration(&self) -> Duration { + self.retry_duration + } + + /// Prompts the user for input before proceeding with an operation. + pub fn confirm(&self, prompt: &str) -> Result<()> { + if self.force { + return Ok(()); + } + + print!("{prompt}\n[yY to confirm] >> "); + let _ = std::io::stdout().flush(); + + let mut input = String::new(); + std::io::stdin().read_line(&mut input)?; + match input.as_str().trim() { + "y" | "Y" => Ok(()), + _ => bail!("Aborting"), + } + } + + /// Returns target packages to be assembled on the builder machine, limited + /// to those specified in `only` (if set). + pub fn packages_to_build(&self) -> PackageMap<'_> { + let packages = self.package_config.packages_to_build(&self.target); + if self.only.is_empty() { + return packages; + } + + let mut filtered_packages = PackageMap(BTreeMap::new()); + let mut to_walk = PackageMap(BTreeMap::new()); + // add the requested packages to `to_walk` + for package_name in &self.only { + to_walk.0.insert( + package_name, + packages.0.get(package_name).unwrap_or_else(|| { + panic!( + "Explicitly-requested package '{}' does not exist", + package_name + ) + }), + ); + } + // dependencies are listed by output name, so create a lookup table to + // get a package by its output name. + let lookup_by_output = packages + .0 + .iter() + .map(|(name, package)| { + (package.get_output_file(name), (*name, *package)) + }) + .collect::>(); + // packages yet to be walked are added to `to_walk`. pop each entry and + // add its dependencies to `to_walk`, then add the package we finished + // walking to `filtered_packages`. + while let Some((package_name, package)) = to_walk.0.pop_first() { + if let PackageSource::Composite { packages } = &package.source { + for output in packages { + // find the package by output name + let (dep_name, dep_package) = + lookup_by_output.get(output).unwrap_or_else(|| { + panic!( + "Could not find a package which creates '{}'", + output + ) + }); + if dep_name.as_str() == package_name { + panic!("'{}' depends on itself", package_name); + } + // if we've seen this package already, it will be in + // `filtered_packages`. otherwise, add it to `to_walk`. + if !filtered_packages.0.contains_key(dep_name) { + to_walk.0.insert(dep_name, dep_package); + } + } + } + // we're done looking at this package's deps + filtered_packages.0.insert(package_name, package); + } + filtered_packages + } + + /// Return a list of all possible Cargo features that could be requested for + /// the packages being built. + /// + /// Out of these, the features that actually get requested are determined by + /// which features are available for the list of packages being built. + pub fn cargo_features(&self) -> Vec { + self.target + .0 + .iter() + .map(|(name, value)| format!("{name}-{value}")) + .collect::>() + } +} diff --git a/package/src/lib.rs b/package/src/lib.rs index b37c1774fd..8ef9a4c951 100644 --- a/package/src/lib.rs +++ b/package/src/lib.rs @@ -5,6 +5,8 @@ use clap::Subcommand; use serde::de::DeserializeOwned; use thiserror::Error; +pub mod cargo_plan; +pub mod config; pub mod dot; pub mod target; @@ -130,6 +132,8 @@ pub enum BuildCommand { /// The version to be stamped onto the package. version: semver::Version, }, + /// Show the Cargo commands that would be run to build the packages. + ShowCargoCommands, /// Checks the packages specified in a manifest, without building them. 
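The new `omicron_package::config` module above exports both the clap argument block and the runtime `Config`, so other packaging entry points can reuse the same flags. A hedged sketch of that wiring follows; the binary name is a placeholder, and construction of the logger and `PackageConfig` is deliberately elided rather than invented:

```rust
use camino::Utf8PathBuf;
use clap::Parser;
use omicron_package::config::ConfigArgs;

/// Hypothetical downstream tool reusing the shared packaging flags
/// (-t/--target, --force, --retry-count, --retry-duration).
#[derive(Debug, Parser)]
#[clap(name = "example packaging helper")]
struct Cli {
    /// Where artifacts are built and staged, as in omicron-package.
    #[clap(long = "artifacts", default_value = "out/")]
    artifact_dir: Utf8PathBuf,

    #[clap(flatten)]
    config_args: ConfigArgs,
}

fn main() {
    let cli = Cli::parse();

    // With a slog::Logger `log` and an omicron_zone_package `PackageConfig`
    // `package_config` in hand, the runtime configuration is built the same
    // way the omicron-package binary does it:
    //
    //     let config = omicron_package::config::Config::get_config(
    //         &log, package_config, &cli.config_args, &cli.artifact_dir)?;
    //     let features = config.cargo_features();
    //
    // (Obtaining `log` and `package_config` is left out of this sketch.)
    println!("target flag: {}", cli.config_args.target);
}
```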
Check, } diff --git a/schema/crdb/add-completing-and-new-region-volume/up01.sql b/schema/crdb/add-completing-and-new-region-volume/up01.sql new file mode 100644 index 0000000000..6a973eb3c3 --- /dev/null +++ b/schema/crdb/add-completing-and-new-region-volume/up01.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.region_snapshot_replacement_state ADD VALUE IF NOT EXISTS 'completing' AFTER 'complete'; diff --git a/schema/crdb/add-completing-and-new-region-volume/up02.sql b/schema/crdb/add-completing-and-new-region-volume/up02.sql new file mode 100644 index 0000000000..42c0028ff5 --- /dev/null +++ b/schema/crdb/add-completing-and-new-region-volume/up02.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.region_snapshot_replacement ADD COLUMN IF NOT EXISTS new_region_volume_id UUID; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 75b7dbaf08..67b8ab0041 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -4427,7 +4427,8 @@ CREATE TYPE IF NOT EXISTS omicron.public.region_snapshot_replacement_state AS EN 'replacement_done', 'deleting_old_volume', 'running', - 'complete' + 'complete', + 'completing' ); CREATE TABLE IF NOT EXISTS omicron.public.region_snapshot_replacement ( @@ -4445,7 +4446,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.region_snapshot_replacement ( replacement_state omicron.public.region_snapshot_replacement_state NOT NULL, - operating_saga_id UUID + operating_saga_id UUID, + + new_region_volume_id UUID ); CREATE INDEX IF NOT EXISTS lookup_region_snapshot_replacement_by_state on omicron.public.region_snapshot_replacement (replacement_state); @@ -4694,7 +4697,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '116.0.0', NULL) + (TRUE, NOW(), NOW(), '117.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index 634640079a..56c6760be7 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -184,23 +184,61 @@ pub trait SledAgentApi { /// Fetch a support bundle from a particular dataset #[endpoint { method = GET, - path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}" + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download" }] - async fn support_bundle_get( + async fn support_bundle_download( rqctx: RequestContext, path_params: Path, - body: TypedBody, ) -> Result, HttpError>; - /// Fetch a support bundle from a particular dataset + /// Fetch a file within a support bundle from a particular dataset + #[endpoint { + method = GET, + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download/{file}" + }] + async fn support_bundle_download_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Fetch the index (list of files within a support bundle) + #[endpoint { + method = GET, + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/index" + }] + async fn support_bundle_index( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Fetch metadata about a support bundle from a particular dataset #[endpoint { method = HEAD, - path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}" + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download" }] async fn support_bundle_head( rqctx: RequestContext, path_params: Path, - body: TypedBody, + ) -> Result, HttpError>; + + /// Fetch metadata about a file within a support bundle from a particular dataset + #[endpoint { + method = HEAD, + 
path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download/{file}" + }] + async fn support_bundle_head_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Fetch metadata about the list of files within a support bundle + #[endpoint { + method = HEAD, + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/index" + }] + async fn support_bundle_head_index( + rqctx: RequestContext, + path_params: Path, ) -> Result, HttpError>; /// Delete a support bundle from a particular dataset @@ -689,6 +727,9 @@ pub struct SupportBundlePathParam { pub struct SupportBundleFilePathParam { #[serde(flatten)] pub parent: SupportBundlePathParam, + + /// The path of the file within the support bundle to query + pub file: String, } /// Metadata about a support bundle @@ -702,24 +743,6 @@ pub struct SupportBundleGetHeaders { range: String, } -/// Query parameters for reading the support bundle -#[derive(Deserialize, Serialize, JsonSchema)] -pub struct SupportBundleGetQueryParams { - pub query_type: SupportBundleQueryType, -} - -/// Describes the type of access to the support bundle -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum SupportBundleQueryType { - /// Access the whole support bundle - Whole, - /// Access the names of all files within the support bundle - Index, - /// Access a specific file within the support bundle - Path { file_path: String }, -} - #[derive(Deserialize, Debug, Serialize, JsonSchema, PartialEq)] #[serde(rename_all = "snake_case")] pub enum SupportBundleState { diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 88259537d2..edba1c47b1 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -6,6 +6,7 @@ use super::sled_agent::SledAgent; use crate::sled_agent::Error as SledAgentError; +use crate::support_bundle::storage::SupportBundleQueryType; use crate::zone_bundle::BundleError; use bootstore::schemes::v0::NetworkConfig; use camino::Utf8PathBuf; @@ -267,37 +268,135 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseCreated(metadata)) } - async fn support_bundle_get( + async fn support_bundle_download( rqctx: RequestContext, path_params: Path, - body: TypedBody, ) -> Result, HttpError> { let sa = rqctx.context(); let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = path_params.into_inner(); let range = rqctx.range(); - let query = body.into_inner().query_type; Ok(sa .as_support_bundle_storage() - .get(zpool_id, dataset_id, support_bundle_id, range, query) + .get( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Whole, + ) + .await?) + } + + async fn support_bundle_download_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundleFilePathParam { + parent: + SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id }, + file, + } = path_params.into_inner(); + + let range = rqctx.range(); + Ok(sa + .as_support_bundle_storage() + .get( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Path { file_path: file }, + ) + .await?) 
+ } + + async fn support_bundle_index( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + + let range = rqctx.range(); + Ok(sa + .as_support_bundle_storage() + .get( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Index, + ) .await?) } async fn support_bundle_head( rqctx: RequestContext, path_params: Path, - body: TypedBody, ) -> Result, HttpError> { let sa = rqctx.context(); let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = path_params.into_inner(); let range = rqctx.range(); - let query = body.into_inner().query_type; Ok(sa .as_support_bundle_storage() - .head(zpool_id, dataset_id, support_bundle_id, range, query) + .head( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Whole, + ) + .await?) + } + + async fn support_bundle_head_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundleFilePathParam { + parent: + SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id }, + file, + } = path_params.into_inner(); + + let range = rqctx.range(); + Ok(sa + .as_support_bundle_storage() + .head( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Path { file_path: file }, + ) + .await?) + } + + async fn support_bundle_head_index( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + + let range = rqctx.range(); + Ok(sa + .as_support_bundle_storage() + .head( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Index, + ) .await?) 
} diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index db250eb914..7c5fb44d13 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -76,6 +76,7 @@ use omicron_common::address::WICKETD_NEXUS_PROXY_PORT; use omicron_common::address::WICKETD_PORT; use omicron_common::address::{ get_internal_dns_server_addresses, CLICKHOUSE_ADMIN_PORT, + CLICKHOUSE_TCP_PORT, }; use omicron_common::address::{Ipv6Subnet, NEXUS_TECHPORT_EXTERNAL_PORT}; use omicron_common::address::{BOOTSTRAP_ARTIFACT_PORT, COCKROACH_ADMIN_PORT}; @@ -1597,13 +1598,20 @@ impl ServiceManager { addr.to_string() }; + // The ClickHouse client connects via the TCP port + let ch_address = { + let mut addr = *address; + addr.set_port(CLICKHOUSE_TCP_PORT); + addr.to_string() + }; + let clickhouse_admin_config = PropertyGroupBuilder::new("config") .add_property("http_address", "astring", admin_address) .add_property( "ch_address", "astring", - address.to_string(), + ch_address.to_string(), ) .add_property( "ch_binary", @@ -1668,13 +1676,20 @@ impl ServiceManager { addr.to_string() }; + // The ClickHouse client connects via the TCP port + let ch_address = { + let mut addr = *address; + addr.set_port(CLICKHOUSE_TCP_PORT); + addr.to_string() + }; + let clickhouse_admin_config = PropertyGroupBuilder::new("config") .add_property("http_address", "astring", admin_address) .add_property( "ch_address", "astring", - address.to_string(), + ch_address.to_string(), ) .add_property( "ch_binary", diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 2d23f9150b..60dcb1be31 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -438,10 +438,9 @@ impl SledAgentApi for SledAgentSimImpl { )) } - async fn support_bundle_get( + async fn support_bundle_download( rqctx: RequestContext, path_params: Path, - _body: TypedBody, ) -> Result, HttpError> { let sa = rqctx.context(); let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = @@ -458,10 +457,95 @@ impl SledAgentApi for SledAgentSimImpl { .unwrap()) } + async fn support_bundle_download_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundleFilePathParam { + parent: + SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id }, + file: _, + } = path_params.into_inner(); + + sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; + + Ok(http::Response::builder() + .status(http::StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/html") + .body(dropshot::Body::with_content( + "simulated support bundle file; do not eat", + )) + .unwrap()) + } + + async fn support_bundle_index( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + + sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; + + Ok(http::Response::builder() + .status(http::StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/html") + .body(dropshot::Body::with_content( + "simulated support bundle index; do not eat", + )) + .unwrap()) + } + async fn support_bundle_head( rqctx: RequestContext, path_params: Path, - _body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + + sa.support_bundle_get(zpool_id, dataset_id, 
support_bundle_id).await?; + + let fictional_length = 10000; + + Ok(http::Response::builder() + .status(http::StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/html") + .header(hyper::header::ACCEPT_RANGES, "bytes") + .header(hyper::header::CONTENT_LENGTH, fictional_length) + .body(dropshot::Body::empty()) + .unwrap()) + } + + async fn support_bundle_head_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundleFilePathParam { + parent: + SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id }, + file: _, + } = path_params.into_inner(); + + sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; + + let fictional_length = 10000; + + Ok(http::Response::builder() + .status(http::StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/html") + .header(hyper::header::ACCEPT_RANGES, "bytes") + .header(hyper::header::CONTENT_LENGTH, fictional_length) + .body(dropshot::Body::empty()) + .unwrap()) + } + + async fn support_bundle_head_index( + rqctx: RequestContext, + path_params: Path, ) -> Result, HttpError> { let sa = rqctx.context(); let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index dc8cf63fe4..2299ba9db9 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -117,6 +117,8 @@ impl CrucibleDataInner { bail!("region creation error!"); } + let read_only = params.source.is_some(); + let region = Region { id: params.id, block_size: params.block_size, @@ -129,8 +131,8 @@ impl CrucibleDataInner { cert_pem: None, key_pem: None, root_pem: None, - source: None, - read_only: params.source.is_some(), + source: params.source, + read_only, }; let old = self.regions.insert(id, region.clone()); @@ -168,12 +170,6 @@ impl CrucibleDataInner { let id = Uuid::from_str(&id.0).unwrap(); if let Some(region) = self.regions.get_mut(&id) { - if region.state == State::Failed { - // The real Crucible agent would not let a Failed region be - // deleted - bail!("cannot delete in state Failed"); - } - region.state = State::Destroyed; self.used_ports.remove(®ion.port_number); Ok(Some(region.clone())) @@ -1370,29 +1366,41 @@ pub struct PantryVolume { activate_job: Option, } +pub struct PantryInner { + /// Map Volume UUID to PantryVolume struct + volumes: HashMap, + + jobs: HashSet, + + /// Auto activate volumes attached in the background + auto_activate_volumes: bool, +} + /// Simulated crucible pantry pub struct Pantry { pub id: OmicronZoneUuid, - /// Map Volume UUID to PantryVolume struct - volumes: Mutex>, sled_agent: Arc, - jobs: Mutex>, + inner: Mutex, } impl Pantry { pub fn new(sled_agent: Arc) -> Self { Self { id: OmicronZoneUuid::new_v4(), - volumes: Mutex::new(HashMap::default()), sled_agent, - jobs: Mutex::new(HashSet::default()), + inner: Mutex::new(PantryInner { + volumes: HashMap::default(), + jobs: HashSet::default(), + auto_activate_volumes: false, + }), } } pub async fn status(&self) -> Result { + let inner = self.inner.lock().await; Ok(PantryStatus { - volumes: self.volumes.lock().await.keys().cloned().collect(), - num_job_handles: self.jobs.lock().await.len(), + volumes: inner.volumes.keys().cloned().collect(), + num_job_handles: inner.jobs.len(), }) } @@ -1400,8 +1408,9 @@ impl Pantry { &self, volume_id: String, ) -> Result { - let volumes = self.volumes.lock().await; - match volumes.get(&volume_id) { + let inner = self.inner.lock().await; + + match 
inner.volumes.get(&volume_id) { Some(entry) => Ok(entry.vcr.clone()), None => Err(HttpError::for_not_found(None, volume_id)), @@ -1413,9 +1422,9 @@ impl Pantry { volume_id: String, volume_construction_request: VolumeConstructionRequest, ) -> Result<()> { - let mut volumes = self.volumes.lock().await; + let mut inner = self.inner.lock().await; - volumes.insert( + inner.volumes.insert( volume_id, PantryVolume { vcr: volume_construction_request, @@ -1431,29 +1440,34 @@ impl Pantry { Ok(()) } + pub async fn set_auto_activate_volumes(&self) { + self.inner.lock().await.auto_activate_volumes = true; + } + pub async fn attach_activate_background( &self, volume_id: String, activate_job_id: String, volume_construction_request: VolumeConstructionRequest, ) -> Result<(), HttpError> { - let mut volumes = self.volumes.lock().await; - let mut jobs = self.jobs.lock().await; + let mut inner = self.inner.lock().await; + + let auto_activate_volumes = inner.auto_activate_volumes; - volumes.insert( + inner.volumes.insert( volume_id, PantryVolume { vcr: volume_construction_request, status: VolumeStatus { - active: false, - seen_active: false, + active: auto_activate_volumes, + seen_active: auto_activate_volumes, num_job_handles: 1, }, activate_job: Some(activate_job_id.clone()), }, ); - jobs.insert(activate_job_id); + inner.jobs.insert(activate_job_id); Ok(()) } @@ -1463,8 +1477,8 @@ impl Pantry { volume_id: String, ) -> Result { let activate_job = { - let volumes = self.volumes.lock().await; - volumes.get(&volume_id).unwrap().activate_job.clone().unwrap() + let inner = self.inner.lock().await; + inner.volumes.get(&volume_id).unwrap().activate_job.clone().unwrap() }; let mut status = self.volume_status(volume_id.clone()).await?; @@ -1481,9 +1495,9 @@ impl Pantry { &self, volume_id: String, ) -> Result { - let volumes = self.volumes.lock().await; + let inner = self.inner.lock().await; - match volumes.get(&volume_id) { + match inner.volumes.get(&volume_id) { Some(pantry_volume) => Ok(pantry_volume.status.clone()), None => Err(HttpError::for_not_found(None, volume_id)), @@ -1495,9 +1509,9 @@ impl Pantry { volume_id: String, status: VolumeStatus, ) -> Result<(), HttpError> { - let mut volumes = self.volumes.lock().await; + let mut inner = self.inner.lock().await; - match volumes.get_mut(&volume_id) { + match inner.volumes.get_mut(&volume_id) { Some(pantry_volume) => { pantry_volume.status = status; Ok(()) @@ -1511,8 +1525,8 @@ impl Pantry { &self, job_id: String, ) -> Result { - let jobs = self.jobs.lock().await; - if !jobs.contains(&job_id) { + let inner = self.inner.lock().await; + if !inner.jobs.contains(&job_id) { return Err(HttpError::for_not_found(None, job_id)); } Ok(true) @@ -1522,11 +1536,11 @@ impl Pantry { &self, job_id: String, ) -> Result, HttpError> { - let mut jobs = self.jobs.lock().await; - if !jobs.contains(&job_id) { + let mut inner = self.inner.lock().await; + if !inner.jobs.contains(&job_id) { return Err(HttpError::for_not_found(None, job_id)); } - jobs.remove(&job_id); + inner.jobs.remove(&job_id); Ok(Ok(true)) } @@ -1539,9 +1553,9 @@ impl Pantry { self.entry(volume_id).await?; // Make up job - let mut jobs = self.jobs.lock().await; + let mut inner = self.inner.lock().await; let job_id = Uuid::new_v4().to_string(); - jobs.insert(job_id.clone()); + inner.jobs.insert(job_id.clone()); Ok(job_id) } @@ -1555,8 +1569,9 @@ impl Pantry { // the simulated instance ensure, then call // [`instance_issue_disk_snapshot_request`] as the snapshot logic is the // same. 
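The simulated Pantry refactor above folds three separately locked fields into a single `Mutex<PantryInner>`. A stripped-down sketch of the same pattern, with simplified placeholder field types rather than the real simulator state:

```rust
use std::collections::{HashMap, HashSet};
use tokio::sync::Mutex;

// All mutable simulator state behind a single lock, so every handler sees
// volumes, jobs, and the auto-activate flag as one consistent snapshot.
struct PantryInner {
    volumes: HashMap<String, bool>, // volume id -> "active" flag (placeholder)
    jobs: HashSet<String>,
    auto_activate_volumes: bool,
}

struct Pantry {
    inner: Mutex<PantryInner>,
}

impl Pantry {
    fn new() -> Self {
        Self {
            inner: Mutex::new(PantryInner {
                volumes: HashMap::new(),
                jobs: HashSet::new(),
                auto_activate_volumes: false,
            }),
        }
    }

    async fn attach_activate_background(&self, volume_id: String, job_id: String) {
        // One lock guards the whole update; a concurrent `status()` can never
        // observe the new volume without its activation job, which would be
        // possible if `volumes` and `jobs` lived behind separate mutexes.
        let mut inner = self.inner.lock().await;
        let active = inner.auto_activate_volumes;
        inner.volumes.insert(volume_id, active);
        inner.jobs.insert(job_id);
    }

    async fn status(&self) -> (usize, usize) {
        let inner = self.inner.lock().await;
        (inner.volumes.len(), inner.jobs.len())
    }
}
```

The single guard also removes any question of lock-acquisition order between the old `volumes` and `jobs` mutexes.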
- let volumes = self.volumes.lock().await; - let volume_construction_request = &volumes.get(&volume_id).unwrap().vcr; + let inner = self.inner.lock().await; + let volume_construction_request = + &inner.volumes.get(&volume_id).unwrap().vcr; self.sled_agent .map_disk_ids_to_region_ids(volume_construction_request) @@ -1636,16 +1651,16 @@ impl Pantry { self.entry(volume_id).await?; // Make up job - let mut jobs = self.jobs.lock().await; + let mut inner = self.inner.lock().await; let job_id = Uuid::new_v4().to_string(); - jobs.insert(job_id.clone()); + inner.jobs.insert(job_id.clone()); Ok(job_id) } pub async fn detach(&self, volume_id: String) -> Result<()> { - let mut volumes = self.volumes.lock().await; - volumes.remove(&volume_id); + let mut inner = self.inner.lock().await; + inner.volumes.remove(&volume_id); Ok(()) } } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index b9bf703933..37526690cb 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -962,12 +962,21 @@ impl SledAgent { continue; }; - // First, ensure the dataset exists - let dataset_id = zone.id.into_untyped_uuid(); - self.inner - .storage - .upsert_filesystem(dataset_id, dataset_name) - .await?; + // NOTE: This code will be deprecated by https://github.com/oxidecomputer/omicron/pull/7160 + // + // However, we need to ensure that all blueprints have datasets + // within them before we can remove this back-fill. + // + // Therefore, we do something hairy here: We ensure the filesystem + // exists, but don't specify any dataset UUID value. + // + // This means that: + // - If the dataset exists and has a UUID, this will be a no-op + // - If the dataset doesn't exist, it'll be created without its + // oxide:uuid zfs property set + // - If a subsequent call to "datasets_ensure" tries to set a UUID, + // it should be able to get set (once). + self.inner.storage.upsert_filesystem(None, dataset_name).await?; } self.inner diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs index e51f35e146..97d345a8d2 100644 --- a/sled-agent/src/support_bundle/storage.rs +++ b/sled-agent/src/support_bundle/storage.rs @@ -100,6 +100,17 @@ impl From for HttpError { } } +/// Describes the type of access to the support bundle +#[derive(Clone, Debug)] +pub(crate) enum SupportBundleQueryType { + /// Access the whole support bundle + Whole, + /// Access the names of all files within the support bundle + Index, + /// Access a specific file within the support bundle + Path { file_path: String }, +} + // Implements "seeking" and "putting a capacity on a file" manually. // // TODO: When https://github.com/zip-rs/zip2/issues/231 is resolved, diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index f0653c7905..d6ffd42d0a 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -35,7 +35,6 @@ use std::collections::HashSet; use std::future::Future; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{interval, Duration, MissedTickBehavior}; -use uuid::Uuid; // The size of the mpsc bounded channel used to communicate // between the `StorageHandle` and `StorageManager`. @@ -100,7 +99,7 @@ enum StorageManagerState { #[derive(Debug)] pub(crate) struct NewFilesystemRequest { - dataset_id: Uuid, + dataset_id: Option, dataset_name: DatasetName, responder: DebugIgnore>>, } @@ -526,7 +525,7 @@ impl StorageHandle { // and ask for the set of all datasets from Nexus. 
pub async fn upsert_filesystem( &self, - dataset_id: Uuid, + dataset_id: Option, dataset_name: DatasetName, ) -> Result<(), Error> { let (tx, rx) = oneshot::channel(); @@ -1499,27 +1498,9 @@ impl StorageManager { zoned, encryption_details, size_details, - id: Some(DatasetUuid::from_untyped_uuid(request.dataset_id)), + id: request.dataset_id, additional_options: None, })?; - // Ensure the dataset has a usable UUID. - if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { - if let Ok(id) = id_str.parse::() { - if id != request.dataset_id { - return Err(Error::UuidMismatch { - name: request.dataset_name.full_name(), - old: id, - new: request.dataset_id, - }); - } - return Ok(()); - } - } - Zfs::set_oxide_value( - &fs_name, - "uuid", - &request.dataset_id.to_string(), - )?; Ok(()) } @@ -1544,7 +1525,6 @@ mod tests { use std::collections::BTreeMap; use std::str::FromStr; use std::sync::atomic::Ordering; - use uuid::Uuid; // A helper struct to advance time. struct TimeTravel {} @@ -2005,16 +1985,92 @@ mod tests { .expect("Ensuring disks should work after key manager is ready"); assert!(!result.has_error(), "{:?}", result); - // Create a filesystem on the newly formatted U.2 - let dataset_id = Uuid::new_v4(); + // Create a filesystem on the newly formatted U.2. + // + // We can call "upsert_filesystem" both with and without a UUID. + let dataset_id = DatasetUuid::new_v4(); + let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); + let dataset_name = + DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); + harness + .handle() + .upsert_filesystem(Some(dataset_id), dataset_name.clone()) + .await + .unwrap(); + // Observe the dataset exists, and the UUID is set. + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); + + harness + .handle() + .upsert_filesystem(None, dataset_name.clone()) + .await + .unwrap(); + // Observe the dataset still exists, and the UUID is still set, + // even though we did not ask for a new value explicitly. + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); + + harness.cleanup().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn upsert_filesystem_no_uuid() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("upsert_filesystem"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Test setup: Add a U.2 and M.2, adopt them into the "control plane" + // for usage. 
+ harness.handle().key_manager_ready().await; + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + // Create a filesystem on the newly formatted U.2, without a UUID let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); let dataset_name = DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); harness .handle() - .upsert_filesystem(dataset_id, dataset_name) + .upsert_filesystem(None, dataset_name.clone()) .await .unwrap(); + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, None); + + // Later, we can set the UUID to a specific value + let dataset_id = DatasetUuid::new_v4(); + harness + .handle() + .upsert_filesystem(Some(dataset_id), dataset_name.clone()) + .await + .unwrap(); + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); harness.cleanup().await; logctx.cleanup_successful(); diff --git a/tools/console_version b/tools/console_version index 85ca41f755..ef52d38564 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="c1ebd8d9acae4ff7a09b2517265fba52ebdfe82e" -SHA2="840dbfda1c0def66212e7602d7be6e8acf1b26ba218f10ce3e627df49f5ce9e2" +COMMIT="d583ae70786db46ace23f0e45bc9fdcffc21e6ae" +SHA2="590fda51f0599879effea4f4b2754ec572f6c1f4d5a586016ff6a2e2d8b6f5f9"
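The sled-agent and sled-storage hunks above make the dataset UUID optional when upserting a filesystem: passing None only ensures the dataset exists and leaves any existing oxide:uuid property alone, while passing Some(id) lets the UUID be recorded once a caller knows it. The sketch below is a minimal, standard-library-only model of that set-at-most-once behavior under those assumptions; DatasetId, FakeDataset, and upsert are hypothetical names for illustration, not the real StorageManager or Zfs APIs.

    // Illustrative model only: a stand-in for the "optional dataset UUID,
    // set at most once" behavior described in the NOTE comment above.
    // DatasetId, FakeDataset, and upsert are hypothetical, not omicron APIs.

    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    struct DatasetId(u128);

    #[derive(Debug, Default)]
    struct FakeDataset {
        // Models the `oxide:uuid` ZFS property: absent until set once.
        id: Option<DatasetId>,
    }

    impl FakeDataset {
        // Ensure the dataset "exists", optionally supplying an ID.
        // - None: no-op with respect to the stored ID.
        // - Some(id) and no ID stored yet: the ID is recorded.
        // - Some(id) and an ID already stored: left unchanged in this sketch
        //   (the real system may handle mismatches differently).
        fn upsert(&mut self, id: Option<DatasetId>) {
            if self.id.is_none() {
                if let Some(new) = id {
                    self.id = Some(new);
                }
            }
        }
    }

    fn main() {
        let mut dataset = FakeDataset::default();

        // Back-fill path: ensure existence without claiming an ID.
        dataset.upsert(None);
        assert_eq!(dataset.id, None);

        // A later caller supplies the ID, which is recorded exactly once.
        let id = DatasetId(42);
        dataset.upsert(Some(id));
        assert_eq!(dataset.id, Some(id));

        // Subsequent calls, with or without an ID, leave it in place.
        dataset.upsert(None);
        dataset.upsert(Some(DatasetId(7)));
        assert_eq!(dataset.id, Some(id));
    }

Under this model, the blueprint back-fill in sled_agent.rs corresponds to the upsert(None) call, and a later datasets_ensure-style caller corresponds to upsert(Some(id)), matching what the new upsert_filesystem tests exercise.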