diff --git a/Cargo.lock b/Cargo.lock index 142a1ed939..02e09d74d0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1332,6 +1332,32 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +[[package]] +name = "clickana" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "chrono", + "clap", + "clickhouse-admin-server-client", + "clickhouse-admin-types", + "dropshot 0.13.0", + "futures", + "omicron-common", + "omicron-workspace-hack", + "ratatui", + "schemars", + "serde_json", + "slog", + "slog-async", + "slog-dtrace", + "slog-error-chain", + "slog-term", + "tokio", + "tokio-postgres", +] + [[package]] name = "clickhouse-admin-api" version = "0.1.0" @@ -6032,6 +6058,19 @@ dependencies = [ "uuid", ] +[[package]] +name = "nexus-reconfigurator-blippy" +version = "0.1.0" +dependencies = [ + "nexus-reconfigurator-planning", + "nexus-sled-agent-shared", + "nexus-types", + "omicron-common", + "omicron-test-utils", + "omicron-uuid-kinds", + "omicron-workspace-hack", +] + [[package]] name = "nexus-reconfigurator-execution" version = "0.1.0" @@ -6099,6 +6138,7 @@ dependencies = [ "maplit", "nexus-config", "nexus-inventory", + "nexus-reconfigurator-blippy", "nexus-sled-agent-shared", "nexus-types", "omicron-common", @@ -7141,6 +7181,7 @@ dependencies = [ "ring 0.17.8", "semver 1.0.23", "serde", + "shell-words", "sled-hardware", "slog", "slog-async", diff --git a/Cargo.toml b/Cargo.toml index 639064580b..12be26ecbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ members = [ "cockroach-admin/types", "common", "dev-tools/cert-dev", + "dev-tools/clickana", "dev-tools/clickhouse-cluster-dev", "dev-tools/ch-dev", "dev-tools/crdb-seed", @@ -78,6 +79,7 @@ members = [ "nexus/macros-common", "nexus/metrics-producer-gc", "nexus/networking", + "nexus/reconfigurator/blippy", "nexus/reconfigurator/execution", "nexus/reconfigurator/planning", "nexus/reconfigurator/preparation", @@ -158,6 +160,7 @@ default-members = [ "cockroach-admin/types", "common", "dev-tools/cert-dev", + "dev-tools/clickana", "dev-tools/clickhouse-cluster-dev", "dev-tools/ch-dev", "dev-tools/crdb-seed", @@ -211,6 +214,7 @@ default-members = [ "nexus/macros-common", "nexus/metrics-producer-gc", "nexus/networking", + "nexus/reconfigurator/blippy", "nexus/reconfigurator/execution", "nexus/reconfigurator/planning", "nexus/reconfigurator/preparation", @@ -332,6 +336,7 @@ chrono = { version = "0.4", features = [ "serde" ] } chrono-tz = "0.10.0" ciborium = "0.2.2" clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] } +clickana = { path = "dev-tools/clickana" } clickhouse-admin-api = { path = "clickhouse-admin/api" } clickhouse-admin-keeper-client = { path = "clients/clickhouse-admin-keeper-client" } clickhouse-admin-server-client = { path = "clients/clickhouse-admin-server-client" } @@ -466,6 +471,7 @@ nexus-internal-api = { path = "nexus/internal-api" } nexus-macros-common = { path = "nexus/macros-common" } nexus-metrics-producer-gc = { path = "nexus/metrics-producer-gc" } nexus-networking = { path = "nexus/networking" } +nexus-reconfigurator-blippy = { path = "nexus/reconfigurator/blippy" } nexus-reconfigurator-execution = { path = "nexus/reconfigurator/execution" } nexus-reconfigurator-planning = { path = "nexus/reconfigurator/planning" } nexus-reconfigurator-preparation = { path = "nexus/reconfigurator/preparation" } diff --git a/clickhouse-admin/Cargo.toml b/clickhouse-admin/Cargo.toml index 
80b080b2ff..65ceb3fdbf 100644 --- a/clickhouse-admin/Cargo.toml +++ b/clickhouse-admin/Cargo.toml @@ -22,7 +22,9 @@ slog.workspace = true slog-async.workspace = true slog-dtrace.workspace = true slog-error-chain.workspace = true +slog-term.workspace = true serde.workspace = true +serde_json.workspace = true thiserror.workspace = true tokio.workspace = true tokio-postgres.workspace = true diff --git a/clickhouse-admin/types/src/lib.rs b/clickhouse-admin/types/src/lib.rs index 726f4ad6e4..b4dd21652c 100644 --- a/clickhouse-admin/types/src/lib.rs +++ b/clickhouse-admin/types/src/lib.rs @@ -1203,7 +1203,7 @@ pub enum Timestamp { #[derive(Debug, Serialize, Deserialize, JsonSchema, PartialEq)] #[serde(rename_all = "snake_case")] pub struct SystemTimeSeries { - pub time: Timestamp, + pub time: String, pub value: f64, // TODO: Would be really nice to have an enum with possible units (s, ms, bytes) // Not sure if I can even add this, the system tables don't mention units at all. @@ -2102,15 +2102,15 @@ snapshot_storage_disk=LocalSnapshotDisk let expected = vec![ SystemTimeSeries { - time: crate::Timestamp::Unix("1732494720".to_string()), + time: "1732494720".to_string(), value: 110220450825.75238, }, SystemTimeSeries { - time: crate::Timestamp::Unix("1732494840".to_string()), + time: "1732494840".to_string(), value: 110339992917.33331, }, SystemTimeSeries { - time: crate::Timestamp::Unix("1732494960".to_string()), + time: "1732494960".to_string(), value: 110421854037.33331, }, ]; @@ -2130,21 +2130,15 @@ snapshot_storage_disk=LocalSnapshotDisk let expected = vec![ SystemTimeSeries { - time: crate::Timestamp::Utc( - "2024-11-25T00:34:00Z".parse::>().unwrap(), - ), + time: "2024-11-25T00:34:00Z".to_string(), value: 110220450825.75238, }, SystemTimeSeries { - time: crate::Timestamp::Utc( - "2024-11-25T00:35:00Z".parse::>().unwrap(), - ), + time: "2024-11-25T00:35:00Z".to_string(), value: 110339992917.33331, }, SystemTimeSeries { - time: crate::Timestamp::Utc( - "2024-11-25T00:36:00Z".parse::>().unwrap(), - ), + time: "2024-11-25T00:36:00Z".to_string(), value: 110421854037.33331, }, ]; @@ -2179,7 +2173,7 @@ snapshot_storage_disk=LocalSnapshotDisk assert_eq!( format!("{}", root_cause), - "data did not match any variant of untagged enum Timestamp at line 1 column 12", + "invalid type: integer `2024`, expected a string at line 1 column 12", ); } } diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index d8022f51c0..4c8f032fcb 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1934,6 +1934,8 @@ impl JsonSchema for L4PortRange { DeserializeFromStr, PartialEq, Eq, + PartialOrd, + Ord, SerializeDisplay, Hash, )] diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index ff14e899fb..588117c71c 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -57,6 +57,8 @@ pub enum NetworkInterfaceKind { JsonSchema, PartialEq, Eq, + PartialOrd, + Ord, Hash, Diffus, )] @@ -79,7 +81,17 @@ pub struct NetworkInterface { // Note that `Deserialize` is manually implemented; if you make any changes to // the fields of this structure, you must make them to that implementation too. #[derive( - Debug, Clone, Copy, Serialize, JsonSchema, PartialEq, Eq, Hash, Diffus, + Debug, + Clone, + Copy, + Serialize, + JsonSchema, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Diffus, )] pub struct SourceNatConfig { /// The external address provided to the instance or service. 
diff --git a/dev-tools/clickana/Cargo.toml b/dev-tools/clickana/Cargo.toml new file mode 100644 index 0000000000..a9f91b890b --- /dev/null +++ b/dev-tools/clickana/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "clickana" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +camino.workspace = true +chrono.workspace = true +clap.workspace = true +clickhouse-admin-types.workspace = true +clickhouse-admin-server-client.workspace = true +dropshot.workspace = true +futures.workspace = true +omicron-common.workspace = true +ratatui.workspace = true +schemars.workspace = true +slog.workspace = true +slog-async.workspace = true +slog-dtrace.workspace = true +slog-error-chain.workspace = true +slog-term.workspace = true +serde_json.workspace = true +tokio.workspace = true +tokio-postgres.workspace = true + +omicron-workspace-hack.workspace = true + +[lints] +workspace = true diff --git a/dev-tools/clickana/src/bin/clickana.rs b/dev-tools/clickana/src/bin/clickana.rs new file mode 100644 index 0000000000..0c8d06156e --- /dev/null +++ b/dev-tools/clickana/src/bin/clickana.rs @@ -0,0 +1,57 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::Result; +use camino::Utf8PathBuf; +use clap::Parser; +use clickana::Clickana; +use std::net::SocketAddr; + +const CLICKANA_LOG_FILE: &str = "/tmp/clickana.log"; + +#[tokio::main] +async fn main() -> Result<()> { + let args = Cli::parse(); + + let terminal = ratatui::init(); + let result = Clickana::new( + args.clickhouse_addr, + args.log_path, + args.sampling_interval, + args.time_range, + args.refresh_interval, + ) + .run(terminal) + .await; + ratatui::restore(); + result +} + +#[derive(Debug, Parser)] +struct Cli { + /// Path to the log file + #[arg( + long, + short, + env = "CLICKANA_LOG_PATH", + default_value = CLICKANA_LOG_FILE, + )] + log_path: Utf8PathBuf, + + /// Address where a clickhouse admin server is listening on + #[arg(long, short = 'a')] + clickhouse_addr: SocketAddr, + + /// The interval to collect monitoring data in seconds + #[arg(long, short, default_value_t = 60)] + sampling_interval: u64, + + /// Range of time to collect monitoring data in seconds + #[arg(long, short, default_value_t = 3600)] + time_range: u64, + + /// The interval at which the dashboards will refresh + #[arg(long, short, default_value_t = 60)] + refresh_interval: u64, +} diff --git a/dev-tools/clickana/src/chart.rs b/dev-tools/clickana/src/chart.rs new file mode 100644 index 0000000000..f8a78fb63d --- /dev/null +++ b/dev-tools/clickana/src/chart.rs @@ -0,0 +1,765 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use anyhow::{bail, Result}; +use chrono::{DateTime, Utc}; +use clickhouse_admin_server_client::types::{SystemTable, SystemTimeSeries}; +use ratatui::{ + layout::{Constraint, Rect}, + style::{Color, Style, Stylize}, + symbols::Marker, + text::Line, + widgets::{Axis, Block, Chart, Dataset, GraphType, LegendPosition}, + Frame, +}; +use std::fmt::Display; + +// Ratatui requires data points in a Dataset to be f64 +const GIBIBYTE_F64: f64 = 1073741824.0; +const MEBIBYTE_F64: f64 = 1048576.0; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Unit { + Count, + Gibibyte, + Mebibyte, +} + +impl Display for Unit { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + Unit::Count => "", + Unit::Gibibyte => "GiB", + Unit::Mebibyte => "MiB", + }; + write!(f, "{s}") + } +} + +impl Unit { + /// Returns the value of the unit represented in bytes. + fn as_bytes_f64(&self) -> Result { + let bytes = match self { + Unit::Gibibyte => GIBIBYTE_F64, + Unit::Mebibyte => MEBIBYTE_F64, + Unit::Count => bail!("Count cannot be converted into bytes"), + }; + Ok(bytes) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum MetricName { + DiskUsage, + MemoryTracking, + QueryCount, + RunningQueries, +} + +impl Display for MetricName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + MetricName::DiskUsage => "DiskUsed_default", + MetricName::MemoryTracking => "CurrentMetric_MemoryTracking", + MetricName::QueryCount => "ProfileEvent_Query", + MetricName::RunningQueries => "CurrentMetric_Query", + }; + write!(f, "{s}") + } +} + +impl MetricName { + /// Returns the associated table to query for each metric. + pub fn table(&self) -> SystemTable { + match self { + MetricName::DiskUsage => SystemTable::AsynchronousMetricLog, + MetricName::MemoryTracking + | MetricName::QueryCount + | MetricName::RunningQueries => SystemTable::MetricLog, + } + } + + /// Returns the unit the data values will be represented as. + fn unit(&self) -> Unit { + match self { + MetricName::DiskUsage => Unit::Gibibyte, + MetricName::MemoryTracking => Unit::Mebibyte, + MetricName::QueryCount | MetricName::RunningQueries => Unit::Count, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ChartMetadata { + pub title: String, + pub unit: Unit, +} + +impl ChartMetadata { + pub fn new(metric: MetricName, title: String) -> Self { + let unit = metric.unit(); + Self { title, unit } + } +} + +#[derive(Debug)] +struct TimeSeriesValues { + /// A collection of all the values from the timeseries + values: Vec, +} + +impl TimeSeriesValues { + fn new(raw_data: &Vec) -> Self { + let values: Vec = raw_data.iter().map(|ts| ts.value).collect(); + Self { values } + } + + fn min(&self) -> Result<&f64> { + let Some(min_value) = + self.values.iter().min_by(|a, b| a.partial_cmp(b).unwrap()) + else { + bail!("no values have been retrieved") + }; + + Ok(min_value) + } + + fn max(&self) -> Result<&f64> { + let Some(max_value) = + self.values.iter().max_by(|a, b| a.partial_cmp(b).unwrap()) + else { + bail!("no values have been retrieved") + }; + + Ok(max_value) + } + + /// Returns the average value of the max and min values. + fn avg(&self, min_value_label: f64, max_value_label: f64) -> f64 { + (min_value_label + max_value_label) / 2.0 + } +} + +// The result of the following functions will not be precise, but it doesn't +// matter since we just want an estimate for the chart's labels and bounds. 
+// All we need are values that are larger than the maximum value in the
+// timeseries or smaller than the minimum value in the timeseries.
+
+/// Returns the sum of the maximum raw value and 1 or the equivalent of 1
+/// MiB or GiB in bytes.
+fn padded_max_value_raw(unit: Unit, max_value_raw: &f64) -> Result<f64> {
+    let ceil_value = max_value_raw.ceil();
+    let padded_value = match unit {
+        Unit::Count => ceil_value + 1.0,
+        Unit::Gibibyte | Unit::Mebibyte => ceil_value + unit.as_bytes_f64()?,
+    };
+    Ok(padded_value)
+}
+
+/// Returns the maximum value converted into the given unit (MiB or GiB),
+/// or the raw count, plus 1, rounded up.
+fn padded_max_value_as_unit(unit: Unit, max_value_raw: &f64) -> Result<f64> {
+    let label_value = match unit {
+        Unit::Count => max_value_raw + 1.0,
+        Unit::Gibibyte | Unit::Mebibyte => {
+            (max_value_raw / unit.as_bytes_f64()?) + 1.0
+        }
+    };
+    Ok(label_value.ceil())
+}
+
+/// Returns the difference of the minimum raw value and 1 or the equivalent
+/// of 1 in MiB or GiB in bytes. If the minimum is equal to or less than 1.0,
+/// or the equivalent of 1 once converted from bytes to the expected unit
+/// (e.g. less than or equal to 1048576 if we're using MiB) we'll use 0.0 as
+/// the minimum value as we don't expect any of our charts
+/// to require negative numbers for now.
+fn padded_min_value_raw(unit: Unit, min_value_raw: &f64) -> Result<f64> {
+    let padded_value = match unit {
+        Unit::Count => {
+            if *min_value_raw <= 1.0 {
+                0.0
+            } else {
+                min_value_raw - 1.0
+            }
+        }
+        Unit::Gibibyte | Unit::Mebibyte => {
+            let bytes = unit.as_bytes_f64()?;
+            if *min_value_raw <= bytes {
+                0.0
+            } else {
+                min_value_raw - bytes
+            }
+        }
+    };
+    Ok(padded_value.floor())
+}
+
+/// Returns the minimum value converted into the given unit (MiB or GiB),
+/// or the raw count, minus 1, rounded down. If that value is less than 1,
+/// we'll use 0 as the minimum value as we don't expect any of our charts
+/// to require negative numbers for now.
+fn padded_min_value_as_unit(unit: Unit, min_value_raw: &f64) -> Result<f64> {
+    let padded_value = match unit {
+        Unit::Count => {
+            if *min_value_raw < 1.0 {
+                0.0
+            } else {
+                min_value_raw - 1.0
+            }
+        }
+        Unit::Gibibyte | Unit::Mebibyte => {
+            let value_as_unit = min_value_raw / unit.as_bytes_f64()?;
+            if value_as_unit < 1.0 {
+                0.0
+            } else {
+                value_as_unit - 1.0
+            }
+        }
+    };
+    Ok(padded_value.floor())
+}
+
+#[derive(Debug, PartialEq)]
+struct YAxisValues {
+    lower_label: String,
+    mid_label: String,
+    upper_label: String,
+    lower_bound: f64,
+    upper_bound: f64,
+}
+
+impl YAxisValues {
+    fn new(unit: Unit, raw_data: &Vec<SystemTimeSeries>) -> Result<Self> {
+        // Retrieve values only to create Y axis bounds and labels
+        let values = TimeSeriesValues::new(&raw_data);
+        let max_value = values.max()?;
+        let min_value = values.min()?;
+
+        // In case there is very little variance in the y axis, we will be adding some
+        // padding to the bounds and labels so we don't end up with repeated labels or
+        // straight lines too close to the upper bounds.
+        let upper_bound = padded_max_value_raw(unit, max_value)?;
+        let upper_label_as_unit = padded_max_value_as_unit(unit, max_value)?;
+        let lower_bound = padded_min_value_raw(unit, min_value)?;
+        let lower_label_as_unit = padded_min_value_as_unit(unit, min_value)?;
+        let mid_label_as_unit =
+            values.avg(lower_label_as_unit, upper_label_as_unit);
+
+        // To nicely display the mid value label for the Y axis, we do the following:
+        // - It is not displayed if it is 0.0.
+        // - If it does not have a fractional number we display it as an integer.
+        // - Else, we display the number up to the first decimal place.
+        let fractional_of_mid_value =
+            mid_label_as_unit - mid_label_as_unit.floor();
+        let mid_value_formatted = format!("{:.1}", mid_label_as_unit);
+        let mid_label = if mid_value_formatted == *"0.0" {
+            "".to_string()
+        } else if fractional_of_mid_value == 0.0 {
+            format!(
+                "{} {}",
+                mid_value_formatted.split('.').next().unwrap(),
+                unit
+            )
+        } else {
+            format!("{} {}", mid_value_formatted, unit)
+        };
+
+        let upper_label = format!("{} {}", upper_label_as_unit, unit);
+        let lower_label = format!("{} {}", lower_label_as_unit, unit);
+
+        Ok(Self {
+            lower_label,
+            mid_label,
+            upper_label,
+            lower_bound,
+            upper_bound,
+        })
+    }
+}
+
+#[derive(Debug)]
+struct TimeSeriesTimestamps {
+    /// A collection of all the timestamps from the timeseries
+    timestamps: Vec<i64>,
+}
+
+impl TimeSeriesTimestamps {
+    fn new(raw_data: &Vec<SystemTimeSeries>) -> Self {
+        let timestamps: Vec<i64> = raw_data
+            .iter()
+            .map(|ts| {
+                ts.time.trim_matches('"').parse::<i64>().unwrap_or_else(|_| {
+                    panic!("could not parse timestamp {} into i64", ts.time)
+                })
+            })
+            .collect();
+        Self { timestamps }
+    }
+
+    fn min(&self) -> Result<&i64> {
+        let Some(start_time) = self.timestamps.iter().min() else {
+            bail!("failed to retrieve start time, timestamp list is empty")
+        };
+        Ok(start_time)
+    }
+
+    fn max(&self) -> Result<&i64> {
+        let Some(end_time) = self.timestamps.iter().max() else {
+            bail!("failed to retrieve end time, timestamp list is empty")
+        };
+        Ok(end_time)
+    }
+
+    fn avg(&self, start_time: &i64, end_time: &i64) -> i64 {
+        (start_time + end_time) / 2
+    }
+}
+
+#[derive(Debug, PartialEq)]
+pub struct XAxisTimestamps {
+    mid_time_label: DateTime<Utc>,
+    pub start_time_label: DateTime<Utc>,
+    pub end_time_label: DateTime<Utc>,
+    start_time_bound: f64,
+    end_time_bound: f64,
+}
+
+impl XAxisTimestamps {
+    fn new(raw_data: &Vec<SystemTimeSeries>) -> Result<Self> {
+        // Retrieve timestamps only to create chart bounds and labels
+        let timestamps = TimeSeriesTimestamps::new(&raw_data);
+        // These timestamps will be used to calculate start and end timestamps in order
+        // to create labels and set bounds for the X axis. As above, some of these conversions
+        // may lose precision, but it's OK as these values are only used to make sure the
+        // datapoints fit within the graph nicely.
+        let start_time = timestamps.min()?;
+        let end_time = timestamps.max()?;
+        let avg_time = timestamps.avg(start_time, end_time);
+
+        let Some(start_time_label) = DateTime::from_timestamp(*start_time, 0)
+        else {
+            bail!(
+                "failed to convert timestamp to UTC date and time;
+                timestamp = {}",
+                start_time
+            )
+        };
+        let Some(end_time_label) = DateTime::from_timestamp(*end_time, 0)
+        else {
+            bail!(
+                "failed to convert timestamp to UTC date and time;
+                timestamp = {}",
+                end_time
+            )
+        };
+        let Some(mid_time_label) = DateTime::from_timestamp(avg_time, 0) else {
+            bail!(
+                "failed to convert timestamp to UTC date and time;
+                timestamp = {}",
+                avg_time
+            )
+        };
+
+        let start_time_bound = *start_time as f64;
+        let end_time_bound = *end_time as f64;
+
+        Ok(Self {
+            mid_time_label,
+            start_time_label,
+            end_time_label,
+            start_time_bound,
+            end_time_bound,
+        })
+    }
+}
+
+#[derive(Debug, PartialEq)]
+struct DataPoints {
+    data: Vec<(f64, f64)>,
+}
+
+impl DataPoints {
+    fn new(timeseries: &Vec<SystemTimeSeries>) -> Self {
+        // These values will be used to render the graph and ratatui
+        // requires them to be f64
+        let data: Vec<(f64, f64)> = timeseries
+            .iter()
+            .map(|ts| {
+                (
+                    ts.time.trim_matches('"').parse::<f64>().unwrap_or_else(
+                        |_| {
+                            panic!(
+                                "could not parse timestamp {} into f64",
+                                ts.time
+                            )
+                        },
+                    ),
+                    ts.value,
+                )
+            })
+            .collect();
+        Self { data }
+    }
+}
+
+#[derive(Debug, PartialEq)]
+pub struct ChartData {
+    metadata: ChartMetadata,
+    data_points: DataPoints,
+    pub x_axis_timestamps: XAxisTimestamps,
+    y_axis_values: YAxisValues,
+}
+
+impl ChartData {
+    pub fn new(
+        raw_data: Vec<SystemTimeSeries>,
+        metadata: ChartMetadata,
+    ) -> Result<Self> {
+        // Retrieve datapoints that will be charted
+        let data_points = DataPoints::new(&raw_data);
+
+        // Retrieve X axis bounds and labels
+        let x_axis_timestamps = XAxisTimestamps::new(&raw_data)?;
+
+        // Retrieve Y axis bounds and labels
+        let y_axis_values = YAxisValues::new(metadata.unit, &raw_data)?;
+
+        Ok(Self { metadata, data_points, x_axis_timestamps, y_axis_values })
+    }
+
+    pub fn render_line_chart(&self, frame: &mut Frame, area: Rect) {
+        let datasets = vec![Dataset::default()
+            .marker(Marker::Braille)
+            .style(Style::default().fg(Color::LightGreen))
+            .graph_type(GraphType::Line)
+            .data(&self.data_points.data)];
+
+        let chart = Chart::new(datasets)
+            .block(
+                Block::bordered()
+                    .title(Line::from(self.title()).cyan().bold().centered()),
+            )
+            .x_axis(
+                Axis::default()
+                    .style(Style::default().gray())
+                    .bounds([self.start_time_bound(), self.end_time_bound()])
+                    .labels([
+                        self.start_time_label().bold(),
+                        self.mid_time_label().bold(),
+                        self.end_time_label().bold(),
+                    ]),
+            )
+            .y_axis(
+                Axis::default()
+                    .style(Style::default().gray())
+                    .bounds([
+                        self.lower_value_bound(),
+                        self.upper_value_bound(),
+                    ])
+                    .labels([
+                        self.lower_value_label().bold(),
+                        self.mid_value_label().bold(),
+                        self.upper_value_label().bold(),
+                    ]),
+            )
+            .legend_position(Some(LegendPosition::TopLeft))
+            .hidden_legend_constraints((
+                Constraint::Ratio(1, 2),
+                Constraint::Ratio(1, 2),
+            ));
+
+        frame.render_widget(chart, area);
+    }
+
+    fn title(&self) -> String {
+        self.metadata.title.clone()
+    }
+
+    pub fn start_date_time(&self) -> DateTime<Utc> {
+        self.x_axis_timestamps.start_time_label
+    }
+
+    pub fn end_date_time(&self) -> DateTime<Utc> {
+        self.x_axis_timestamps.end_time_label
+    }
+
+    fn start_time_label(&self) -> String {
+        self.x_axis_timestamps.start_time_label.time().to_string()
+    }
+
+    fn mid_time_label(&self) -> String {
+
self.x_axis_timestamps.mid_time_label.time().to_string() + } + + fn end_time_label(&self) -> String { + self.x_axis_timestamps.end_time_label.time().to_string() + } + + fn start_time_bound(&self) -> f64 { + self.x_axis_timestamps.start_time_bound + } + + fn end_time_bound(&self) -> f64 { + self.x_axis_timestamps.end_time_bound + } + + fn lower_value_label(&self) -> String { + self.y_axis_values.lower_label.clone() + } + + fn mid_value_label(&self) -> String { + self.y_axis_values.mid_label.clone() + } + + fn upper_value_label(&self) -> String { + self.y_axis_values.upper_label.clone() + } + + fn lower_value_bound(&self) -> f64 { + self.y_axis_values.lower_bound + } + + fn upper_value_bound(&self) -> f64 { + self.y_axis_values.upper_bound + } +} + +#[cfg(test)] +mod tests { + use crate::{ + chart::{Unit, YAxisValues}, + ChartData, ChartMetadata, MetricName, + }; + use chrono::DateTime; + use clickhouse_admin_server_client::types::SystemTimeSeries; + + use super::{DataPoints, XAxisTimestamps}; + + #[test] + fn gather_chart_data_for_disk_usage_success() { + let metadata = + ChartMetadata::new(MetricName::DiskUsage, "Test Chart".to_string()); + let raw_data = vec![ + SystemTimeSeries { + time: "1732223400".to_string(), + value: 479551511587.3104, + }, + SystemTimeSeries { + time: "1732223520".to_string(), + value: 479555459822.93335, + }, + SystemTimeSeries { + time: "1732223640".to_string(), + value: 479560290201.6, + }, + ]; + + let expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Gibibyte, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 479551511587.3104), + (1732223520.0, 479555459822.93335), + (1732223640.0, 479560290201.6), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "445 GiB".to_string(), + mid_label: "446.5 GiB".to_string(), + upper_label: "448 GiB".to_string(), + lower_bound: 478477769763.0, + upper_bound: 480634032026.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + fn gather_chart_data_for_memory_tracking_success() { + let metadata = ChartMetadata::new( + MetricName::MemoryTracking, + "Test Chart".to_string(), + ); + let raw_data = vec![ + SystemTimeSeries { + time: "1732223400".to_string(), + value: 479551511587.3104, + }, + SystemTimeSeries { + time: "1732223520".to_string(), + value: 479555459822.93335, + }, + SystemTimeSeries { + time: "1732223640".to_string(), + value: 479560290201.6, + }, + ]; + + let expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Mebibyte, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 479551511587.3104), + (1732223520.0, 479555459822.93335), + (1732223640.0, 479560290201.6), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "457334 MiB".to_string(), + mid_label: "457340 
MiB".to_string(), + upper_label: "457346 MiB".to_string(), + lower_bound: 479550463011.0, + upper_bound: 479561338778.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + fn gather_chart_data_for_query_count_success() { + let metadata = ChartMetadata::new( + MetricName::QueryCount, + "Test Chart".to_string(), + ); + let raw_data = vec![ + SystemTimeSeries { time: "1732223400".to_string(), value: 0.0 }, + SystemTimeSeries { time: "1732223520".to_string(), value: 0.004 }, + SystemTimeSeries { time: "1732223640".to_string(), value: 0.0 }, + ]; + + let expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Count, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 0.0), + (1732223520.0, 0.004), + (1732223640.0, 0.0), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "0 ".to_string(), + mid_label: "1 ".to_string(), + upper_label: "2 ".to_string(), + lower_bound: 0.0, + upper_bound: 2.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + fn gather_chart_data_for_running_queries_success() { + let metadata = ChartMetadata::new( + MetricName::RunningQueries, + "Test Chart".to_string(), + ); + let raw_data = vec![ + SystemTimeSeries { time: "1732223400".to_string(), value: 1.554 }, + SystemTimeSeries { time: "1732223520".to_string(), value: 1.877 }, + SystemTimeSeries { time: "1732223640".to_string(), value: 1.3456 }, + ]; + + let expected_result = ChartData { + metadata: ChartMetadata { + title: "Test Chart".to_string(), + unit: Unit::Count, + }, + data_points: DataPoints { + data: vec![ + (1732223400.0, 1.554), + (1732223520.0, 1.877), + (1732223640.0, 1.3456), + ], + }, + x_axis_timestamps: XAxisTimestamps { + start_time_label: DateTime::from_timestamp(1732223400, 0) + .unwrap(), + mid_time_label: DateTime::from_timestamp(1732223520, 0) + .unwrap(), + end_time_label: DateTime::from_timestamp(1732223640, 0) + .unwrap(), + start_time_bound: 1732223400.0, + end_time_bound: 1732223640.0, + }, + y_axis_values: YAxisValues { + lower_label: "0 ".to_string(), + mid_label: "1.5 ".to_string(), + upper_label: "3 ".to_string(), + lower_bound: 0.0, + upper_bound: 3.0, + }, + }; + let result = ChartData::new(raw_data, metadata).unwrap(); + assert_eq!(result, expected_result); + } + + #[test] + #[should_panic( + expected = "could not parse timestamp Some nonsense string into f64" + )] + fn gather_chart_data_failure() { + let metadata = + ChartMetadata::new(MetricName::DiskUsage, "Test Chart".to_string()); + let raw_data = vec![ + SystemTimeSeries { + time: "Some nonsense string".to_string(), + value: 479551511587.3104, + }, + SystemTimeSeries { + time: "1732223520".to_string(), + value: 479555459822.93335, + }, + SystemTimeSeries { + time: "1732223640".to_string(), + value: 479560290201.6, + }, + ]; + + let _ = ChartData::new(raw_data, metadata); + } +} diff --git a/dev-tools/clickana/src/lib.rs b/dev-tools/clickana/src/lib.rs new file mode 100644 index 0000000000..76af35ba8d --- /dev/null +++ b/dev-tools/clickana/src/lib.rs @@ -0,0 +1,274 @@ +// This Source Code Form is 
subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::{anyhow, bail, Context, Result}; +use camino::Utf8PathBuf; +use chrono::{DateTime, Utc}; +use clickhouse_admin_server_client::types::{ + SystemTimeSeries, TimestampFormat, +}; +use clickhouse_admin_server_client::Client as ClickhouseServerClient; +use futures::stream::FuturesOrdered; +use futures::StreamExt; +use omicron_common::FileKv; +use ratatui::crossterm::event::{self, Event, KeyCode}; +use ratatui::layout::{Constraint, Layout, Rect}; +use ratatui::style::{Color, Style, Stylize}; +use ratatui::text::Span; +use ratatui::widgets::Paragraph; +use ratatui::{DefaultTerminal, Frame}; +use slog::{o, Drain, Logger}; +use slog_async::Async; +use slog_term::{FullFormat, PlainDecorator}; +use std::collections::BTreeMap; +use std::future::Future; +use std::net::SocketAddr; +use std::pin::Pin; +use std::time::{Duration, Instant}; + +use crate::chart::{ChartData, ChartMetadata, MetricName}; + +mod chart; + +#[derive(Debug)] +struct Dashboard { + start_time: DateTime, + end_time: DateTime, + top_left_frame: ChartData, + top_right_frame: ChartData, + bottom_left_frame: ChartData, + bottom_right_frame: ChartData, +} + +#[derive(Clone, Debug)] +pub struct Clickana { + clickhouse_addr: SocketAddr, + log_path: Utf8PathBuf, + sampling_interval: u64, + time_range: u64, + refresh_interval: u64, +} + +impl Clickana { + pub fn new( + clickhouse_addr: SocketAddr, + log_path: Utf8PathBuf, + sampling_interval: u64, + time_range: u64, + refresh_interval: u64, + ) -> Self { + Self { + clickhouse_addr, + log_path, + sampling_interval, + time_range, + refresh_interval, + } + } + + pub async fn run(self, mut terminal: DefaultTerminal) -> Result<()> { + let admin_url = format!("http://{}", self.clickhouse_addr); + let log = self.new_logger()?; + let client = ClickhouseServerClient::new(&admin_url, log.clone()); + + let tick_interval = Duration::from_secs(self.refresh_interval); + let mut last_tick = Instant::now(); + loop { + // Charts we will be showing in the dashboard + // + // We are hardcoding these for now. In the future these will likely be taken + // from a TOML config file. + let charts = BTreeMap::from([ + (MetricName::DiskUsage, "Disk Usage".to_string()), + ( + MetricName::MemoryTracking, + "Memory Allocated by the Server".to_string(), + ), + ( + MetricName::QueryCount, + "Queries Started per Second".to_string(), + ), + (MetricName::RunningQueries, "Queries Running".to_string()), + ]); + + let mut tasks = FuturesOrdered::< + Pin>>>, + >::new(); + + for (metric_name, title) in charts { + let s = self.clone(); + let c = client.clone(); + + let task = Box::pin(async move { + let metadata = ChartMetadata::new(metric_name, title); + let data = s.get_api_data(&c, metric_name).await?; + ChartData::new(data, metadata) + }); + tasks.push_back(task); + } + + if tasks.len() != 4 { + bail!( + "expected information for 4 charts, received {} instead", + tasks.len() + ); + } + + // TODO: Eventually we may want to not have a set amount of charts and make the + // dashboard a bit more dynamic. Perhaps taking a toml configuration file or + // something like that. We can then create a vector of "ChartData"s for Dashboard + // to take and create the layout dynamically. 
+ // + // IDEA (ajs): I think it would be useful to be able to have a little menu of charts + // on the side of the pane, and then you can scroll and select which ones to show + // without having to restart the app, or mess with a toml file. + // You could also allow toggling between a set of predefined layouts to make it always + // look nice. So you could show, 1, 2, 4, 6, 8 charts or something and allow selecting + // which to show in each view. You could even remember which charts to show in each layout, + // so you could toggle back and forth between different layouts and see all the charts, + // some with more detail. + // + // We have already checked that the length of tasks is 4, so it's safe to unwrap + let top_left_frame: ChartData = tasks.next().await.unwrap()?; + let top_right_frame: ChartData = tasks.next().await.unwrap()?; + let bottom_left_frame: ChartData = tasks.next().await.unwrap()?; + let bottom_right_frame: ChartData = tasks.next().await.unwrap()?; + + // We only need to retrieve from one chart as they will all be relatively the same. + // Rarely, the charts may have a variance of a second or so depending on when + // the API calls were made, but for the header block we don't need exact precision. + let start_time = top_left_frame.start_date_time(); + let end_time = top_left_frame.end_date_time(); + + let dashboard = Dashboard { + start_time, + end_time, + top_left_frame, + top_right_frame, + bottom_left_frame, + bottom_right_frame, + }; + terminal.draw(|frame| self.draw(frame, dashboard))?; + + let timeout = tick_interval.saturating_sub(last_tick.elapsed()); + if event::poll(timeout)? { + if let Event::Key(key) = event::read()? { + // To exit the dashboard press the "q" key + if key.code == KeyCode::Char('q') { + return Ok(()); + } + } + } + + if last_tick.elapsed() >= tick_interval { + last_tick = Instant::now(); + } + } + } + + fn draw(&self, frame: &mut Frame, dashboard: Dashboard) { + let [heading, top, bottom] = Layout::vertical([ + Constraint::Length(4), + // TODO: If we make the dashboard with too many charts + // we may want to reconsider setting sizes instead of filling + // the space + Constraint::Fill(1), + Constraint::Fill(1), + ]) + .areas(frame.area()); + let [title] = + Layout::horizontal([Constraint::Fill(1); 1]).areas(heading); + let [top_left_frame, top_right_frame] = + Layout::horizontal([Constraint::Fill(1); 2]).areas(top); + let [bottom_left_frame, bottom_right_frame] = + Layout::horizontal([Constraint::Fill(1); 2]).areas(bottom); + + self.render_title_bar(frame, title, &dashboard); + + dashboard.top_left_frame.render_line_chart(frame, top_left_frame); + dashboard.top_right_frame.render_line_chart(frame, top_right_frame); + dashboard.bottom_left_frame.render_line_chart(frame, bottom_left_frame); + dashboard + .bottom_right_frame + .render_line_chart(frame, bottom_right_frame); + } + + fn render_title_bar( + &self, + frame: &mut Frame, + area: Rect, + dashboard: &Dashboard, + ) { + let style = Style::new().fg(Color::Green).bold(); + let title = vec![ + Span::styled("CLICKANA", style).into_centered_line(), + Span::styled( + format!("Sampling Interval: {}s", self.sampling_interval), + style, + ) + .into_left_aligned_line(), + Span::styled( + format!( + "Time Range: {} - {} ({}s)", + dashboard.start_time, dashboard.end_time, self.time_range + ), + style, + ) + .into_left_aligned_line(), + Span::styled( + format!("Refresh Interval {}s", self.refresh_interval), + style, + ) + .into_left_aligned_line(), + ]; + let p = Paragraph::new(title); + + 
frame.render_widget(p, area); + } + + async fn get_api_data( + &self, + client: &ClickhouseServerClient, + metric: MetricName, + ) -> Result> { + let timeseries = client + .system_timeseries_avg( + metric.table(), + &format!("{metric}"), + Some(self.sampling_interval), + Some(self.time_range), + Some(TimestampFormat::UnixEpoch), + ) + .await + .map(|t| t.into_inner()) + .map_err(|e| { + anyhow!( + concat!( + "failed to retrieve timeseries from clickhouse server; ", + "error = {}", + ), + e + ) + }); + + timeseries + } + + fn new_logger(&self) -> Result { + let file = std::fs::OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(self.log_path.clone()) + .with_context(|| { + format!("error opening log file {}", self.log_path) + })?; + + let decorator = PlainDecorator::new(file); + let drain = FullFormat::new(decorator).build().fuse(); + let drain = Async::new(drain).build().fuse(); + + Ok(slog::Logger::root(drain, o!(FileKv))) + } +} diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 667a666375..e501e650b1 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -797,12 +797,23 @@ enum ValidateCommands { /// Validate each `volume_references` column in the region snapshots table ValidateVolumeReferences, + /// Find either regions Nexus knows about that the corresponding Crucible + /// agent says were deleted, or regions that Nexus doesn't know about. + ValidateRegions(ValidateRegionsArgs), + /// Find either region snapshots Nexus knows about that the corresponding /// Crucible agent says were deleted, or region snapshots that Nexus doesn't /// know about. ValidateRegionSnapshots, } +#[derive(Debug, Args)] +struct ValidateRegionsArgs { + /// Delete Regions Nexus is unaware of + #[clap(long, default_value_t = false)] + clean_up_orphaned_regions: bool, +} + #[derive(Debug, Args)] struct VolumeArgs { #[command(subcommand)] @@ -1093,6 +1104,20 @@ impl DbArgs { DbCommands::Validate(ValidateArgs { command: ValidateCommands::ValidateVolumeReferences, }) => cmd_db_validate_volume_references(&datastore).await, + DbCommands::Validate(ValidateArgs { + command: ValidateCommands::ValidateRegions(args), + }) => { + let clean_up_orphaned_regions = + if args.clean_up_orphaned_regions { + let token = omdb.check_allow_destructive()?; + CleanUpOrphanedRegions::Yes { _token: token } + } else { + CleanUpOrphanedRegions::No + }; + + cmd_db_validate_regions(&datastore, clean_up_orphaned_regions) + .await + } DbCommands::Validate(ValidateArgs { command: ValidateCommands::ValidateRegionSnapshots, }) => cmd_db_validate_region_snapshots(&datastore).await, @@ -2566,39 +2591,48 @@ async fn cmd_db_region_used_by( async fn cmd_db_region_find_deleted( datastore: &DataStore, ) -> Result<(), anyhow::Error> { - let datasets_regions_volumes = + let freed_crucible_resources = datastore.find_deleted_volume_regions().await?; #[derive(Tabled)] - struct Row { + struct RegionRow { dataset_id: DatasetUuid, region_id: Uuid, - volume_id: String, } - let rows: Vec = datasets_regions_volumes - .into_iter() + #[derive(Tabled)] + struct VolumeRow { + volume_id: Uuid, + } + + let region_rows: Vec = freed_crucible_resources + .datasets_and_regions + .iter() .map(|row| { - let (dataset, region, volume) = row; + let (dataset, region) = row; - Row { - dataset_id: dataset.id(), - region_id: region.id(), - volume_id: if let Some(volume) = volume { - volume.id().to_string() - } else { - String::from("") - }, - } + RegionRow { dataset_id: dataset.id(), 
region_id: region.id() } }) .collect(); - let table = tabled::Table::new(rows) + let table = tabled::Table::new(region_rows) .with(tabled::settings::Style::psql()) .to_string(); println!("{}", table); + let volume_rows: Vec = freed_crucible_resources + .volumes + .iter() + .map(|volume_id| VolumeRow { volume_id: *volume_id }) + .collect(); + + let volume_table = tabled::Table::new(volume_rows) + .with(tabled::settings::Style::psql()) + .to_string(); + + println!("{}", volume_table); + Ok(()) } @@ -4526,6 +4560,283 @@ async fn cmd_db_validate_volume_references( Ok(()) } +enum CleanUpOrphanedRegions { + Yes { _token: DestructiveOperationToken }, + No, +} + +async fn cmd_db_validate_regions( + datastore: &DataStore, + clean_up_orphaned_regions: CleanUpOrphanedRegions, +) -> Result<(), anyhow::Error> { + // *Lifetime note*: + // + // The lifetime of the region record in cockroachdb is longer than the time + // the Crucible agent's region is in a non-destroyed state: Nexus will + // perform the query to allocate regions (inserting them into the database) + // before it ensures those regions are created (i.e. making the POST request + // to the appropriate Crucible agent to create them), and it will request + // that the regions be deleted (then wait for that region to transition to + // the destroyed state) before hard-deleting the records in the database. + + // First, get all region records (with their corresponding dataset) + let datasets_and_regions: Vec<(Dataset, Region)> = datastore + .pool_connection_for_tests() + .await? + .transaction_async(|conn| async move { + // Selecting all datasets and regions requires a full table scan + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; + + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::region::dsl; + + dsl::region + .inner_join( + dataset_dsl::dataset + .on(dsl::dataset_id.eq(dataset_dsl::id)), + ) + .select((Dataset::as_select(), Region::as_select())) + .get_results_async(&conn) + .await + }) + .await?; + + #[derive(Tabled)] + struct Row { + dataset_id: DatasetUuid, + region_id: Uuid, + dataset_addr: std::net::SocketAddrV6, + error: String, + } + + let mut rows = Vec::new(); + + // Reconcile with the corresponding Crucible Agent: are they aware of each + // region in the database? + for (dataset, region) in &datasets_and_regions { + // If the dataset was expunged, do not attempt to contact the Crucible + // agent! + let in_service = + datastore.dataset_physical_disk_in_service(dataset.id()).await?; + + if !in_service { + eprintln!( + "dataset {} {:?} is not in service, skipping", + dataset.id(), + dataset.address(), + ); + continue; + } + + use crucible_agent_client::types::RegionId; + use crucible_agent_client::types::State; + use crucible_agent_client::Client as CrucibleAgentClient; + + let Some(dataset_addr) = dataset.address() else { + eprintln!("Dataset {} missing an IP address", dataset.id()); + continue; + }; + + let url = format!("http://{}", dataset_addr); + let client = CrucibleAgentClient::new(&url); + + let actual_region = + match client.region_get(&RegionId(region.id().to_string())).await { + Ok(region) => region.into_inner(), + + Err(e) => { + // Either there was a communication error, or the agent is + // unaware of the Region (this would be a 404). 
+ match e { + crucible_agent_client::Error::ErrorResponse(rv) + if rv.status() == http::StatusCode::NOT_FOUND => + { + rows.push(Row { + dataset_id: dataset.id(), + region_id: region.id(), + dataset_addr, + error: String::from( + "Agent does not know about this region!", + ), + }); + } + + _ => { + eprintln!( + "{} region_get {:?}: {e}", + dataset_addr, + region.id(), + ); + } + } + + continue; + } + }; + + // The Agent is aware of this region, but is it in the appropriate + // state? + + match actual_region.state { + State::Destroyed => { + // If it is destroyed, then this is invalid as the record should + // be hard-deleted as well (see the lifetime note above). Note + // that omdb could be racing a Nexus that is performing region + // deletion: if the region transitioned to Destroyed but Nexus + // is waiting to re-poll, it will not have hard-deleted the + // region record yet. + + rows.push(Row { + dataset_id: dataset.id(), + region_id: region.id(), + dataset_addr, + error: String::from( + "region may need to be manually hard-deleted", + ), + }); + } + + _ => { + // ok + } + } + } + + // Reconcile with the Crucible agents: are there regions that Nexus does not + // know about? Ask each Crucible agent for its list of regions, then check + // in the database: if that region is _not_ in the database, then either it + // was never created by Nexus, or it was hard-deleted by Nexus. Either way, + // omdb should (if the command line argument is supplied) request that the + // orphaned region be deleted. + // + // Note: This should not delete what is actually a valid region, see the + // lifetime note above. + + let mut orphaned_bytes: u64 = 0; + + let db_region_ids: BTreeSet = + datasets_and_regions.iter().map(|(_, r)| r.id()).collect(); + + // Find all the Crucible datasets + let datasets: Vec = datastore + .pool_connection_for_tests() + .await? + .transaction_async(|conn| async move { + // Selecting all datasets and regions requires a full table scan + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; + + use db::schema::dataset::dsl; + + dsl::dataset + .filter(dsl::kind.eq(nexus_db_model::DatasetKind::Crucible)) + .select(Dataset::as_select()) + .get_results_async(&conn) + .await + }) + .await?; + + for dataset in &datasets { + // If the dataset was expunged, do not attempt to contact the Crucible + // agent! + let in_service = + datastore.dataset_physical_disk_in_service(dataset.id()).await?; + + if !in_service { + eprintln!( + "dataset {} {:?} is not in service, skipping", + dataset.id(), + dataset.address(), + ); + continue; + } + + use crucible_agent_client::types::State; + use crucible_agent_client::Client as CrucibleAgentClient; + + let Some(dataset_addr) = dataset.address() else { + eprintln!("Dataset {} missing an IP address", dataset.id()); + continue; + }; + + let url = format!("http://{}", dataset_addr); + let client = CrucibleAgentClient::new(&url); + + let actual_regions = match client.region_list().await { + Ok(v) => v.into_inner(), + Err(e) => { + eprintln!("{} region_list: {e}", dataset_addr); + continue; + } + }; + + for actual_region in actual_regions { + // Skip doing anything if the region is already tombstoned or + // destroyed + match actual_region.state { + State::Destroyed | State::Tombstoned => { + // the Crucible agent will eventually clean this up, or + // already has. 
+ continue; + } + + State::Failed | State::Requested | State::Created => { + // this region needs cleaning up if there isn't an + // associated db record + } + } + + let actual_region_id: Uuid = actual_region.id.0.parse().unwrap(); + if !db_region_ids.contains(&actual_region_id) { + orphaned_bytes += actual_region.block_size + * actual_region.extent_size + * u64::from(actual_region.extent_count); + + match clean_up_orphaned_regions { + CleanUpOrphanedRegions::Yes { .. } => { + match client.region_delete(&actual_region.id).await { + Ok(_) => { + eprintln!( + "{} region {} deleted ok", + dataset_addr, actual_region.id, + ); + } + + Err(e) => { + eprintln!( + "{} region_delete {:?}: {e}", + dataset_addr, actual_region.id, + ); + } + } + } + + CleanUpOrphanedRegions::No => { + // Do not delete this region, just print a row + rows.push(Row { + dataset_id: dataset.id(), + region_id: actual_region_id, + dataset_addr, + error: String::from( + "Nexus does not know about this region!", + ), + }); + } + } + } + } + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .to_string(); + + println!("{}", table); + + eprintln!("found {} orphaned bytes", orphaned_bytes); + + Ok(()) +} + async fn cmd_db_validate_region_snapshots( datastore: &DataStore, ) -> Result<(), anyhow::Error> { @@ -4581,6 +4892,15 @@ async fn cmd_db_validate_region_snapshots( .or_default() .insert(region_snapshot.snapshot_id); + // If the dataset was expunged, do not attempt to contact the Crucible + // agent! + let in_service = + datastore.dataset_physical_disk_in_service(dataset.id()).await?; + + if !in_service { + continue; + } + use crucible_agent_client::types::RegionId; use crucible_agent_client::types::State; use crucible_agent_client::Client as CrucibleAgentClient; @@ -4593,11 +4913,21 @@ async fn cmd_db_validate_region_snapshots( let url = format!("http://{}", dataset_addr); let client = CrucibleAgentClient::new(&url); - let actual_region_snapshots = client + let actual_region_snapshots = match client .region_get_snapshots(&RegionId( region_snapshot.region_id.to_string(), )) - .await?; + .await + { + Ok(v) => v, + Err(e) => { + eprintln!( + "{} region_get_snapshots {:?}: {e}", + dataset_addr, region_snapshot.region_id, + ); + continue; + } + }; let snapshot_id = region_snapshot.snapshot_id.to_string(); @@ -4741,6 +5071,15 @@ async fn cmd_db_validate_region_snapshots( // Reconcile with the Crucible agents: are there snapshots that Nexus does // not know about? for (dataset, region) in datasets_and_regions { + // If the dataset was expunged, do not attempt to contact the Crucible + // agent! 
+ let in_service = + datastore.dataset_physical_disk_in_service(dataset.id()).await?; + + if !in_service { + continue; + } + use crucible_agent_client::types::RegionId; use crucible_agent_client::types::State; use crucible_agent_client::Client as CrucibleAgentClient; @@ -4753,9 +5092,20 @@ async fn cmd_db_validate_region_snapshots( let url = format!("http://{}", dataset_addr); let client = CrucibleAgentClient::new(&url); - let actual_region_snapshots = client + let actual_region_snapshots = match client .region_get_snapshots(&RegionId(region.id().to_string())) - .await?; + .await + { + Ok(v) => v, + Err(e) => { + eprintln!( + "{} region_get_snapshots {:?}: {e}", + dataset_addr, + region.id(), + ); + continue; + } + }; let default = HashSet::default(); let nexus_region_snapshots: &HashSet = diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index fe16b4076e..eb7d74b340 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -1655,6 +1655,14 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { println!(" > {line}"); } + println!( + " total requests completed ok: {}", + status.requests_completed_ok.len(), + ); + for line in &status.requests_completed_ok { + println!(" > {line}"); + } + println!(" errors: {}", status.errors.len()); for line in &status.errors { println!(" > {line}"); @@ -1720,6 +1728,14 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { println!(" > {line}"); } + println!( + " total steps set to volume_deleted ok: {}", + status.step_set_volume_deleted_ok.len(), + ); + for line in &status.step_set_volume_deleted_ok { + println!(" > {line}"); + } + println!(" errors: {}", status.errors.len()); for line in &status.errors { println!(" > {line}"); @@ -1831,10 +1847,11 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { Ok(status) => { println!( - " total records transitioned to done: {}", - status.records_set_to_done.len(), + " region snapshot replacement finish sagas started \ + ok: {}", + status.finish_invoked_ok.len() ); - for line in &status.records_set_to_done { + for line in &status.finish_invoked_ok { println!(" > {line}"); } diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index dcf1671d8f..75f6ce0d0e 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -627,7 +627,7 @@ task: "region_snapshot_replacement_finish" currently executing: no last completed activation: , triggered by a periodic timer firing started at (s ago) and ran for ms - total records transitioned to done: 0 + region snapshot replacement finish sagas started ok: 0 errors: 0 task: "region_snapshot_replacement_garbage_collection" @@ -645,6 +645,7 @@ task: "region_snapshot_replacement_start" started at (s ago) and ran for ms total requests created ok: 0 total start saga invoked ok: 0 + total requests completed ok: 0 errors: 0 task: "region_snapshot_replacement_step" @@ -655,6 +656,7 @@ task: "region_snapshot_replacement_step" total step records created ok: 0 total step garbage collect saga invoked ok: 0 total step saga invoked ok: 0 + total steps set to volume_deleted ok: 0 errors: 0 task: "saga_recovery" @@ -1070,7 +1072,7 @@ task: "region_snapshot_replacement_finish" currently executing: no last completed activation: , triggered by a periodic timer firing started at (s ago) and ran for ms - total records transitioned to done: 0 + region snapshot replacement finish sagas 
started ok: 0 errors: 0 task: "region_snapshot_replacement_garbage_collection" @@ -1088,6 +1090,7 @@ task: "region_snapshot_replacement_start" started at (s ago) and ran for ms total requests created ok: 0 total start saga invoked ok: 0 + total requests completed ok: 0 errors: 0 task: "region_snapshot_replacement_step" @@ -1098,6 +1101,7 @@ task: "region_snapshot_replacement_step" total step records created ok: 0 total step garbage collect saga invoked ok: 0 total step saga invoked ok: 0 + total steps set to volume_deleted ok: 0 errors: 0 task: "saga_recovery" diff --git a/internal-dns/resolver/src/resolver.rs b/internal-dns/resolver/src/resolver.rs index af47bb23ad..8d0e071e19 100644 --- a/internal-dns/resolver/src/resolver.rs +++ b/internal-dns/resolver/src/resolver.rs @@ -12,7 +12,7 @@ use omicron_common::address::{ get_internal_dns_server_addresses, Ipv6Subnet, AZ_PREFIX, DNS_PORT, }; use slog::{debug, error, info, trace}; -use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; #[derive(Debug, Clone, thiserror::Error)] pub enum ResolveError { @@ -323,20 +323,6 @@ impl Resolver { } } - pub async fn lookup_ip( - &self, - srv: ServiceName, - ) -> Result { - let name = srv.srv_name(); - debug!(self.log, "lookup srv"; "dns_name" => &name); - let response = self.resolver.lookup_ip(&name).await?; - let address = response - .iter() - .next() - .ok_or_else(|| ResolveError::NotFound(srv))?; - Ok(address) - } - /// Returns an iterator of [`SocketAddrV6`]'s for the targets of the given /// SRV lookup response. // SRV records have a target, which is itself another DNS name that needs @@ -534,7 +520,7 @@ mod test { let resolver = dns_server.resolver().unwrap(); let err = resolver - .lookup_ip(ServiceName::Cockroach) + .lookup_srv(ServiceName::Cockroach) .await .expect_err("Looking up non-existent service should fail"); diff --git a/live-tests/tests/common/mod.rs b/live-tests/tests/common/mod.rs index 50f84d0b59..8a60b332b6 100644 --- a/live-tests/tests/common/mod.rs +++ b/live-tests/tests/common/mod.rs @@ -157,7 +157,7 @@ async fn check_execution_environment( // The only real requirement for these tests is that they're run from a // place with connectivity to the underlay network of a deployed control // plane. The easiest way to tell is to look up something in internal DNS. 
- resolver.lookup_ip(ServiceName::InternalDns).await.map_err(|e| { + resolver.lookup_srv(ServiceName::InternalDns).await.map_err(|e| { let text = format!( "check_execution_environment(): failed to look up internal DNS \ in the internal DNS servers.\n\n \ diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 88fd3a0978..b2b1001f9b 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -181,6 +181,8 @@ impl OmicronZoneConfig { JsonSchema, PartialEq, Eq, + PartialOrd, + Ord, Hash, Diffus, )] diff --git a/nexus/db-model/src/region_snapshot_replacement.rs b/nexus/db-model/src/region_snapshot_replacement.rs index bcbd55028d..28627d8379 100644 --- a/nexus/db-model/src/region_snapshot_replacement.rs +++ b/nexus/db-model/src/region_snapshot_replacement.rs @@ -28,6 +28,7 @@ impl_enum_type!( ReplacementDone => b"replacement_done" DeletingOldVolume => b"deleting_old_volume" Running => b"running" + Completing => b"completing" Complete => b"complete" ); @@ -46,6 +47,7 @@ impl std::str::FromStr for RegionSnapshotReplacementState { Ok(RegionSnapshotReplacementState::DeletingOldVolume) } "running" => Ok(RegionSnapshotReplacementState::Running), + "completing" => Ok(RegionSnapshotReplacementState::Completing), "complete" => Ok(RegionSnapshotReplacementState::Complete), _ => Err(format!("unrecognized value {} for enum", s)), } @@ -79,9 +81,14 @@ impl std::str::FromStr for RegionSnapshotReplacementState { /// | | /// v --- /// --- -/// Running | -/// | set in region snapshot replacement -/// | | finish background task +/// Running <-- | +/// | | +/// | | | +/// v | | +/// | | responsibility of region snapshot +/// Completing -- | replacement finish saga +/// | +/// | | /// v | /// | /// Complete --- @@ -133,6 +140,12 @@ pub struct RegionSnapshotReplacement { pub replacement_state: RegionSnapshotReplacementState, pub operating_saga_id: Option, + + /// In order for the newly created region not to be deleted inadvertently, + /// an additional reference count bump is required. This volume should live + /// as long as this request so that all necessary replacements can be + /// completed. + pub new_region_volume_id: Option, } impl RegionSnapshotReplacement { @@ -157,6 +170,7 @@ impl RegionSnapshotReplacement { old_snapshot_id, old_snapshot_volume_id: None, new_region_id: None, + new_region_volume_id: None, replacement_state: RegionSnapshotReplacementState::Requested, operating_saga_id: None, } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 399da81ea4..a54c2e3029 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1932,6 +1932,7 @@ table! { new_region_id -> Nullable, replacement_state -> crate::RegionSnapshotReplacementStateEnum, operating_saga_id -> Nullable, + new_region_volume_id -> Nullable, } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 02646bc6dd..8b663d2549 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. 
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(116, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(117, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(117, "add-completing-and-new-region-volume"), KnownVersion::new(116, "bp-physical-disk-disposition"), KnownVersion::new(115, "inv-omicron-physical-disks-generation"), KnownVersion::new(114, "crucible-ref-count-records"), diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index c90f1cc92e..d20f24e773 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -125,16 +125,7 @@ pub use sled::TransitionError; pub use switch_port::SwitchPortSettingsCombinedResult; pub use virtual_provisioning_collection::StorageType; pub use vmm::VmmStateUpdateResult; -pub use volume::read_only_resources_associated_with_volume; -pub use volume::CrucibleResources; -pub use volume::CrucibleTargets; -pub use volume::ExistingTarget; -pub use volume::ReplacementTarget; -pub use volume::VolumeCheckoutReason; -pub use volume::VolumeReplaceResult; -pub use volume::VolumeReplacementParams; -pub use volume::VolumeToDelete; -pub use volume::VolumeWithTarget; +pub use volume::*; // Number of unique datasets required to back a region. // TODO: This should likely turn into a configuration option. diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index 885cb622b8..67bd37cf69 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -422,8 +422,8 @@ impl DataStore { } } - /// Find regions on expunged disks - pub async fn find_regions_on_expunged_physical_disks( + /// Find read/write regions on expunged disks + pub async fn find_read_write_regions_on_expunged_physical_disks( &self, opctx: &OpContext, ) -> LookupResult> { @@ -450,6 +450,8 @@ impl DataStore { )) .select(dataset_dsl::id) )) + // only return read-write regions here + .filter(region_dsl::read_only.eq(false)) .select(Region::as_select()) .load_async(&*conn) .await @@ -546,6 +548,42 @@ impl DataStore { Ok(records) } + + /// Find regions not on expunged disks that match a volume id + pub async fn find_non_expunged_regions( + &self, + opctx: &OpContext, + volume_id: Uuid, + ) -> LookupResult> { + let conn = self.pool_connection_authorized(opctx).await?; + + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::physical_disk::dsl as physical_disk_dsl; + use db::schema::region::dsl as region_dsl; + use db::schema::zpool::dsl as zpool_dsl; + + region_dsl::region + .filter(region_dsl::dataset_id.eq_any( + dataset_dsl::dataset + .filter(dataset_dsl::time_deleted.is_null()) + .filter(dataset_dsl::pool_id.eq_any( + zpool_dsl::zpool + .filter(zpool_dsl::time_deleted.is_null()) + .filter(zpool_dsl::physical_disk_id.eq_any( + physical_disk_dsl::physical_disk + .filter(physical_disk_dsl::disk_policy.eq(PhysicalDiskPolicy::InService)) + .select(physical_disk_dsl::id) + )) + .select(zpool_dsl::id) + )) + .select(dataset_dsl::id) + )) + .filter(region_dsl::volume_id.eq(volume_id)) + .select(Region::as_select()) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } #[cfg(test)] diff --git 
a/nexus/db-queries/src/db/datastore/region_replacement.rs b/nexus/db-queries/src/db/datastore/region_replacement.rs index 0fda6b46ba..8aad7f2cfd 100644 --- a/nexus/db-queries/src/db/datastore/region_replacement.rs +++ b/nexus/db-queries/src/db/datastore/region_replacement.rs @@ -37,6 +37,13 @@ impl DataStore { opctx: &OpContext, region: &Region, ) -> Result { + if region.read_only() { + return Err(Error::invalid_request(format!( + "region {} is read-only", + region.id(), + ))); + } + let request = RegionReplacement::for_region(region); let request_id = request.id; @@ -52,19 +59,22 @@ impl DataStore { opctx: &OpContext, request: RegionReplacement, ) -> Result<(), Error> { + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper("insert_region_replacement_request") .transaction(&conn, |conn| { let request = request.clone(); + let err = err.clone(); async move { use db::schema::region_replacement::dsl; - Self::volume_repair_insert_query( + Self::volume_repair_insert_in_txn( + &conn, + err, request.volume_id, request.id, ) - .execute_async(&conn) .await?; diesel::insert_into(dsl::region_replacement) @@ -76,7 +86,13 @@ impl DataStore { } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + if let Some(err) = err.take() { + err + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn get_region_replacement_request_by_id( @@ -908,6 +924,7 @@ mod test { use crate::db::pub_test_utils::TestDatabase; use omicron_test_utils::dev; + use sled_agent_client::types::VolumeConstructionRequest; #[tokio::test] async fn test_one_replacement_per_volume() { @@ -919,6 +936,20 @@ mod test { let region_2_id = Uuid::new_v4(); let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request_1 = RegionReplacement::new(region_1_id, volume_id); let request_2 = RegionReplacement::new(region_2_id, volume_id); @@ -950,6 +981,20 @@ mod test { let region_id = Uuid::new_v4(); let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = { let mut request = RegionReplacement::new(region_id, volume_id); request.replacement_state = RegionReplacementState::Running; @@ -1042,6 +1087,20 @@ mod test { let region_id = Uuid::new_v4(); let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = { let mut request = RegionReplacement::new(region_id, volume_id); request.replacement_state = RegionReplacementState::ReplacementDone; diff --git a/nexus/db-queries/src/db/datastore/region_snapshot.rs b/nexus/db-queries/src/db/datastore/region_snapshot.rs index 0129869f4f..f7a34fdb52 100644 --- a/nexus/db-queries/src/db/datastore/region_snapshot.rs +++ b/nexus/db-queries/src/db/datastore/region_snapshot.rs @@ -120,4 +120,40 @@ impl DataStore { .await .map_err(|e| 
public_error_from_diesel(e, ErrorHandler::Server)) } + + /// Find region snapshots not on expunged disks that match a snapshot id + pub async fn find_non_expunged_region_snapshots( + &self, + opctx: &OpContext, + snapshot_id: Uuid, + ) -> LookupResult> { + let conn = self.pool_connection_authorized(opctx).await?; + + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::physical_disk::dsl as physical_disk_dsl; + use db::schema::region_snapshot::dsl as region_snapshot_dsl; + use db::schema::zpool::dsl as zpool_dsl; + + region_snapshot_dsl::region_snapshot + .filter(region_snapshot_dsl::dataset_id.eq_any( + dataset_dsl::dataset + .filter(dataset_dsl::time_deleted.is_null()) + .filter(dataset_dsl::pool_id.eq_any( + zpool_dsl::zpool + .filter(zpool_dsl::time_deleted.is_null()) + .filter(zpool_dsl::physical_disk_id.eq_any( + physical_disk_dsl::physical_disk + .filter(physical_disk_dsl::disk_policy.eq(PhysicalDiskPolicy::InService)) + .select(physical_disk_dsl::id) + )) + .select(zpool_dsl::id) + )) + .select(dataset_dsl::id) + )) + .filter(region_snapshot_dsl::snapshot_id.eq(snapshot_id)) + .select(RegionSnapshot::as_select()) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } diff --git a/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs b/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs index 76a83cca2a..90b014c582 100644 --- a/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs +++ b/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs @@ -16,7 +16,6 @@ use crate::db::model::RegionSnapshotReplacement; use crate::db::model::RegionSnapshotReplacementState; use crate::db::model::RegionSnapshotReplacementStep; use crate::db::model::RegionSnapshotReplacementStepState; -use crate::db::model::VolumeRepair; use crate::db::pagination::paginated; use crate::db::pagination::Paginator; use crate::db::update_and_check::UpdateAndCheck; @@ -93,6 +92,7 @@ impl DataStore { request: RegionSnapshotReplacement, volume_id: Uuid, ) -> Result<(), Error> { + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper( @@ -100,20 +100,24 @@ impl DataStore { ) .transaction(&conn, |conn| { let request = request.clone(); + let err = err.clone(); async move { use db::schema::region_snapshot_replacement::dsl; - use db::schema::volume_repair::dsl as volume_repair_dsl; - // An associated volume repair record isn't _strictly_ needed: - // snapshot volumes should never be directly constructed, and - // therefore won't ever have an associated Upstairs that - // receives a volume replacement request. However it's being - // done in an attempt to be overly cautious. - - diesel::insert_into(volume_repair_dsl::volume_repair) - .values(VolumeRepair { volume_id, repair_id: request.id }) - .execute_async(&conn) - .await?; + // An associated volume repair record isn't _strictly_ + // needed: snapshot volumes should never be directly + // constructed, and therefore won't ever have an associated + // Upstairs that receives a volume replacement request. + // However it's being done in an attempt to be overly + // cautious, and it validates that the volume exist: + // otherwise it would be possible to create a region + // snapshot replacement request for a volume that didn't + // exist! 
+ + Self::volume_repair_insert_in_txn( + &conn, err, volume_id, request.id, + ) + .await?; diesel::insert_into(dsl::region_snapshot_replacement) .values(request) @@ -124,7 +128,13 @@ impl DataStore { } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + if let Some(err) = err.take() { + err + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn get_region_snapshot_replacement_request_by_id( @@ -342,6 +352,7 @@ impl DataStore { region_snapshot_replacement_id: Uuid, operating_saga_id: Uuid, new_region_id: Uuid, + new_region_volume_id: Uuid, old_snapshot_volume_id: Uuid, ) -> Result<(), Error> { use db::schema::region_snapshot_replacement::dsl; @@ -357,6 +368,7 @@ impl DataStore { .eq(RegionSnapshotReplacementState::ReplacementDone), dsl::old_snapshot_volume_id.eq(Some(old_snapshot_volume_id)), dsl::new_region_id.eq(Some(new_region_id)), + dsl::new_region_volume_id.eq(Some(new_region_volume_id)), dsl::operating_saga_id.eq(Option::::None), )) .check_if_exists::( @@ -375,6 +387,8 @@ impl DataStore { && record.replacement_state == RegionSnapshotReplacementState::ReplacementDone && record.new_region_id == Some(new_region_id) + && record.new_region_volume_id + == Some(new_region_volume_id) && record.old_snapshot_volume_id == Some(old_snapshot_volume_id) { @@ -557,15 +571,201 @@ impl DataStore { } } - /// Transition a RegionSnapshotReplacement record from Running to Complete. - /// Also removes the `volume_repair` record that is taking a "lock" on the - /// Volume. Note this doesn't occur from a saga context, and therefore 1) - /// doesn't accept an operating saga id parameter, and 2) checks that - /// operating_saga_id is null for the corresponding record. + /// Transition a RegionSnapshotReplacement record from Running to + /// Completing, setting a unique id at the same time. + pub async fn set_region_snapshot_replacement_completing( + &self, + opctx: &OpContext, + region_snapshot_replacement_id: Uuid, + operating_saga_id: Uuid, + ) -> Result<(), Error> { + use db::schema::region_snapshot_replacement::dsl; + let updated = diesel::update(dsl::region_snapshot_replacement) + .filter(dsl::id.eq(region_snapshot_replacement_id)) + .filter( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Running), + ) + .filter(dsl::operating_saga_id.is_null()) + .set(( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Completing), + dsl::operating_saga_id.eq(operating_saga_id), + )) + .check_if_exists::( + region_snapshot_replacement_id, + ) + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) + .await; + + match updated { + Ok(result) => match result.status { + UpdateStatus::Updated => Ok(()), + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.operating_saga_id == Some(operating_saga_id) + && record.replacement_state + == RegionSnapshotReplacementState::Completing + { + Ok(()) + } else { + Err(Error::conflict(format!( + "region snapshot replacement {} set to {:?} \ + (operating saga id {:?})", + region_snapshot_replacement_id, + record.replacement_state, + record.operating_saga_id, + ))) + } + } + }, + + Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)), + } + } + + /// Transition a RegionReplacement record from Completing to Running, + /// clearing the operating saga id. 
+ pub async fn undo_set_region_snapshot_replacement_completing( + &self, + opctx: &OpContext, + region_snapshot_replacement_id: Uuid, + operating_saga_id: Uuid, + ) -> Result<(), Error> { + use db::schema::region_snapshot_replacement::dsl; + let updated = diesel::update(dsl::region_snapshot_replacement) + .filter(dsl::id.eq(region_snapshot_replacement_id)) + .filter( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Completing), + ) + .filter(dsl::operating_saga_id.eq(operating_saga_id)) + .set(( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Running), + dsl::operating_saga_id.eq(Option::::None), + )) + .check_if_exists::( + region_snapshot_replacement_id, + ) + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) + .await; + + match updated { + Ok(result) => match result.status { + UpdateStatus::Updated => Ok(()), + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.operating_saga_id == None + && record.replacement_state + == RegionSnapshotReplacementState::Running + { + Ok(()) + } else { + Err(Error::conflict(format!( + "region snapshot replacement {} set to {:?} \ + (operating saga id {:?})", + region_snapshot_replacement_id, + record.replacement_state, + record.operating_saga_id, + ))) + } + } + }, + + Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)), + } + } + + /// Transition a RegionSnapshotReplacement record from Completing to + /// Complete. Also removes the `volume_repair` record that is taking a + /// "lock" on the Volume. pub async fn set_region_snapshot_replacement_complete( &self, opctx: &OpContext, region_snapshot_replacement_id: Uuid, + operating_saga_id: Uuid, + ) -> Result<(), Error> { + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; + + self.transaction_retry_wrapper( + "set_region_snapshot_replacement_complete", + ) + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + use db::schema::volume_repair::dsl as volume_repair_dsl; + + diesel::delete( + volume_repair_dsl::volume_repair.filter( + volume_repair_dsl::repair_id + .eq(region_snapshot_replacement_id), + ), + ) + .execute_async(&conn) + .await?; + + use db::schema::region_snapshot_replacement::dsl; + + let result = diesel::update(dsl::region_snapshot_replacement) + .filter(dsl::id.eq(region_snapshot_replacement_id)) + .filter( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Completing), + ) + .filter(dsl::operating_saga_id.eq(operating_saga_id)) + .set(( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Complete), + dsl::operating_saga_id.eq(Option::::None), + )) + .check_if_exists::( + region_snapshot_replacement_id, + ) + .execute_and_check(&conn) + .await?; + + match result.status { + UpdateStatus::Updated => Ok(()), + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.replacement_state + == RegionSnapshotReplacementState::Complete + { + Ok(()) + } else { + Err(err.bail(Error::conflict(format!( + "region snapshot replacement {} set to {:?} \ + (operating saga id {:?})", + region_snapshot_replacement_id, + record.replacement_state, + record.operating_saga_id, + )))) + } + } + } + } + }) + .await + .map_err(|e| match err.take() { + Some(error) => error, + None => public_error_from_diesel(e, ErrorHandler::Server), + }) + } + + /// Transition a RegionSnapshotReplacement record from Requested to Complete + /// - this is required when the region snapshot is hard-deleted, which means + /// that all volume 
references are gone and no replacement is required. Also + /// removes the `volume_repair` record that is taking a "lock" on the + /// Volume. + pub async fn set_region_snapshot_replacement_complete_from_requested( + &self, + opctx: &OpContext, + region_snapshot_replacement_id: Uuid, ) -> Result<(), Error> { type TxnError = TransactionError; @@ -577,6 +777,7 @@ impl DataStore { let err = err.clone(); async move { use db::schema::volume_repair::dsl as volume_repair_dsl; + use db::schema::region_snapshot_replacement::dsl; diesel::delete( volume_repair_dsl::volume_repair.filter( @@ -587,17 +788,16 @@ impl DataStore { .execute_async(&conn) .await?; - use db::schema::region_snapshot_replacement::dsl; - let result = diesel::update(dsl::region_snapshot_replacement) .filter(dsl::id.eq(region_snapshot_replacement_id)) .filter( dsl::replacement_state - .eq(RegionSnapshotReplacementState::Running), + .eq(RegionSnapshotReplacementState::Requested), ) .filter(dsl::operating_saga_id.is_null()) - .set((dsl::replacement_state - .eq(RegionSnapshotReplacementState::Complete),)) + .filter(dsl::new_region_volume_id.is_null()) + .set(dsl::replacement_state + .eq(RegionSnapshotReplacementState::Complete)) .check_if_exists::( region_snapshot_replacement_id, ) @@ -621,7 +821,7 @@ impl DataStore { region_snapshot_replacement_id, record.replacement_state, record.operating_saga_id, - ), + ) )))) } } @@ -651,6 +851,7 @@ impl DataStore { opctx: &OpContext, request: RegionSnapshotReplacementStep, ) -> Result { + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper( @@ -658,10 +859,10 @@ impl DataStore { ) .transaction(&conn, |conn| { let request = request.clone(); + let err = err.clone(); async move { use db::schema::region_snapshot_replacement_step::dsl; - use db::schema::volume_repair::dsl as volume_repair_dsl; // Skip inserting this new record if we found another region // snapshot replacement step with this volume in the step's @@ -714,13 +915,13 @@ impl DataStore { // volume replacement: create an associated volume repair // record. - diesel::insert_into(volume_repair_dsl::volume_repair) - .values(VolumeRepair { - volume_id: request.volume_id, - repair_id: request.id, - }) - .execute_async(&conn) - .await?; + Self::volume_repair_insert_in_txn( + &conn, + err, + request.volume_id, + request.id, + ) + .await?; let request_id = request.id; @@ -733,7 +934,13 @@ impl DataStore { } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + if let Some(err) = err.take() { + err + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn get_region_snapshot_replacement_step_by_id( @@ -1073,6 +1280,87 @@ impl DataStore { Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)), } } + + /// Transition from Requested to VolumeDeleted, and remove the associated + /// `volume_repair` record. This occurs when the associated snapshot's + /// volume is deleted. Note this doesn't occur from a saga context, and + /// therefore 1) doesn't accept an operating saga id parameter, and 2) + /// checks that operating_saga_id is null for the corresponding record. 
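Taken together, the datastore methods in this file add the following transitions; a consolidated sketch, with state and method names exactly as they appear in the surrounding hunks:

    // RegionSnapshotReplacement:
    //   Running    -> Completing   set_region_snapshot_replacement_completing
    //                              (the finish saga claims the record via operating_saga_id)
    //   Completing -> Running      undo_set_region_snapshot_replacement_completing (saga unwind)
    //   Completing -> Complete     set_region_snapshot_replacement_complete
    //                              (also drops the volume_repair "lock" record)
    //   Requested  -> Complete     set_region_snapshot_replacement_complete_from_requested
    //                              (region snapshot hard-deleted; no replacement needed)
    //
    // RegionSnapshotReplacementStep:
    //   Requested  -> VolumeDeleted  set_region_snapshot_replacement_step_volume_deleted_from_requested
    //                                (the step's volume was deleted, so no step work remains)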
+ pub async fn set_region_snapshot_replacement_step_volume_deleted_from_requested( + &self, + opctx: &OpContext, + region_snapshot_replacement_step: RegionSnapshotReplacementStep, + ) -> Result<(), Error> { + let conn = self.pool_connection_authorized(opctx).await?; + let err = OptionalError::new(); + + self.transaction_retry_wrapper( + "set_region_snapshot_replacement_complete", + ) + .transaction(&conn, |conn| { + let err = err.clone(); + + async move { + use db::schema::volume_repair::dsl as volume_repair_dsl; + + diesel::delete( + volume_repair_dsl::volume_repair.filter( + volume_repair_dsl::repair_id + .eq(region_snapshot_replacement_step.id), + ), + ) + .execute_async(&conn) + .await?; + + use db::schema::region_snapshot_replacement_step::dsl; + let result = + diesel::update(dsl::region_snapshot_replacement_step) + .filter(dsl::id.eq(region_snapshot_replacement_step.id)) + .filter(dsl::operating_saga_id.is_null()) + .filter(dsl::old_snapshot_volume_id.is_null()) + .filter( + dsl::replacement_state.eq( + RegionSnapshotReplacementStepState::Requested, + ), + ) + .set(dsl::replacement_state.eq( + RegionSnapshotReplacementStepState::VolumeDeleted, + )) + .check_if_exists::( + region_snapshot_replacement_step.id, + ) + .execute_and_check(&conn) + .await?; + + match result.status { + UpdateStatus::Updated => Ok(()), + + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.replacement_state + == RegionSnapshotReplacementStepState::VolumeDeleted + { + Ok(()) + } else { + Err(err.bail(Error::conflict(format!( + "region snapshot replacement step {} set \ + to {:?} (operating saga id {:?})", + region_snapshot_replacement_step.id, + record.replacement_state, + record.operating_saga_id, + )))) + } + } + } + } + }) + .await + .map_err(|e| match err.take() { + Some(error) => error, + None => public_error_from_diesel(e, ErrorHandler::Server), + }) + } } #[cfg(test)] @@ -1083,6 +1371,7 @@ mod test { use crate::db::pub_test_utils::TestDatabase; use omicron_test_utils::dev; use omicron_uuid_kinds::DatasetUuid; + use sled_agent_client::types::VolumeConstructionRequest; #[tokio::test] async fn test_one_replacement_per_volume() { @@ -1100,6 +1389,20 @@ mod test { let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request_1 = RegionSnapshotReplacement::new( dataset_1_id, region_1_id, @@ -1146,6 +1449,20 @@ mod test { let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request_1 = RegionSnapshotReplacement::new( dataset_1_id, region_1_id, @@ -1182,6 +1499,20 @@ mod test { let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = RegionSnapshotReplacement::new(dataset_id, region_id, snapshot_id); @@ -1214,11 +1545,25 @@ mod test { // Insert some replacement steps, and make sure counting works + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + { - let step = RegionSnapshotReplacementStep::new( - request_id, - Uuid::new_v4(), // volume id - ); + let step = + RegionSnapshotReplacementStep::new(request_id, step_volume_id); let result = datastore .insert_region_snapshot_replacement_step(&opctx, step) @@ -1247,11 +1592,25 @@ mod test { 1, ); + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + { - let mut step = RegionSnapshotReplacementStep::new( - request_id, - Uuid::new_v4(), // volume id - ); + let mut step = + RegionSnapshotReplacementStep::new(request_id, step_volume_id); step.replacement_state = RegionSnapshotReplacementStepState::Running; @@ -1283,11 +1642,25 @@ mod test { 1, ); + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + { - let mut step = RegionSnapshotReplacementStep::new( - request_id, - Uuid::new_v4(), // volume id - ); + let mut step = + RegionSnapshotReplacementStep::new(request_id, step_volume_id); // VolumeDeleted does not count as "in-progress" step.replacement_state = @@ -1337,6 +1710,20 @@ mod test { let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let step = RegionSnapshotReplacementStep::new(Uuid::new_v4(), volume_id); let first_request_id = step.id; @@ -1431,6 +1818,22 @@ mod test { let db = TestDatabase::new_with_datastore(&logctx.log).await; let (opctx, datastore) = (db.opctx(), db.datastore()); + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut request = RegionSnapshotReplacement::new( DatasetUuid::new_v4(), Uuid::new_v4(), @@ -1442,9 +1845,7 @@ mod test { datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); @@ -1457,8 +1858,24 @@ mod test { .unwrap() .is_empty()); + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut step = - RegionSnapshotReplacementStep::new(request_id, Uuid::new_v4()); + RegionSnapshotReplacementStep::new(request_id, step_volume_id); step.replacement_state = RegionSnapshotReplacementStepState::Complete; let result = datastore @@ -1468,8 +1885,24 @@ mod test { assert!(matches!(result, InsertStepResult::Inserted { .. })); + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut step = - RegionSnapshotReplacementStep::new(request_id, Uuid::new_v4()); + RegionSnapshotReplacementStep::new(request_id, step_volume_id); step.replacement_state = RegionSnapshotReplacementStepState::Complete; let result = datastore @@ -1509,6 +1942,34 @@ mod test { let volume_id = Uuid::new_v4(); let old_snapshot_volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + + datastore + .volume_create(nexus_db_model::Volume::new( + old_snapshot_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut step = RegionSnapshotReplacementStep::new(request_id, volume_id); step.replacement_state = RegionSnapshotReplacementStepState::Complete; @@ -1558,6 +2019,20 @@ mod test { let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = RegionReplacement::new(Uuid::new_v4(), volume_id); datastore diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index 4e0d1ccac1..505533da50 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -59,6 +59,7 @@ use serde::Deserialize; use serde::Deserializer; use serde::Serialize; use sled_agent_client::types::VolumeConstructionRequest; +use std::collections::HashSet; use std::collections::VecDeque; use std::net::AddrParseError; use std::net::SocketAddr; @@ -179,6 +180,23 @@ enum ReplaceSnapshotError { MultipleResourceUsageRecords(String), } +/// Crucible resources freed by previous volume deletes +#[derive(Debug, Serialize, Deserialize)] +pub struct FreedCrucibleResources { + /// Regions that previously could not be deleted (often due to region + /// snaphots) that were freed by a volume delete + pub datasets_and_regions: Vec<(Dataset, Region)>, + + /// Previously soft-deleted volumes that can now be hard-deleted + pub volumes: Vec, +} + +impl FreedCrucibleResources { + pub fn is_empty(&self) -> bool { + self.datasets_and_regions.is_empty() && self.volumes.is_empty() + } +} + impl DataStore { async fn volume_create_in_txn( conn: &async_bb8_diesel::Connection, @@ -412,7 +430,7 @@ impl DataStore { }) } - async fn volume_get_impl( + pub(super) async fn volume_get_impl( conn: &async_bb8_diesel::Connection, volume_id: Uuid, ) -> Result, diesel::result::Error> { @@ -1115,12 +1133,12 @@ impl DataStore { .await } - /// Find regions for deleted volumes that do not have associated region - /// snapshots and are not being used by any other non-deleted volumes, and - /// return them for garbage collection + /// Find read/write regions for deleted volumes that do not have associated + /// region snapshots and are not being used by any other non-deleted + /// volumes, and return them for garbage collection pub async fn find_deleted_volume_regions( &self, - ) -> ListResultVec<(Dataset, Region, Option)> { + ) -> LookupResult { let conn = self.pool_connection_unauthorized().await?; self.transaction_retry_wrapper("find_deleted_volume_regions") .transaction(&conn, |conn| async move { @@ -1132,8 +1150,7 @@ impl DataStore { async fn find_deleted_volume_regions_in_txn( conn: &async_bb8_diesel::Connection, - ) -> Result)>, diesel::result::Error> - { + ) -> Result { use db::schema::dataset::dsl as dataset_dsl; use db::schema::region::dsl as region_dsl; use db::schema::region_snapshot::dsl; @@ -1179,6 +1196,9 @@ impl DataStore { let mut deleted_regions = Vec::with_capacity(unfiltered_deleted_regions.len()); + let mut volume_set: HashSet = + HashSet::with_capacity(unfiltered_deleted_regions.len()); + for (dataset, region, region_snapshot, volume) in unfiltered_deleted_regions { @@ -1212,10 +1232,61 @@ impl DataStore { continue; } + if let Some(volume) = &volume { + 
volume_set.insert(volume.id()); + } + deleted_regions.push((dataset, region, volume)); } - Ok(deleted_regions) + let regions_for_deletion: HashSet = + deleted_regions.iter().map(|(_, region, _)| region.id()).collect(); + + let mut volumes = Vec::with_capacity(deleted_regions.len()); + + for volume_id in volume_set { + // Do not return a volume hard-deletion if there are still lingering + // read/write regions, unless all those lingering read/write regions + // will be deleted from the result of returning from this function. + let allocated_rw_regions: HashSet = + Self::get_allocated_regions_query(volume_id) + .get_results_async::<(Dataset, Region)>(conn) + .await? + .into_iter() + .filter_map(|(_, region)| { + if !region.read_only() { + Some(region.id()) + } else { + None + } + }) + .collect(); + + if allocated_rw_regions.is_subset(®ions_for_deletion) { + // If all the allocated rw regions for this volume are in the + // set of regions being returned for deletion, then we can + // hard-delete this volume. Read-only region accounting should + // have already been updated by soft-deleting this volume. + // + // Note: we'll be in this branch if allocated_rw_regions is + // empty. I believe the only time we'll hit this empty case is + // when the volume is fully populated with read-only resources + // (read-only regions and region snapshots). + volumes.push(volume_id); + } else { + // Not all r/w regions allocated to this volume are being + // deleted here, so we can't hard-delete the volume yet. + } + } + + Ok(FreedCrucibleResources { + datasets_and_regions: deleted_regions + .into_iter() + .map(|(d, r, _)| (d, r)) + .collect(), + + volumes, + }) } pub async fn read_only_resources_associated_with_volume( @@ -2621,8 +2692,11 @@ pub enum VolumeReplaceResult { // this call performed the replacement Done, - // the "existing" volume was deleted - ExistingVolumeDeleted, + // the "existing" volume was soft deleted + ExistingVolumeSoftDeleted, + + // the "existing" volume was hard deleted + ExistingVolumeHardDeleted, } impl DataStore { @@ -2747,14 +2821,14 @@ impl DataStore { // perform the region replacement now, and this will short-circuit // the rest of the process. - return Ok(VolumeReplaceResult::ExistingVolumeDeleted); + return Ok(VolumeReplaceResult::ExistingVolumeHardDeleted); }; if old_volume.time_deleted.is_some() { // Existing volume was soft-deleted, so return here for the same // reason: the region replacement process should be short-circuited // now. - return Ok(VolumeReplaceResult::ExistingVolumeDeleted); + return Ok(VolumeReplaceResult::ExistingVolumeSoftDeleted); } let old_vcr: VolumeConstructionRequest = @@ -3001,14 +3075,14 @@ impl DataStore { // perform the region replacement now, and this will short-circuit // the rest of the process. - return Ok(VolumeReplaceResult::ExistingVolumeDeleted); + return Ok(VolumeReplaceResult::ExistingVolumeHardDeleted); }; if old_volume.time_deleted.is_some() { // Existing volume was soft-deleted, so return here for the same // reason: the region replacement process should be short-circuited // now. 
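With `ExistingVolumeDeleted` split into soft- and hard-deleted variants above, callers can tell the two cases apart before the short-circuit returns just below; an illustrative, hypothetical caller-side match (not taken from this change):

    match volume_replace_result {
        VolumeReplaceResult::Done => {
            // this call performed the replacement
        }
        VolumeReplaceResult::ExistingVolumeSoftDeleted
        | VolumeReplaceResult::ExistingVolumeHardDeleted => {
            // the existing volume is gone or going away; short-circuit the
            // rest of the replacement process
        }
        _ => {
            // any variants not visible in this hunk
        }
    }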
- return Ok(VolumeReplaceResult::ExistingVolumeDeleted); + return Ok(VolumeReplaceResult::ExistingVolumeSoftDeleted); } let old_vcr: VolumeConstructionRequest = @@ -3682,7 +3756,15 @@ impl DataStore { let mut paginator = Paginator::new(SQL_BATCH_SIZE); let conn = self.pool_connection_authorized(opctx).await?; - let needle = address.to_string(); + let needle = match address { + SocketAddr::V4(_) => { + return Err(Error::internal_error(&format!( + "find_volumes_referencing_socket_addr not ipv6: {address}" + ))); + } + + SocketAddr::V6(addr) => addr, + }; while let Some(p) = paginator.next() { use db::schema::volume::dsl; @@ -3699,7 +3781,23 @@ impl DataStore { paginator = p.found_batch(&haystack, &|r| r.id()); for volume in haystack { - if volume.data().contains(&needle) { + let vcr: VolumeConstructionRequest = + match serde_json::from_str(&volume.data()) { + Ok(vcr) => vcr, + Err(e) => { + return Err(Error::internal_error(&format!( + "cannot deserialize volume data for {}: {e}", + volume.id(), + ))); + } + }; + + let rw_reference = region_in_vcr(&vcr, &needle) + .map_err(|e| Error::internal_error(&e.to_string()))?; + let ro_reference = read_only_target_in_vcr(&vcr, &needle) + .map_err(|e| Error::internal_error(&e.to_string()))?; + + if rw_reference || ro_reference { volumes.push(volume); } } diff --git a/nexus/db-queries/src/db/datastore/volume_repair.rs b/nexus/db-queries/src/db/datastore/volume_repair.rs index 7ea88c8542..598d9d77a2 100644 --- a/nexus/db-queries/src/db/datastore/volume_repair.rs +++ b/nexus/db-queries/src/db/datastore/volume_repair.rs @@ -11,6 +11,8 @@ use crate::db::datastore::RunnableQuery; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::VolumeRepair; +use crate::db::DbConnection; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use diesel::result::DatabaseErrorKind; @@ -19,14 +21,75 @@ use omicron_common::api::external::Error; use uuid::Uuid; impl DataStore { - pub(super) fn volume_repair_insert_query( + /// Insert a volume repair record, taking a "lock" on the volume pointed to + /// by volume id with some repair id. + /// + /// If there exists a record that has a matching volume id and repair id, + /// return Ok(()). + /// + /// If there is no volume that matches the given volume id, return an error: + /// it should not be possible to lock a volume that does not exist! Note + /// that it is possible to lock a soft-deleted volume. + /// + /// If there is already an existing record that has a matching volume id but + /// a different repair id, then this function returns an Error::conflict. + pub(super) async fn volume_repair_insert_in_txn( + conn: &async_bb8_diesel::Connection, + err: OptionalError, volume_id: Uuid, repair_id: Uuid, - ) -> impl RunnableQuery { + ) -> Result<(), diesel::result::Error> { use db::schema::volume_repair::dsl; - diesel::insert_into(dsl::volume_repair) + // If a lock that matches the arguments exists already, return Ok + // + // Note: if rerunning this function (for example if a saga node was + // rerun), the volume could have existed when this lock was inserted the + // first time, but have been deleted now. 
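The helper implemented just below has three outcomes, surfaced to callers through the same `OptionalError` pattern used throughout this change; a condensed sketch using the names from this file:

    // Outcomes of volume_repair_insert_in_txn:
    //   * a volume_repair row with this (volume_id, repair_id) already exists -> Ok(())
    //     (idempotent re-lock, e.g. a rerun saga node)
    //   * the volume was never created or was hard-deleted -> err.bail(Error::invalid_request(..))
    //   * another repair id holds the lock (volume_repair_pkey violation) -> err.bail(Error::conflict(..))
    //
    // Callers (see volume_repair_lock below) let bailed typed errors win over the
    // generic Diesel mapping:
    //
    //   .map_err(|e| match err.take() {
    //       Some(error) => error,
    //       None => public_error_from_diesel(e, ErrorHandler::Server),
    //   })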
+ let maybe_lock = dsl::volume_repair + .filter(dsl::repair_id.eq(repair_id)) + .filter(dsl::volume_id.eq(volume_id)) + .first_async::(conn) + .await + .optional()?; + + if maybe_lock.is_some() { + return Ok(()); + } + + // Do not allow a volume repair record to be created if the volume does + // not exist, or was hard-deleted! + let maybe_volume = Self::volume_get_impl(conn, volume_id).await?; + + if maybe_volume.is_none() { + return Err(err.bail(Error::invalid_request(format!( + "cannot create record: volume {volume_id} does not exist" + )))); + } + + // Do not check for soft-deletion here: We may want to request locks for + // soft-deleted volumes. + + match diesel::insert_into(dsl::volume_repair) .values(VolumeRepair { volume_id, repair_id }) + .execute_async(conn) + .await + { + Ok(_) => Ok(()), + + Err(e) => match e { + DieselError::DatabaseError( + DatabaseErrorKind::UniqueViolation, + ref error_information, + ) if error_information.constraint_name() + == Some("volume_repair_pkey") => + { + Err(err.bail(Error::conflict("volume repair lock"))) + } + + _ => Err(e), + }, + } } pub async fn volume_repair_lock( @@ -36,21 +99,25 @@ impl DataStore { repair_id: Uuid, ) -> Result<(), Error> { let conn = self.pool_connection_authorized(opctx).await?; - Self::volume_repair_insert_query(volume_id, repair_id) - .execute_async(&*conn) + let err = OptionalError::new(); + + self.transaction_retry_wrapper("volume_repair_lock") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + Self::volume_repair_insert_in_txn( + &conn, err, volume_id, repair_id, + ) + .await + } + }) .await - .map(|_| ()) - .map_err(|e| match e { - DieselError::DatabaseError( - DatabaseErrorKind::UniqueViolation, - ref error_information, - ) if error_information.constraint_name() - == Some("volume_repair_pkey") => - { - Error::conflict("volume repair lock") + .map_err(|e| { + if let Some(err) = err.take() { + err + } else { + public_error_from_diesel(e, ErrorHandler::Server) } - - _ => public_error_from_diesel(e, ErrorHandler::Server), }) } @@ -102,6 +169,7 @@ mod test { use crate::db::pub_test_utils::TestDatabase; use omicron_test_utils::dev; + use sled_agent_client::types::VolumeConstructionRequest; #[tokio::test] async fn volume_lock_conflict_error_returned() { @@ -113,6 +181,20 @@ mod test { let lock_2 = Uuid::new_v4(); let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore.volume_repair_lock(&opctx, volume_id, lock_1).await.unwrap(); let err = datastore @@ -125,4 +207,55 @@ mod test { db.terminate().await; logctx.cleanup_successful(); } + + /// Assert that you can't take a volume repair lock if the volume does not + /// exist yet! 
+ #[tokio::test] + async fn volume_lock_should_fail_without_volume() { + let logctx = + dev::test_setup_log("volume_lock_should_fail_without_volume"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let lock_1 = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + + datastore + .volume_repair_lock(&opctx, volume_id, lock_1) + .await + .unwrap_err(); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn volume_lock_relock_allowed() { + let logctx = dev::test_setup_log("volume_lock_relock_allowed"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let lock_id = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + + datastore.volume_repair_lock(&opctx, volume_id, lock_id).await.unwrap(); + datastore.volume_repair_lock(&opctx, volume_id, lock_id).await.unwrap(); + + db.terminate().await; + logctx.cleanup_successful(); + } } diff --git a/nexus/reconfigurator/blippy/Cargo.toml b/nexus/reconfigurator/blippy/Cargo.toml new file mode 100644 index 0000000000..e7f7208871 --- /dev/null +++ b/nexus/reconfigurator/blippy/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "nexus-reconfigurator-blippy" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[dependencies] +nexus-sled-agent-shared.workspace = true +nexus-types.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true + +omicron-workspace-hack.workspace = true + +[dev-dependencies] +nexus-reconfigurator-planning.workspace = true +omicron-test-utils.workspace = true diff --git a/nexus/reconfigurator/blippy/src/blippy.rs b/nexus/reconfigurator/blippy/src/blippy.rs new file mode 100644 index 0000000000..9e9cc84b32 --- /dev/null +++ b/nexus/reconfigurator/blippy/src/blippy.rs @@ -0,0 +1,428 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::checks; +use crate::report::BlippyReport; +use crate::report::BlippyReportSortKey; +use core::fmt; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintDatasetConfig; +use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::inventory::ZpoolName; +use omicron_common::address::DnsSubnet; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::SLED_PREFIX; +use omicron_common::api::external::MacAddr; +use omicron_common::disk::DatasetKind; +use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; +use std::collections::BTreeSet; +use std::net::IpAddr; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Note { + pub severity: Severity, + pub kind: Kind, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Severity { + /// Indicator of a serious problem that means the blueprint is invalid. 
+ Fatal, +} + +impl fmt::Display for Severity { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Severity::Fatal => write!(f, "FATAL"), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Kind { + Sled { sled_id: SledUuid, kind: SledKind }, +} + +impl Kind { + pub fn display_component(&self) -> impl fmt::Display + '_ { + enum Component<'a> { + Sled(&'a SledUuid), + } + + impl fmt::Display for Component<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Component::Sled(id) => write!(f, "sled {id}"), + } + } + } + + match self { + Kind::Sled { sled_id, .. } => Component::Sled(sled_id), + } + } + + pub fn display_subkind(&self) -> impl fmt::Display + '_ { + enum Subkind<'a> { + Sled(&'a SledKind), + } + + impl fmt::Display for Subkind<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Subkind::Sled(kind) => write!(f, "{kind}"), + } + } + } + + match self { + Kind::Sled { kind, .. } => Subkind::Sled(kind), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum SledKind { + /// Two running zones have the same underlay IP address. + DuplicateUnderlayIp { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + }, + /// A sled has two zones that are not members of the same sled subnet. + SledWithMixedUnderlaySubnets { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + }, + /// Two sleds are using the same sled subnet. + ConflictingSledSubnets { + other_sled: SledUuid, + subnet: Ipv6Subnet, + }, + /// An internal DNS zone has an IP that is not one of the expected rack DNS + /// subnets. + InternalDnsZoneBadSubnet { + zone: BlueprintZoneConfig, + rack_dns_subnets: BTreeSet, + }, + /// Two running zones have the same external IP address. + DuplicateExternalIp { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + ip: IpAddr, + }, + /// Two running zones' NICs have the same IP address. + DuplicateNicIp { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + ip: IpAddr, + }, + /// Two running zones' NICs have the same MAC address. + DuplicateNicMac { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + mac: MacAddr, + }, + /// Two zones with the same durable dataset kind are on the same zpool. + ZoneDurableDatasetCollision { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + zpool: ZpoolName, + }, + /// Two zones with the same filesystem dataset kind are on the same zpool. + ZoneFilesystemDatasetCollision { + zone1: BlueprintZoneConfig, + zone2: BlueprintZoneConfig, + zpool: ZpoolName, + }, + /// One zpool has two datasets of the same kind. + ZpoolWithDuplicateDatasetKinds { + dataset1: BlueprintDatasetConfig, + dataset2: BlueprintDatasetConfig, + zpool: ZpoolUuid, + }, + /// A zpool is missing its Debug dataset. + ZpoolMissingDebugDataset { zpool: ZpoolUuid }, + /// A zpool is missing its Zone Root dataset. + ZpoolMissingZoneRootDataset { zpool: ZpoolUuid }, + /// A zone's filesystem dataset is missing from `blueprint_datasets`. + ZoneMissingFilesystemDataset { zone: BlueprintZoneConfig }, + /// A zone's durable dataset is missing from `blueprint_datasets`. + ZoneMissingDurableDataset { zone: BlueprintZoneConfig }, + /// A zone's durable dataset and transient root dataset are on different + /// zpools. + ZoneWithDatasetsOnDifferentZpools { + zone: BlueprintZoneConfig, + durable_zpool: ZpoolName, + transient_zpool: ZpoolName, + }, + /// A sled is missing entries in `Blueprint::blueprint_datasets`. 
+ /// + /// `why` indicates why we expected this sled to have an entry. + SledMissingDatasets { why: &'static str }, + /// A sled is missing entries in `Blueprint::blueprint_disks`. + /// + /// `why` indicates why we expected this sled to have an entry. + SledMissingDisks { why: &'static str }, + /// A dataset is present but not referenced by any in-service zone or disk. + OrphanedDataset { dataset: BlueprintDatasetConfig }, + /// A dataset claims to be on a zpool that does not exist. + DatasetOnNonexistentZpool { dataset: BlueprintDatasetConfig }, +} + +impl fmt::Display for SledKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SledKind::DuplicateUnderlayIp { zone1, zone2 } => { + write!( + f, + "duplicate underlay IP {} ({:?} {} and {:?} {})", + zone1.underlay_ip(), + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::SledWithMixedUnderlaySubnets { zone1, zone2 } => { + write!( + f, + "zones have underlay IPs on two different sled subnets: \ + {:?} {} ({}) and {:?} {} ({})", + zone1.zone_type.kind(), + zone1.id, + zone1.underlay_ip(), + zone2.zone_type.kind(), + zone2.id, + zone2.underlay_ip(), + ) + } + SledKind::ConflictingSledSubnets { other_sled, subnet } => { + write!( + f, + "duplicate sled subnet {} with sled {other_sled}", + subnet.net() + ) + } + SledKind::InternalDnsZoneBadSubnet { zone, rack_dns_subnets } => { + write!( + f, + "internal DNS zone {} underlay IP {} is not \ + one of the reserved rack DNS subnets ({:?})", + zone.id, + zone.underlay_ip(), + rack_dns_subnets + ) + } + SledKind::DuplicateExternalIp { zone1, zone2, ip } => { + write!( + f, + "duplicate external IP {ip} ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::DuplicateNicIp { zone1, zone2, ip } => { + write!( + f, + "duplicate NIC IP {ip} ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::DuplicateNicMac { zone1, zone2, mac } => { + write!( + f, + "duplicate NIC MAC {mac} ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::ZoneDurableDatasetCollision { zone1, zone2, zpool } => { + write!( + f, + "zpool {zpool} has two zone datasets of the same kind \ + ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::ZoneFilesystemDatasetCollision { + zone1, + zone2, + zpool, + } => { + write!( + f, + "zpool {zpool} has two zone filesystems of the same kind \ + ({:?} {} and {:?} {})", + zone1.zone_type.kind(), + zone1.id, + zone2.zone_type.kind(), + zone2.id, + ) + } + SledKind::ZpoolWithDuplicateDatasetKinds { + dataset1, + dataset2, + zpool, + } => { + write!( + f, + "two datasets of the same kind on zpool {zpool} \ + ({:?} {} and {:?} {})", + dataset1.kind, dataset1.id, dataset2.kind, dataset2.id, + ) + } + SledKind::ZpoolMissingDebugDataset { zpool } => { + write!(f, "zpool {zpool} is missing its Debug dataset") + } + SledKind::ZpoolMissingZoneRootDataset { zpool } => { + write!(f, "zpool {zpool} is missing its Zone Root dataset") + } + SledKind::ZoneMissingFilesystemDataset { zone } => { + write!( + f, + "in-service zone's filesytem dataset is missing: {:?} {}", + zone.zone_type.kind(), + zone.id, + ) + } + SledKind::ZoneMissingDurableDataset { zone } => { + write!( + f, + "in-service zone's durable dataset is missing: {:?} {}", + zone.zone_type.kind(), + zone.id, + ) + 
} + SledKind::ZoneWithDatasetsOnDifferentZpools { + zone, + durable_zpool, + transient_zpool, + } => { + write!( + f, + "zone {:?} {} has its durable dataset on \ + zpool {durable_zpool} but its root dataset on \ + zpool {transient_zpool}", + zone.zone_type.kind(), + zone.id, + ) + } + SledKind::SledMissingDatasets { why } => { + write!(f, "missing entry in blueprint_datasets ({why})") + } + SledKind::SledMissingDisks { why } => { + write!(f, "missing entry in blueprint_disks ({why})") + } + SledKind::OrphanedDataset { dataset } => { + let parent = match dataset.kind { + DatasetKind::Cockroach + | DatasetKind::Crucible + | DatasetKind::Clickhouse + | DatasetKind::ClickhouseKeeper + | DatasetKind::ClickhouseServer + | DatasetKind::ExternalDns + | DatasetKind::InternalDns + | DatasetKind::TransientZone { .. } => "zone", + DatasetKind::TransientZoneRoot + | DatasetKind::Debug + | DatasetKind::Update => "disk", + }; + write!( + f, + "in-service dataset ({:?} {}) with no associated {parent}", + dataset.kind, dataset.id + ) + } + SledKind::DatasetOnNonexistentZpool { dataset } => { + write!( + f, + "in-service dataset ({:?} {}) on non-existent zpool {}", + dataset.kind, dataset.id, dataset.pool + ) + } + } + } +} + +impl Note { + pub fn display(&self, sort_key: BlippyReportSortKey) -> NoteDisplay<'_> { + NoteDisplay { note: self, sort_key } + } +} + +#[derive(Debug)] +pub struct NoteDisplay<'a> { + note: &'a Note, + sort_key: BlippyReportSortKey, +} + +impl fmt::Display for NoteDisplay<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.sort_key { + BlippyReportSortKey::Kind => { + write!( + f, + "{}: {} note: {}", + self.note.kind.display_component(), + self.note.severity, + self.note.kind.display_subkind(), + ) + } + BlippyReportSortKey::Severity => { + write!( + f, + "{} note: {}: {}", + self.note.severity, + self.note.kind.display_component(), + self.note.kind.display_subkind(), + ) + } + } + } +} + +#[derive(Debug)] +pub struct Blippy<'a> { + blueprint: &'a Blueprint, + notes: Vec, +} + +impl<'a> Blippy<'a> { + pub fn new(blueprint: &'a Blueprint) -> Self { + let mut slf = Self { blueprint, notes: Vec::new() }; + checks::perform_all_blueprint_only_checks(&mut slf); + slf + } + + pub fn blueprint(&self) -> &'a Blueprint { + self.blueprint + } + + pub(crate) fn push_sled_note( + &mut self, + sled_id: SledUuid, + severity: Severity, + kind: SledKind, + ) { + self.notes.push(Note { severity, kind: Kind::Sled { sled_id, kind } }); + } + + pub fn into_report( + self, + sort_key: BlippyReportSortKey, + ) -> BlippyReport<'a> { + BlippyReport::new(self.blueprint, self.notes, sort_key) + } +} diff --git a/nexus/reconfigurator/blippy/src/checks.rs b/nexus/reconfigurator/blippy/src/checks.rs new file mode 100644 index 0000000000..f5673cb77c --- /dev/null +++ b/nexus/reconfigurator/blippy/src/checks.rs @@ -0,0 +1,1564 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
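With the `Blippy` builder defined above, the expected entry point is roughly the following; a sketch only, since building a `Blueprint` and the `BlippyReport` accessors (report.rs) sit outside this hunk:

    // Assumed consumer usage: run every blueprint-only check and collect the report.
    let report = Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind);
    // Individual notes render through the sort-key-aware adapter, e.g.:
    //   println!("{}", note.display(BlippyReportSortKey::Kind));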
+ +use crate::blippy::Blippy; +use crate::blippy::Severity; +use crate::blippy::SledKind; +use nexus_sled_agent_shared::inventory::ZoneKind; +use nexus_types::deployment::BlueprintDatasetConfig; +use nexus_types::deployment::BlueprintDatasetFilter; +use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::OmicronZoneExternalIp; +use omicron_common::address::DnsSubnet; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::SLED_PREFIX; +use omicron_common::disk::DatasetKind; +use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::net::Ipv6Addr; + +pub(crate) fn perform_all_blueprint_only_checks(blippy: &mut Blippy<'_>) { + check_underlay_ips(blippy); + check_external_networking(blippy); + check_dataset_zpool_uniqueness(blippy); + check_datasets(blippy); +} + +fn check_underlay_ips(blippy: &mut Blippy<'_>) { + let mut underlay_ips: BTreeMap = + BTreeMap::new(); + let mut inferred_sled_subnets_by_sled: BTreeMap< + SledUuid, + (Ipv6Subnet, &BlueprintZoneConfig), + > = BTreeMap::new(); + let mut inferred_sled_subnets_by_subnet: BTreeMap< + Ipv6Subnet, + SledUuid, + > = BTreeMap::new(); + let mut rack_dns_subnets: BTreeSet = BTreeSet::new(); + + for (sled_id, zone) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + { + let ip = zone.underlay_ip(); + + // There should be no duplicate underlay IPs. + if let Some(previous) = underlay_ips.insert(ip, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateUnderlayIp { + zone1: previous.clone(), + zone2: zone.clone(), + }, + ); + } + + if zone.zone_type.is_internal_dns() { + // Internal DNS zones should have IPs coming from the reserved rack + // DNS subnets. + let subnet = DnsSubnet::from_addr(ip); + if rack_dns_subnets.is_empty() { + // The blueprint doesn't store the rack subnet explicitly, so we + // infer it based on the first internal DNS zone we see. + rack_dns_subnets.extend(subnet.rack_subnet().get_dns_subnets()); + } + if !rack_dns_subnets.contains(&subnet) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::InternalDnsZoneBadSubnet { + zone: zone.clone(), + rack_dns_subnets: rack_dns_subnets.clone(), + }, + ); + } + } else { + let subnet = Ipv6Subnet::new(ip); + + // Any given subnet should be used by at most one sled. + match inferred_sled_subnets_by_subnet.entry(subnet) { + Entry::Vacant(slot) => { + slot.insert(sled_id); + } + Entry::Occupied(prev) => { + if *prev.get() != sled_id { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ConflictingSledSubnets { + other_sled: *prev.get(), + subnet, + }, + ); + } + } + } + + // Any given sled should have IPs within at most one subnet. + // + // The blueprint doesn't store sled subnets explicitly, so we can't + // check that each sled is using the subnet it's supposed to. The + // best we can do is check that the sleds are internally consistent. 
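As a concrete illustration of that consistency check (hypothetical addresses; assumes the usual /64 sled prefix behind SLED_PREFIX):

    use omicron_common::address::{Ipv6Subnet, SLED_PREFIX};
    use std::net::Ipv6Addr;

    fn main() {
        // Two zones on one sled whose underlay IPs land in different sled
        // subnets would be reported as SledWithMixedUnderlaySubnets.
        let ip1: Ipv6Addr = "fd00:1122:3344:101::5".parse().unwrap();
        let ip2: Ipv6Addr = "fd00:1122:3344:102::7".parse().unwrap();
        let subnet1: Ipv6Subnet<SLED_PREFIX> = Ipv6Subnet::new(ip1);
        let subnet2: Ipv6Subnet<SLED_PREFIX> = Ipv6Subnet::new(ip2);
        assert_ne!(subnet1, subnet2);
    }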
+ match inferred_sled_subnets_by_sled.entry(sled_id) { + Entry::Vacant(slot) => { + slot.insert((subnet, zone)); + } + Entry::Occupied(prev) => { + if prev.get().0 != subnet { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::SledWithMixedUnderlaySubnets { + zone1: prev.get().1.clone(), + zone2: zone.clone(), + }, + ); + } + } + } + } + } +} + +fn check_external_networking(blippy: &mut Blippy<'_>) { + let mut used_external_ips = BTreeMap::new(); + let mut used_external_floating_ips = BTreeMap::new(); + let mut used_external_snat_ips = BTreeMap::new(); + + let mut used_nic_ips = BTreeMap::new(); + let mut used_nic_macs = BTreeMap::new(); + + for (sled_id, zone, external_ip, nic) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + .filter_map(|(sled_id, zone)| { + zone.zone_type + .external_networking() + .map(|(external_ip, nic)| (sled_id, zone, external_ip, nic)) + }) + { + // There should be no duplicate external IPs. + if let Some(prev_zone) = used_external_ips.insert(external_ip, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateExternalIp { + zone1: prev_zone.clone(), + zone2: zone.clone(), + ip: external_ip.ip(), + }, + ); + } + + // See the loop below; we build up separate maps to check for + // Floating/SNAT overlap that wouldn't be caught by the exact + // `used_external_ips` map above. + match external_ip { + OmicronZoneExternalIp::Floating(floating) => { + used_external_floating_ips.insert(floating.ip, zone); + } + OmicronZoneExternalIp::Snat(snat) => { + used_external_snat_ips + .insert(snat.snat_cfg.ip, (sled_id, zone)); + } + } + + // There should be no duplicate NIC IPs or MACs. + if let Some(prev_zone) = used_nic_ips.insert(nic.ip, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateNicIp { + zone1: prev_zone.clone(), + zone2: zone.clone(), + ip: nic.ip, + }, + ); + } + if let Some(prev_zone) = used_nic_macs.insert(nic.mac, zone) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateNicMac { + zone1: prev_zone.clone(), + zone2: zone.clone(), + mac: nic.mac, + }, + ); + } + } + + // The loop above noted any exact duplicates; we should also check for any + // SNAT / Floating overlaps. For each SNAT IP, ensure we don't have a + // floating IP at the same address. + for (ip, (sled_id, zone2)) in used_external_snat_ips { + if let Some(&zone1) = used_external_floating_ips.get(&ip) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DuplicateExternalIp { + zone1: zone1.clone(), + zone2: zone2.clone(), + ip, + }, + ); + } + } +} + +fn check_dataset_zpool_uniqueness(blippy: &mut Blippy<'_>) { + let mut durable_kinds_by_zpool: BTreeMap> = + BTreeMap::new(); + let mut transient_kinds_by_zpool: BTreeMap< + ZpoolUuid, + BTreeMap, + > = BTreeMap::new(); + + // On any given zpool, we should have at most one zone of any given + // kind. + for (sled_id, zone) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + { + // Check "one kind per zpool" for durable datasets... 
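// The bookkeeping below (and in `DatasetsBySled` further down) leans on the
// fact that `BTreeMap::insert` returns the previously stored value: the first
// zone seen for a (zpool, kind) pair wins, and any later insert for the same
// pair hands back the earlier zone, which is exactly the collision to report.
// A minimal std-only sketch of that pattern (names are illustrative):
//
//     let mut kinds_by_zpool: BTreeMap<u8, BTreeMap<&str, &str>> = BTreeMap::new();
//     assert!(kinds_by_zpool.entry(1).or_default().insert("crucible", "zone-a").is_none());
//     assert_eq!(
//         kinds_by_zpool.entry(1).or_default().insert("crucible", "zone-b"),
//         Some("zone-a"), // second crucible zone on zpool 1 => collision
//     );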
+ if let Some(dataset) = zone.zone_type.durable_dataset() { + let kind = zone.zone_type.kind(); + if let Some(previous) = durable_kinds_by_zpool + .entry(dataset.dataset.pool_name.id()) + .or_default() + .insert(kind, zone) + { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneDurableDatasetCollision { + zone1: previous.clone(), + zone2: zone.clone(), + zpool: dataset.dataset.pool_name.clone(), + }, + ); + } + } + + // ... and transient datasets. + if let Some(dataset) = zone.filesystem_dataset() { + let kind = zone.zone_type.kind(); + if let Some(previous) = transient_kinds_by_zpool + .entry(dataset.pool().id()) + .or_default() + .insert(kind, zone) + { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneFilesystemDatasetCollision { + zone1: previous.clone(), + zone2: zone.clone(), + zpool: dataset.into_parts().0, + }, + ); + } + } + + // If a zone has both durable and transient datasets, they should be on + // the same pool. + match (zone.zone_type.durable_zpool(), zone.filesystem_pool.as_ref()) { + (Some(durable), Some(transient)) if durable != transient => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneWithDatasetsOnDifferentZpools { + zone: zone.clone(), + durable_zpool: durable.clone(), + transient_zpool: transient.clone(), + }, + ); + } + _ => (), + } + } +} + +type DatasetByKind<'a> = BTreeMap; +type DatasetsByZpool<'a> = BTreeMap>; + +#[derive(Debug)] +struct DatasetsBySled<'a> { + by_sled: BTreeMap>, + noted_sleds_missing_datasets: BTreeSet, +} + +impl<'a> DatasetsBySled<'a> { + fn new(blippy: &mut Blippy<'a>) -> Self { + let mut by_sled = BTreeMap::new(); + + for (&sled_id, config) in &blippy.blueprint().blueprint_datasets { + let by_zpool: &mut BTreeMap<_, _> = + by_sled.entry(sled_id).or_default(); + + for dataset in config.datasets.values() { + let by_kind: &mut BTreeMap<_, _> = + by_zpool.entry(dataset.pool.id()).or_default(); + + match by_kind.entry(dataset.kind.clone()) { + Entry::Vacant(slot) => { + slot.insert(dataset); + } + Entry::Occupied(prev) => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZpoolWithDuplicateDatasetKinds { + dataset1: (*prev.get()).clone(), + dataset2: dataset.clone(), + zpool: dataset.pool.id(), + }, + ); + } + } + } + } + + Self { by_sled, noted_sleds_missing_datasets: BTreeSet::new() } + } + + // Get the datasets for each zpool on a given sled, or add a fatal note to + // `blippy` that the sled is missing an entry in `blueprint_datasets` for + // the specified reason `why`. + fn get_sled_or_note_missing( + &mut self, + blippy: &mut Blippy<'_>, + sled_id: SledUuid, + why: &'static str, + ) -> Option<&DatasetsByZpool<'a>> { + let maybe_datasets = self.by_sled.get(&sled_id); + if maybe_datasets.is_none() + && self.noted_sleds_missing_datasets.insert(sled_id) + { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::SledMissingDatasets { why }, + ); + } + maybe_datasets + } +} + +fn check_datasets(blippy: &mut Blippy<'_>) { + let mut datasets = DatasetsBySled::new(blippy); + + // As we loop through all the datasets we expect to see, mark them down. + // Afterwards, we'll check for any datasets present that we _didn't_ expect + // to see. + let mut expected_datasets = BTreeSet::new(); + + // All disks should have debug and zone root datasets. + // + // TODO-correctness We currently only include in-service disks in the + // blueprint; once we include expunged or decommissioned disks too, we + // should filter here to only in-service. 
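// Concretely, for a disk backing zpool `oxp_<uuid>` the loop below expects to
// find at least these two dataset records on that zpool (dataset names are
// illustrative, keyed by the DatasetKind variants used here):
//
//     oxp_<uuid>/crypt/debug    -> DatasetKind::Debug
//     oxp_<uuid>/crypt/zone     -> DatasetKind::TransientZoneRoot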
+ for (&sled_id, disk_config) in &blippy.blueprint().blueprint_disks { + let Some(sled_datasets) = datasets.get_sled_or_note_missing( + blippy, + sled_id, + "sled has an entry in blueprint_disks", + ) else { + continue; + }; + + for disk in &disk_config.disks { + let sled_datasets = sled_datasets.get(&disk.pool_id); + + match sled_datasets + .and_then(|by_zpool| by_zpool.get(&DatasetKind::Debug)) + { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZpoolMissingDebugDataset { + zpool: disk.pool_id, + }, + ); + } + } + + match sled_datasets.and_then(|by_zpool| { + by_zpool.get(&DatasetKind::TransientZoneRoot) + }) { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZpoolMissingZoneRootDataset { + zpool: disk.pool_id, + }, + ); + } + } + } + } + + // There should be a dataset for every dataset referenced by a running zone + // (filesystem or durable). + for (sled_id, zone_config) in blippy + .blueprint() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + { + let Some(sled_datasets) = datasets.get_sled_or_note_missing( + blippy, + sled_id, + "sled has running zones", + ) else { + continue; + }; + + match &zone_config.filesystem_dataset() { + Some(dataset) => { + match sled_datasets + .get(&dataset.pool().id()) + .and_then(|by_zpool| by_zpool.get(dataset.dataset())) + { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneMissingFilesystemDataset { + zone: zone_config.clone(), + }, + ); + } + } + } + None => { + // TODO-john Add a Severity::BackwardsCompatibility and note the + // missing filesystem pool + } + } + + if let Some(dataset) = zone_config.zone_type.durable_dataset() { + match sled_datasets + .get(&dataset.dataset.pool_name.id()) + .and_then(|by_zpool| by_zpool.get(&dataset.kind)) + { + Some(dataset) => { + expected_datasets.insert(dataset.id); + } + None => { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::ZoneMissingDurableDataset { + zone: zone_config.clone(), + }, + ); + } + } + } + } + + // TODO-correctness We currently only include in-service disks in the + // blueprint; once we include expunged or decommissioned disks too, we + // should filter here to only in-service. + let in_service_sled_zpools = blippy + .blueprint() + .blueprint_disks + .iter() + .map(|(sled_id, disk_config)| { + ( + sled_id, + disk_config + .disks + .iter() + .map(|disk| disk.pool_id) + .collect::>(), + ) + }) + .collect::>(); + let mut noted_sleds_without_disks = BTreeSet::new(); + + // All datasets should be on zpools that have disk records, and all datasets + // should have been referenced by either a zone or a disk above. 
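// The orphan check below reduces to set membership: every dataset id recorded
// in `expected_datasets` while walking disks and zones above is accounted
// for, and any in-service dataset left over gets an `OrphanedDataset` note.
// A std-only sketch of that shape:
//
//     let expected: BTreeSet<u32> = [1, 2, 3].into_iter().collect();
//     let in_service = [1, 2, 3, 4];
//     let orphans: Vec<u32> =
//         in_service.into_iter().filter(|id| !expected.contains(id)).collect();
//     assert_eq!(orphans, vec![4]);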
+ for (sled_id, dataset) in blippy + .blueprint() + .all_omicron_datasets(BlueprintDatasetFilter::InService) + { + if !expected_datasets.contains(&dataset.id) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::OrphanedDataset { dataset: dataset.clone() }, + ); + continue; + } + + let Some(sled_zpools) = in_service_sled_zpools.get(&sled_id) else { + if noted_sleds_without_disks.insert(sled_id) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::SledMissingDisks { + why: "sled has in-service datasets", + }, + ); + } + continue; + }; + + if !sled_zpools.contains(&dataset.pool.id()) { + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::DatasetOnNonexistentZpool { + dataset: dataset.clone(), + }, + ); + continue; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::blippy::Kind; + use crate::blippy::Note; + use crate::BlippyReportSortKey; + use nexus_reconfigurator_planning::example::example; + use nexus_reconfigurator_planning::example::ExampleSystemBuilder; + use nexus_types::deployment::blueprint_zone_type; + use nexus_types::deployment::BlueprintZoneType; + use omicron_test_utils::dev::test_setup_log; + + // The tests below all take the example blueprint, mutate in some invalid + // way, and confirm that blippy reports the invalidity. This test confirms + // the unmutated blueprint has no blippy notes. + #[test] + fn test_example_blueprint_is_blippy_clean() { + static TEST_NAME: &str = "test_example_blueprint_is_blippy_clean"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, blueprint) = example(&logctx.log, TEST_NAME); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + if !report.notes().is_empty() { + eprintln!("{}", report.display()); + panic!("example blueprint should have no blippy notes"); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_underlay_ips() { + static TEST_NAME: &str = "test_duplicate_underlay_ips"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the underlay IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_ip = nexus0.underlay_ip(); + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + internal_address, + .. + }) => { + internal_address.set_ip(dup_ip); + } + _ => unreachable!("this is a Nexus zone"), + }; + + // This illegal modification should result in at least three notes: a + // duplicate underlay IP, duplicate sled subnets, and sled1 having mixed + // underlay subnets (the details of which depend on the ordering of + // zones, so we'll sort that out here). 
+ let nexus0 = nexus0.clone(); + let nexus1 = nexus1.clone(); + let (mixed_underlay_zone1, mixed_underlay_zone2) = { + let mut sled1_zones = blueprint + .blueprint_zones + .get(&nexus1_sled_id) + .unwrap() + .zones + .iter(); + let sled1_zone1 = sled1_zones.next().expect("at least one zone"); + let sled1_zone2 = sled1_zones.next().expect("at least two zones"); + if sled1_zone1.id == nexus1.id { + (nexus1.clone(), sled1_zone2.clone()) + } else { + (sled1_zone1.clone(), nexus1.clone()) + } + }; + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateUnderlayIp { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::SledWithMixedUnderlaySubnets { + zone1: mixed_underlay_zone1, + zone2: mixed_underlay_zone2, + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::ConflictingSledSubnets { + other_sled: nexus0_sled_id, + subnet: Ipv6Subnet::new(dup_ip), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_bad_internal_dns_subnet() { + static TEST_NAME: &str = "test_bad_internal_dns_subnet"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Change the second internal DNS zone to be from a different rack + // subnet. + let mut internal_dns_iter = blueprint + .blueprint_zones + .iter_mut() + .flat_map(|(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_internal_dns() { + Some((*sled_id, zone)) + } else { + None + } + }) + }); + let (dns0_sled_id, dns0) = + internal_dns_iter.next().expect("at least one internal DNS zone"); + let (dns1_sled_id, dns1) = + internal_dns_iter.next().expect("at least two internal DNS zones"); + assert_ne!(dns0_sled_id, dns1_sled_id); + + let dns0_ip = dns0.underlay_ip(); + let rack_subnet = DnsSubnet::from_addr(dns0_ip).rack_subnet(); + let different_rack_subnet = { + // Flip the high bit of the existing underlay IP to guarantee a + // different rack subnet + let hi_bit = 1_u128 << 127; + let lo_bits = !hi_bit; + let hi_bit_ip = Ipv6Addr::from(hi_bit); + let lo_bits_ip = Ipv6Addr::from(lo_bits); + // Build XOR out of the operations we have... + let flipped_ip = if hi_bit_ip & dns0_ip == hi_bit_ip { + dns0_ip & lo_bits_ip + } else { + dns0_ip | hi_bit_ip + }; + DnsSubnet::from_addr(flipped_ip).rack_subnet() + }; + let different_dns_subnet = different_rack_subnet.get_dns_subnet(0); + + match &mut dns1.zone_type { + BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + http_address, + dns_address, + .. 
+ }, + ) => { + http_address.set_ip(different_dns_subnet.dns_address()); + dns_address.set_ip(different_dns_subnet.dns_address()); + } + _ => unreachable!("this is an internal DNS zone"), + }; + + let expected_note = Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::InternalDnsZoneBadSubnet { + zone: dns1.clone(), + rack_dns_subnets: rack_subnet + .get_dns_subnets() + .into_iter() + .collect(), + }, + }, + }; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + assert!( + report.notes().contains(&expected_note), + "did not find expected note {expected_note:?}" + ); + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_external_ip() { + static TEST_NAME: &str = "test_duplicate_external_ip"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the external IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_ip = match nexus0 + .zone_type + .external_networking() + .expect("Nexus has external networking") + .0 + { + OmicronZoneExternalIp::Floating(ip) => ip, + OmicronZoneExternalIp::Snat(_) => { + unreachable!("Nexus has a floating IP") + } + }; + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + external_ip, + .. + }) => { + *external_ip = dup_ip; + } + _ => unreachable!("this is a Nexus zone"), + }; + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateExternalIp { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + ip: dup_ip.ip, + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_nic_ip() { + static TEST_NAME: &str = "test_duplicate_nic_ip"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the external IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_ip = nexus0 + .zone_type + .external_networking() + .expect("Nexus has external networking") + .1 + .ip; + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + nic, + .. 
+ }) => { + nic.ip = dup_ip; + } + _ => unreachable!("this is a Nexus zone"), + }; + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateNicIp { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + ip: dup_ip, + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_duplicate_nic_mac() { + static TEST_NAME: &str = "test_duplicate_nic_mac"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Copy the external IP from one Nexus to another. + let mut nexus_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_nexus() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (nexus0_sled_id, nexus0) = + nexus_iter.next().expect("at least one Nexus zone"); + let (nexus1_sled_id, nexus1) = + nexus_iter.next().expect("at least two Nexus zones"); + assert_ne!(nexus0_sled_id, nexus1_sled_id); + + let dup_mac = nexus0 + .zone_type + .external_networking() + .expect("Nexus has external networking") + .1 + .mac; + match &mut nexus1.zone_type { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + nic, + .. + }) => { + nic.mac = dup_mac; + } + _ => unreachable!("this is a Nexus zone"), + }; + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: nexus1_sled_id, + kind: SledKind::DuplicateNicMac { + zone1: nexus0.clone(), + zone2: nexus1.clone(), + mac: dup_mac, + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_durable_dataset_collision() { + static TEST_NAME: &str = "test_durable_dataset_collision"; + let logctx = test_setup_log(TEST_NAME); + let (_, mut blueprint) = + ExampleSystemBuilder::new(&logctx.log, TEST_NAME) + .external_dns_count(2) + .unwrap() + .build(); + + // Copy the durable zpool from one external DNS to another. + let mut dns_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_external_dns() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (dns0_sled_id, dns0) = + dns_iter.next().expect("at least one external DNS zone"); + let (dns1_sled_id, dns1) = + dns_iter.next().expect("at least two external DNS zones"); + assert_ne!(dns0_sled_id, dns1_sled_id); + + let dup_zpool = dns0 + .zone_type + .durable_zpool() + .expect("external DNS has a durable zpool") + .clone(); + match &mut dns1.zone_type { + BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { dataset, .. 
}, + ) => { + dataset.pool_name = dup_zpool.clone(); + } + _ => unreachable!("this is an external DNS zone"), + }; + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneDurableDatasetCollision { + zone1: dns0.clone(), + zone2: dns1.clone(), + zpool: dup_zpool.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneWithDatasetsOnDifferentZpools { + zone: dns1.clone(), + durable_zpool: dup_zpool.clone(), + transient_zpool: dns1.filesystem_pool.clone().unwrap(), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_transient_root_dataset_collision() { + static TEST_NAME: &str = "test_transient_root_dataset_collision"; + let logctx = test_setup_log(TEST_NAME); + let (_, mut blueprint) = + ExampleSystemBuilder::new(&logctx.log, TEST_NAME) + .external_dns_count(2) + .unwrap() + .build(); + + // Copy the filesystem zpool from one external DNS to another. + let mut dns_iter = blueprint.blueprint_zones.iter_mut().flat_map( + |(sled_id, zones_config)| { + zones_config.zones.iter_mut().filter_map(move |zone| { + if zone.zone_type.is_external_dns() { + Some((*sled_id, zone)) + } else { + None + } + }) + }, + ); + let (dns0_sled_id, dns0) = + dns_iter.next().expect("at least one external DNS zone"); + let (dns1_sled_id, dns1) = + dns_iter.next().expect("at least two external DNS zones"); + assert_ne!(dns0_sled_id, dns1_sled_id); + + let dup_zpool = dns0 + .filesystem_pool + .as_ref() + .expect("external DNS has a filesystem zpool") + .clone(); + dns1.filesystem_pool = Some(dup_zpool.clone()); + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneFilesystemDatasetCollision { + zone1: dns0.clone(), + zone2: dns1.clone(), + zpool: dup_zpool.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: dns1_sled_id, + kind: SledKind::ZoneWithDatasetsOnDifferentZpools { + zone: dns1.clone(), + durable_zpool: dns1 + .zone_type + .durable_zpool() + .unwrap() + .clone(), + transient_zpool: dup_zpool.clone(), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_zpool_with_duplicate_dataset_kinds() { + static TEST_NAME: &str = "test_zpool_with_duplicate_dataset_kinds"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + let mut by_kind = BTreeMap::new(); + + // Loop over the datasets until we find a dataset kind that already + // exists on a different zpool, then copy it over. 
+ let mut found_sled_id = None; + let mut dataset1 = None; + let mut dataset2 = None; + let mut zpool = None; + 'outer: for (sled_id, datasets_config) in + blueprint.blueprint_datasets.iter_mut() + { + for dataset in datasets_config.datasets.values_mut() { + if let Some(prev) = + by_kind.insert(dataset.kind.clone(), dataset.clone()) + { + dataset.pool = prev.pool.clone(); + + found_sled_id = Some(*sled_id); + dataset1 = Some(prev); + dataset2 = Some(dataset.clone()); + zpool = Some(dataset.pool.clone()); + break 'outer; + } + } + } + let sled_id = found_sled_id.expect("found dataset to move"); + let dataset1 = dataset1.expect("found dataset to move"); + let dataset2 = dataset2.expect("found dataset to move"); + let zpool = zpool.expect("found dataset to move"); + + let expected_notes = [Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id, + kind: SledKind::ZpoolWithDuplicateDatasetKinds { + dataset1, + dataset2, + zpool: zpool.id(), + }, + }, + }]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_zpool_missing_default_datasets() { + static TEST_NAME: &str = "test_zpool_missing_default_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Drop the Debug dataset from one zpool and the ZoneRoot dataset from + // another; we should catch both errors. + let (sled_id, datasets_config) = blueprint + .blueprint_datasets + .iter_mut() + .next() + .expect("at least one sled"); + + let mut debug_dataset = None; + let mut zoneroot_dataset = None; + for dataset in &mut datasets_config.datasets.values_mut() { + match &dataset.kind { + DatasetKind::Debug if debug_dataset.is_none() => { + debug_dataset = Some(dataset.clone()); + } + DatasetKind::TransientZoneRoot + if debug_dataset.is_some() + && zoneroot_dataset.is_none() => + { + if Some(&dataset.pool) + != debug_dataset.as_ref().map(|d| &d.pool) + { + zoneroot_dataset = Some(dataset.clone()); + break; + } + } + _ => (), + } + } + let debug_dataset = + debug_dataset.expect("found Debug dataset to prune"); + let zoneroot_dataset = + zoneroot_dataset.expect("found ZoneRoot dataset to prune"); + assert_ne!(debug_dataset.pool, zoneroot_dataset.pool); + + // Actually strip these from the blueprint. 
+ datasets_config.datasets.retain(|&dataset_id, _| { + dataset_id != debug_dataset.id && dataset_id != zoneroot_dataset.id + }); + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZpoolMissingDebugDataset { + zpool: debug_dataset.pool.id(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZpoolMissingZoneRootDataset { + zpool: zoneroot_dataset.pool.id(), + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_zone_missing_datasets() { + static TEST_NAME: &str = "test_zone_missing_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + let (sled_id, datasets_config) = blueprint + .blueprint_datasets + .iter_mut() + .next() + .expect("at least one sled"); + let zones_config = blueprint + .blueprint_zones + .get(sled_id) + .expect("got zones for sled with datasets"); + + // Pick a zone with a durable dataset to remove, and a different zone + // with a filesystem_pool dataset to remove. + let mut durable_zone = None; + let mut root_zone = None; + for z in &zones_config.zones { + if durable_zone.is_none() { + if z.zone_type.durable_zpool().is_some() { + durable_zone = Some(z.clone()); + } + } else if root_zone.is_none() { + root_zone = Some(z); + break; + } + } + let durable_zone = + durable_zone.expect("found zone with durable dataset to prune"); + let root_zone = + root_zone.expect("found zone with root dataset to prune"); + assert_ne!(durable_zone.filesystem_pool, root_zone.filesystem_pool); + + // Actually strip these from the blueprint. + datasets_config.datasets.retain(|_, dataset| { + let matches_durable = (dataset.pool + == *durable_zone.zone_type.durable_zpool().unwrap()) + && (dataset.kind + == durable_zone.zone_type.durable_dataset().unwrap().kind); + let root_dataset = root_zone.filesystem_dataset().unwrap(); + let matches_root = (dataset.pool == *root_dataset.pool()) + && (dataset.kind == *root_dataset.dataset()); + !matches_durable && !matches_root + }); + + let expected_notes = [ + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZoneMissingFilesystemDataset { + zone: root_zone.clone(), + }, + }, + }, + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::ZoneMissingDurableDataset { + zone: durable_zone, + }, + }, + }, + ]; + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_sled_missing_datasets() { + static TEST_NAME: &str = "test_sled_missing_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Pick one sled and remove its blueprint_datasets entry entirely. 
+ let removed_sled_id = *blueprint + .blueprint_datasets + .keys() + .next() + .expect("at least one sled"); + blueprint + .blueprint_datasets + .retain(|&sled_id, _| sled_id != removed_sled_id); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + let mut found_sled_missing_note = false; + for note in report.notes() { + if note.severity == Severity::Fatal { + match ¬e.kind { + Kind::Sled { + sled_id, + kind: SledKind::SledMissingDatasets { .. }, + } if *sled_id == removed_sled_id => { + found_sled_missing_note = true; + } + _ => (), + } + } + } + assert!( + found_sled_missing_note, + "did not find expected note for missing datasets entry for \ + sled {removed_sled_id}" + ); + + logctx.cleanup_successful(); + } + + #[test] + fn test_sled_missing_disks() { + static TEST_NAME: &str = "test_sled_missing_disks"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Pick one sled and remove its blueprint_disks entry entirely. + let removed_sled_id = *blueprint + .blueprint_disks + .keys() + .next() + .expect("at least one sled"); + blueprint + .blueprint_disks + .retain(|&sled_id, _| sled_id != removed_sled_id); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + let mut found_sled_missing_note = false; + for note in report.notes() { + if note.severity == Severity::Fatal { + match ¬e.kind { + Kind::Sled { + sled_id, + kind: SledKind::SledMissingDisks { .. }, + } if *sled_id == removed_sled_id => { + found_sled_missing_note = true; + } + _ => (), + } + } + } + assert!( + found_sled_missing_note, + "did not find expected note for missing disks entry for \ + sled {removed_sled_id}" + ); + + logctx.cleanup_successful(); + } + + #[test] + fn test_orphaned_datasets() { + static TEST_NAME: &str = "test_orphaned_datasets"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Pick two zones (one with a durable dataset and one with a filesystem + // root dataset), and remove both those zones, which should orphan their + // datasets. + let (sled_id, datasets_config) = blueprint + .blueprint_datasets + .iter_mut() + .next() + .expect("at least one sled"); + let zones_config = blueprint + .blueprint_zones + .get_mut(sled_id) + .expect("got zones for sled with datasets"); + let mut durable_zone = None; + let mut root_zone = None; + for z in &zones_config.zones { + if durable_zone.is_none() { + if z.zone_type.durable_zpool().is_some() { + durable_zone = Some(z.clone()); + } + } else if root_zone.is_none() { + root_zone = Some(z.clone()); + break; + } + } + let durable_zone = + durable_zone.expect("found zone with durable dataset to prune"); + let root_zone = + root_zone.expect("found zone with root dataset to prune"); + zones_config + .zones + .retain(|z| z.id != durable_zone.id && z.id != root_zone.id); + + let durable_dataset = durable_zone.zone_type.durable_dataset().unwrap(); + let root_dataset = root_zone.filesystem_dataset().unwrap(); + + // Find the datasets we expect to have been orphaned. 
+ let expected_notes = datasets_config + .datasets + .values() + .filter_map(|dataset| { + if (dataset.pool == durable_dataset.dataset.pool_name + && dataset.kind == durable_dataset.kind) + || (dataset.pool == *root_dataset.pool() + && dataset.kind == *root_dataset.dataset()) + { + Some(Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::OrphanedDataset { + dataset: dataset.clone(), + }, + }, + }) + } else { + None + } + }) + .collect::>(); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } + + #[test] + fn test_dataset_on_nonexistent_zpool() { + static TEST_NAME: &str = "test_dataset_on_nonexistent_zpool"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME); + + // Remove one zpool from one sled, then check that all datasets on that + // zpool produce report notes. + let (sled_id, disks_config) = blueprint + .blueprint_disks + .iter_mut() + .next() + .expect("at least one sled"); + let removed_disk = disks_config.disks.remove(0); + eprintln!("removed disk {removed_disk:?}"); + + let expected_notes = blueprint + .blueprint_datasets + .get(sled_id) + .unwrap() + .datasets + .values() + .filter_map(|dataset| { + if dataset.pool.id() != removed_disk.pool_id { + return None; + } + + let note = match dataset.kind { + DatasetKind::Debug | DatasetKind::TransientZoneRoot => { + Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::OrphanedDataset { + dataset: dataset.clone(), + }, + }, + } + } + _ => Note { + severity: Severity::Fatal, + kind: Kind::Sled { + sled_id: *sled_id, + kind: SledKind::DatasetOnNonexistentZpool { + dataset: dataset.clone(), + }, + }, + }, + }; + Some(note) + }) + .collect::>(); + assert!(!expected_notes.is_empty()); + + let report = + Blippy::new(&blueprint).into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + for note in expected_notes { + assert!( + report.notes().contains(¬e), + "did not find expected note {note:?}" + ); + } + + logctx.cleanup_successful(); + } +} diff --git a/nexus/reconfigurator/blippy/src/lib.rs b/nexus/reconfigurator/blippy/src/lib.rs new file mode 100644 index 0000000000..283bbfc0d0 --- /dev/null +++ b/nexus/reconfigurator/blippy/src/lib.rs @@ -0,0 +1,24 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Blippy: the blueprint checker +//! +//! [`Blippy`] performs a variety of checks on blueprints to ensure they are +//! internally-consistent (e.g., "every in-service zone that should have one or +//! more datasets do", or "any given external IP address is used by at most one +//! in-service zone"). It emits [`BlippyReport`]s in the form of a list of +//! [`BlippyNote`]s, each of which has an associated severity and parent +//! component (typically a sled). 
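// A minimal end-to-end sketch of the API described above, mirroring how the
// tests in checks.rs drive it (the `Blueprint` value itself is assumed to
// come from elsewhere, e.g. a test fixture):
//
//     use nexus_reconfigurator_blippy::{Blippy, BlippyReportSortKey};
//     use nexus_types::deployment::Blueprint;
//
//     fn print_blippy_report(blueprint: &Blueprint) {
//         let report =
//             Blippy::new(blueprint).into_report(BlippyReportSortKey::Severity);
//         if !report.notes().is_empty() {
//             eprintln!("{}", report.display());
//         }
//     }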
+ +mod blippy; +mod checks; +mod report; + +pub use blippy::Blippy; +pub use blippy::Kind as BlippyKind; +pub use blippy::Note as BlippyNote; +pub use blippy::Severity as BlippySeverity; +pub use blippy::SledKind as BlippySledKind; +pub use report::BlippyReport; +pub use report::BlippyReportSortKey; diff --git a/nexus/reconfigurator/blippy/src/report.rs b/nexus/reconfigurator/blippy/src/report.rs new file mode 100644 index 0000000000..136d3b7538 --- /dev/null +++ b/nexus/reconfigurator/blippy/src/report.rs @@ -0,0 +1,85 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::blippy::Note; +use core::fmt; +use nexus_types::deployment::Blueprint; + +#[derive(Debug, Clone, Copy)] +pub enum BlippyReportSortKey { + Kind, + Severity, +} + +#[derive(Debug)] +pub struct BlippyReport<'a> { + blueprint: &'a Blueprint, + notes: Vec, + sort_key: BlippyReportSortKey, +} + +impl<'a> BlippyReport<'a> { + pub(crate) fn new( + blueprint: &'a Blueprint, + notes: Vec, + sort_key: BlippyReportSortKey, + ) -> Self { + let mut slf = Self { blueprint, notes, sort_key }; + slf.sort_notes_by_key(sort_key); + slf + } + + pub fn sort_notes_by_key(&mut self, key: BlippyReportSortKey) { + match key { + BlippyReportSortKey::Kind => { + self.notes.sort_unstable_by(|a, b| { + let a = (&a.kind, &a.severity); + let b = (&b.kind, &b.severity); + a.cmp(&b) + }); + } + BlippyReportSortKey::Severity => { + self.notes.sort_unstable_by(|a, b| { + let a = (&a.severity, &a.kind); + let b = (&b.severity, &b.kind); + a.cmp(&b) + }); + } + } + self.sort_key = key; + } + + pub fn blueprint(&self) -> &'a Blueprint { + self.blueprint + } + + pub fn notes(&self) -> &[Note] { + &self.notes + } + + pub fn display(&self) -> BlippyReportDisplay<'_> { + BlippyReportDisplay { report: self } + } +} + +#[derive(Debug)] +pub struct BlippyReportDisplay<'a> { + report: &'a BlippyReport<'a>, +} + +impl fmt::Display for BlippyReportDisplay<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let pluralize = if self.report.notes.len() == 1 { "" } else { "s" }; + writeln!( + f, + "blippy report for blueprint {}: {} note{pluralize}", + self.report.blueprint.id, + self.report.notes.len(), + )?; + for note in self.report.notes() { + writeln!(f, " {}", note.display(self.report.sort_key))?; + } + Ok(()) + } +} diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 5ba1665483..543b9bd278 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -400,7 +400,9 @@ fn register_dataset_records_step<'a>( &opctx, datastore, bp_id, - blueprint.all_omicron_datasets(BlueprintDatasetFilter::All), + blueprint + .all_omicron_datasets(BlueprintDatasetFilter::All) + .map(|(_sled_id, dataset)| dataset), ) .await?; diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index 63c5e3e44f..c7d978b387 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -20,6 +20,7 @@ ipnet.workspace = true itertools.workspace = true nexus-config.workspace = true nexus-inventory.workspace = true +nexus-reconfigurator-blippy.workspace = true nexus-sled-agent-shared.workspace = true nexus-types.workspace = true omicron-common.workspace = true diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs 
b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index ad59fb3718..0aaadb624d 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -1834,11 +1834,13 @@ impl<'a> BlueprintBuilder<'a> { let mut skip_zpools = BTreeSet::new(); for zone_config in self .current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning) + .filter(|z| z.zone_type.kind() == zone_kind) { if let Some(zpool) = zone_config.zone_type.durable_zpool() { - if zone_kind == zone_config.zone_type.kind() { - skip_zpools.insert(zpool); - } + skip_zpools.insert(zpool); + } + if let Some(zpool) = &zone_config.filesystem_pool { + skip_zpools.insert(zpool); } } @@ -1901,211 +1903,34 @@ impl<'a> BlueprintBuilder<'a> { #[cfg(test)] pub mod test { use super::*; - use crate::blueprint_builder::external_networking::ExternalIpAllocator; use crate::example::example; use crate::example::ExampleSystemBuilder; use crate::example::SimRngState; use crate::system::SledBuilder; use nexus_inventory::CollectionBuilder; - use nexus_types::deployment::BlueprintDatasetConfig; + use nexus_reconfigurator_blippy::Blippy; + use nexus_reconfigurator_blippy::BlippyReportSortKey; use nexus_types::deployment::BlueprintDatasetDisposition; - use nexus_types::deployment::BlueprintDatasetFilter; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::OmicronZoneNetworkResources; use nexus_types::external_api::views::SledPolicy; use omicron_common::address::IpRange; - use omicron_common::disk::DatasetKind; use omicron_test_utils::dev::test_setup_log; - use omicron_uuid_kinds::DatasetUuid; use std::collections::BTreeSet; use std::mem; pub const DEFAULT_N_SLEDS: usize = 3; - fn datasets_for_sled( - blueprint: &Blueprint, - sled_id: SledUuid, - ) -> &BTreeMap { - &blueprint - .blueprint_datasets - .get(&sled_id) - .unwrap_or_else(|| { - panic!("Cannot find datasets on missing sled: {sled_id}") - }) - .datasets - } - - fn find_dataset<'a>( - datasets: &'a BTreeMap, - zpool: &ZpoolName, - kind: DatasetKind, - ) -> &'a BlueprintDatasetConfig { - datasets.values().find(|dataset| { - &dataset.pool == zpool && - dataset.kind == kind - }).unwrap_or_else(|| { - let kinds = datasets.values().map(|d| (&d.id, &d.pool, &d.kind)).collect::>(); - panic!("Cannot find dataset of type {kind}\nFound the following: {kinds:#?}") - }) - } - /// Checks various conditions that should be true for all blueprints #[track_caller] pub fn verify_blueprint(blueprint: &Blueprint) { - // There should be no duplicate underlay IPs. - let mut underlay_ips: BTreeMap = - BTreeMap::new(); - for (_, zone) in - blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) - { - if let Some(previous) = - underlay_ips.insert(zone.underlay_ip(), zone) - { - panic!( - "found duplicate underlay IP {} in zones {} and {}\ - \n\n\ - blueprint: {}", - zone.underlay_ip(), - zone.id, - previous.id, - blueprint.display(), - ); - } - } - - // There should be no duplicate external IPs. - // - // Checking this is slightly complicated due to SNAT IPs, so we'll - // delegate to an `ExternalIpAllocator`, which already contains the - // logic for dup checking. (`mark_ip_used` fails if the IP is _already_ - // marked as used.) - // - // We create this with an empty set of service IP pool ranges; those are - // used for allocation, which we don't do, and aren't needed for - // duplicate checking. 
- let mut ip_allocator = ExternalIpAllocator::new(&[]); - for (external_ip, _nic) in blueprint - .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) - .filter_map(|(_, zone)| zone.zone_type.external_networking()) - { - ip_allocator - .mark_ip_used(&external_ip) - .expect("no duplicate external IPs in running zones"); - } - - // On any given zpool, we should have at most one zone of any given - // kind. - // - // TODO: we may want a similar check for non-durable datasets? - let mut kinds_by_zpool: BTreeMap< - ZpoolUuid, - BTreeMap, - > = BTreeMap::new(); - for (_, zone) in blueprint.all_omicron_zones(BlueprintZoneFilter::All) { - if let Some(dataset) = zone.zone_type.durable_dataset() { - let kind = zone.zone_type.kind(); - if let Some(previous) = kinds_by_zpool - .entry(dataset.dataset.pool_name.id()) - .or_default() - .insert(kind, zone.id) - { - panic!( - "zpool {} has two zones of kind {kind:?}: {} and {}\ - \n\n\ - blueprint: {}", - dataset.dataset.pool_name, - zone.id, - previous, - blueprint.display(), - ); - } - } - } - - // All disks should have debug and zone root datasets. - for (sled_id, disk_config) in &blueprint.blueprint_disks { - for disk in &disk_config.disks { - eprintln!( - "checking datasets for sled {sled_id} disk {}", - disk.id - ); - let zpool = ZpoolName::new_external(disk.pool_id); - let datasets = datasets_for_sled(&blueprint, *sled_id); - - let dataset = - find_dataset(&datasets, &zpool, DatasetKind::Debug); - assert_eq!( - dataset.disposition, - BlueprintDatasetDisposition::InService - ); - let dataset = find_dataset( - &datasets, - &zpool, - DatasetKind::TransientZoneRoot, - ); - assert_eq!( - dataset.disposition, - BlueprintDatasetDisposition::InService - ); - } - } - - // All zones should have dataset records. - for (sled_id, zone_config) in - blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) - { - match blueprint.sled_state.get(&sled_id) { - // Decommissioned sleds don't keep dataset state around. - // - // Normally we wouldn't observe zones from decommissioned sleds - // anyway, but that's the responsibility of the Planner, not the - // BlueprintBuilder. - None | Some(SledState::Decommissioned) => continue, - Some(SledState::Active) => (), - } - let datasets = datasets_for_sled(&blueprint, sled_id); - - let (zpool, kind) = - zone_config.filesystem_dataset().unwrap().into_parts(); - let dataset = find_dataset(&datasets, &zpool, kind); - assert_eq!( - dataset.disposition, - BlueprintDatasetDisposition::InService - ); - - if let Some(durable_dataset) = - zone_config.zone_type.durable_dataset() - { - let zpool = &durable_dataset.dataset.pool_name; - let dataset = - find_dataset(&datasets, &zpool, durable_dataset.kind); - assert_eq!( - dataset.disposition, - BlueprintDatasetDisposition::InService - ); - } - } - - // All datasets should be on zpools that have disk records. 
- for (sled_id, datasets) in &blueprint.blueprint_datasets { - let sled_disk_zpools = blueprint - .blueprint_disks - .get(&sled_id) - .expect("no disks for sled") - .disks - .iter() - .map(|disk| disk.pool_id) - .collect::>(); - - for dataset in datasets.datasets.values().filter(|dataset| { - dataset.disposition.matches(BlueprintDatasetFilter::InService) - }) { - assert!( - sled_disk_zpools.contains(&dataset.pool.id()), - "sled {sled_id} has dataset {dataset:?}, \ - which references a zpool without an associated disk", - ); - } + let blippy_report = + Blippy::new(blueprint).into_report(BlippyReportSortKey::Kind); + if !blippy_report.notes().is_empty() { + eprintln!("{}", blueprint.display()); + eprintln!("---"); + eprintln!("{}", blippy_report.display()); + panic!("expected blippy report for blueprint to have no notes"); } } @@ -2314,6 +2139,20 @@ pub mod test { *blueprint1.sled_state.get_mut(&decommision_sled_id).unwrap() = SledState::Decommissioned; + // We're going under the hood of the blueprint here; a sled can only get + // to the decommissioned state if all its disks/datasets/zones have been + // expunged, so do that too. + for zone in &mut blueprint1 + .blueprint_zones + .get_mut(&decommision_sled_id) + .expect("has zones") + .zones + { + zone.disposition = BlueprintZoneDisposition::Expunged; + } + blueprint1.blueprint_datasets.remove(&decommision_sled_id); + blueprint1.blueprint_disks.remove(&decommision_sled_id); + // Change the input to note that the sled is expunged, but still active. let mut builder = input.into_builder(); builder.sleds_mut().get_mut(&decommision_sled_id).unwrap().policy = diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs b/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs index 56fa39374d..4222481149 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs @@ -102,6 +102,7 @@ pub mod test { use crate::blueprint_builder::test::verify_blueprint; use crate::example::ExampleSystemBuilder; use nexus_types::deployment::BlueprintZoneFilter; + use omicron_common::disk::DatasetKind; use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; use omicron_test_utils::dev::test_setup_log; @@ -128,6 +129,24 @@ pub mod test { let npruned = blueprint1.blueprint_zones.len() - 1; assert!(npruned > 0); + // Also prune out the zones' datasets, or we're left with an invalid + // blueprint. + for (_, dataset_config) in + blueprint1.blueprint_datasets.iter_mut().skip(1) + { + dataset_config.datasets.retain(|_id, dataset| { + // This is gross; once zone configs know explicit dataset IDs, + // we should retain by ID instead. + match &dataset.kind { + DatasetKind::InternalDns => false, + DatasetKind::TransientZone { name } => { + !name.starts_with("oxz_internal_dns") + } + _ => true, + } + }); + } + verify_blueprint(&blueprint1); // Create an allocator. 
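// Aside (sketch only, not something this change introduces): the retain
// closure above is repeated in the planner.rs hunk below; if the "gross"
// matching-by-name ever needs a third copy, a small shared predicate would
// capture it:
//
//     fn is_internal_dns_dataset(kind: &DatasetKind) -> bool {
//         match kind {
//             DatasetKind::InternalDns => true,
//             DatasetKind::TransientZone { name } => {
//                 name.starts_with("oxz_internal_dns")
//             }
//             _ => false,
//         }
//     }
//
// with call sites pruning via `datasets.retain(|_id, d| !is_internal_dns_dataset(&d.kind))`.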
diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index a18bb6e3d3..ce1a1ae960 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -1253,6 +1253,21 @@ mod test { for (_sled_id, zones) in blueprint1.blueprint_zones.iter_mut().take(2) { zones.zones.retain(|z| !z.zone_type.is_internal_dns()); } + for (_, dataset_config) in + blueprint1.blueprint_datasets.iter_mut().take(2) + { + dataset_config.datasets.retain(|_id, dataset| { + // This is gross; once zone configs know explicit dataset IDs, + // we should retain by ID instead. + match &dataset.kind { + DatasetKind::InternalDns => false, + DatasetKind::TransientZone { name } => { + !name.starts_with("oxz_internal_dns") + } + _ => true, + } + }); + } let blueprint2 = Planner::new_based_on( logctx.log.clone(), diff --git a/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt index f2d8334027..45d7feb667 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_dataset_settings_modified_in_place_1_2.txt @@ -61,8 +61,8 @@ to: blueprint fe13be30-94c2-4fa6-aad5-ae3c5028f6bb oxp_f843fb62-0f04-4c7d-a56f-62531104dc77/crypt/zone/oxz_crucible_fc4f1769-9611-42d3-b8c1-f2be9b5359f6 35fa6ec8-6b58-4fcc-a5a2-36e66736e9c1 none none off oxp_96569b61-9e0c-4ee7-bd11-a5e0c541ca99/crypt/zone/oxz_crucible_fff71a84-09c2-4dab-bc18-8f4570f278bb 00abfe99-288d-4a63-abea-adfa62e74524 none none off oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_crucible_pantry_197067bc-9a21-444e-9794-6051d9f78a00 19736dbd-1d01-41e9-a800-ffc450464c2d none none off - oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_crucible_pantry_350fba7f-b754-429e-a21d-e91d139713f2 8be4aa2f-1612-4bdf-a0f6-7458b151308f none none off - oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_crucible_pantry_504963cb-3077-477c-b4e5-2d69bf9caa0c 7fd439f9-dcef-4cfb-b1a1-d298be9d2e3b none none off + oxp_5192ef62-5a12-4a0c-829d-a409da87909c/crypt/zone/oxz_crucible_pantry_350fba7f-b754-429e-a21d-e91d139713f2 8be4aa2f-1612-4bdf-a0f6-7458b151308f none none off + oxp_8778bcc5-dddf-4345-9fdf-5c46a36497b0/crypt/zone/oxz_crucible_pantry_504963cb-3077-477c-b4e5-2d69bf9caa0c 7fd439f9-dcef-4cfb-b1a1-d298be9d2e3b none none off oxp_3b6e2ade-57fc-4f9d-85c3-38fca27f1df6/crypt/zone/oxz_internal_dns_1e9422ca-a3d9-4435-bb17-39d5ad22b4ba 5651c4fb-d146-4270-8794-6ed7ceb6f130 none none off oxp_8778bcc5-dddf-4345-9fdf-5c46a36497b0/crypt/zone/oxz_internal_dns_4a0ec9f6-6ce6-4456-831e-5f8df7b57332 d2b9f103-8bf1-4603-873d-cec130430ba7 none none off oxp_5192ef62-5a12-4a0c-829d-a409da87909c/crypt/zone/oxz_internal_dns_efecb8a2-ce0b-416f-958b-de1fad1bef02 158e226c-e44e-427f-93af-ee96d2cfb9be none none off diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt index ef93527fa3..6414749fce 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt @@ -177,8 +177,8 @@ to: blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e oxp_9d833141-18a1-4f24-8a34-6076c026aa87/crypt/debug f631d6a1-9db5-4fc7-978b-9ace485dfe16 100 GiB none gzip-9 
oxp_a279461f-a7b9-413f-a79f-cb4dab4c3fce/crypt/debug 1b9c97d6-c90d-4109-b99c-9ab799b3c3b9 100 GiB none gzip-9 oxp_ff7e002b-3ad8-4d45-b03a-c46ef0ac8e59/crypt/debug 9427caff-29ec-4cd1-981b-26d4a7900052 100 GiB none gzip-9 -+ oxp_1e2ec79e-9c11-4133-ac77-e0b994a507d5/crypt/zone/oxz_crucible_pantry_ff9ce09c-afbf-425b-bbfa-3d8fb254f98e 7d47e5d6-a1a5-451a-b4b4-3a9747f8154a none none off -+ oxp_1e2ec79e-9c11-4133-ac77-e0b994a507d5/crypt/zone/oxz_nexus_845869e9-ecb2-4ec3-b6b8-2a836e459243 a759d2f3-003c-4fb8-b06b-f985e213b273 none none off ++ oxp_440ae69d-5e2e-4539-91d0-e2930bdd7203/crypt/zone/oxz_crucible_pantry_ff9ce09c-afbf-425b-bbfa-3d8fb254f98e 7d47e5d6-a1a5-451a-b4b4-3a9747f8154a none none off ++ oxp_440ae69d-5e2e-4539-91d0-e2930bdd7203/crypt/zone/oxz_nexus_845869e9-ecb2-4ec3-b6b8-2a836e459243 a759d2f3-003c-4fb8-b06b-f985e213b273 none none off omicron zones generation 2 -> 3: diff --git a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt index 803d4ea2a1..39b3398b2e 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_3_4.txt @@ -288,8 +288,8 @@ to: blueprint 74f2e7fd-687e-4c9e-b5d8-e474a5bb8e7c oxp_fe379ac6-1938-4cc2-93a9-43b1447229ae/crypt/debug 3a49dd24-8ead-4196-b453-8aa3273b77d1 100 GiB none gzip-9 + oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/clickhouse 410eca9c-8eee-4a98-aea2-a363697974f7 none none off + oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_clickhouse_fa97835a-aabc-4fe9-9e85-3e50f207129c 08f15d4b-91dc-445d-88f4-cb9fa585444b none none off -+ oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off -+ oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off ++ oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off ++ oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off omicron zones generation 3 -> 4: diff --git a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt index 93dc17c180..2c90c981fb 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_expunge_clickhouse_clusters_5_6.txt @@ -188,9 +188,9 @@ to: blueprint df68d4d4-5af4-4b56-95bb-1654a6957d4f oxp_427b2ccd-998f-4085-af21-e600604cf21e/crypt/zone/oxz_crucible_befe73dd-5970-49a4-9adf-7b4f453c45cf 95d72ef9-e070-49e4-a57b-2c392def6025 none none off oxp_2fa34d8e-13d9-42d3-b8ba-ca9d74ac496a/crypt/zone/oxz_crucible_d9106a19-f267-48db-a82b-004e643feb49 9b9fb14e-cd17-4a7a-a74a-bfd9c7682831 none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_crucible_pantry_6c7f6a84-78b3-4dd9-878e-51bedfda471f aa190e01-9a4e-4131-9fcf-240532108c7f none none off - oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off + 
oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_crucible_pantry_7741bb11-0d99-4856-95ae-725b6b9ff4fa 4eb52e76-39fa-414d-ae9b-2dcb1c7737f9 none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_internal_dns_0c42ad01-b854-4e7d-bd6c-25fdc3eddef4 1de9cde7-6c1e-4865-bd3d-378e22f62fb8 none none off - oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off + oxp_2acfbb84-5ce0-424e-8d73-44c5071d4430/crypt/zone/oxz_nexus_69789010-8689-43ab-9a68-a944afcba05a e67b797b-a059-4c7e-a98b-fea18964bad6 none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_nexus_7e763480-0f4f-43cb-ab9a-52b667d8fda5 5773e3b1-dde0-4b54-bc13-3c3bf816015e none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/zone/oxz_ntp_f34f8d36-7137-48d3-9d13-6a46c4edcef4 c8c03dec-65d4-4c97-87c3-a43a8363c97c none none off oxp_21d60319-5fe1-4a3b-a4c0-6aa7465e7bde/crypt/debug f015e445-2e52-45c9-9f0a-49cb5ceae245 100 GiB none gzip-9 diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt index f4f63ad96a..8bb7635d75 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt @@ -353,9 +353,9 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cf32a1ce-2c9e-49f5-b1cf-4af7f2a28901/crypt/debug 2dfc5c53-6618-4352-b754-86ef6463c20a 100 GiB none gzip-9 oxp_e405da11-cb6b-4ebc-bac1-9bc997352e10/crypt/debug 61a653cf-44a6-43c0-90e1-bec539511703 100 GiB none gzip-9 oxp_f4d7f914-ec73-4b65-8696-5068591d9065/crypt/debug b803d901-7e43-42fa-8372-43c3c5b3c1a9 100 GiB none gzip-9 -+ oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off -+ oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off -+ oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off ++ oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off ++ oxp_55196665-ed61-4b23-9a74-0711bf2eaf90/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off ++ oxp_6b2a719a-35eb-469f-aa54-114a1f21f37d/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off omicron zones generation 2 -> 3: @@ -443,9 +443,9 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cd62306a-aedf-47e8-93d5-92a358d64c7b/crypt/debug 73674f4b-1d93-404a-bc9c-8395efac97fd 100 GiB none gzip-9 oxp_f1693454-aac1-4265-b8a0-4e9f3f41c7b3/crypt/debug 938737fb-b72f-4727-8833-9697c518ca37 100 GiB none gzip-9 oxp_fe4fdfba-3b6d-47d3-8612-1fb2390b650a/crypt/debug 8e58b91f-9ce2-4256-8dec-5f90f31a73fa 100 GiB none gzip-9 -+ oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off -+ oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off -+ 
oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off ++ oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off ++ oxp_60131a33-1f12-4dbb-9435-bdd368db1f51/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off ++ oxp_4fbd2fe0-2eac-41b8-8e8d-4fa46c3e8b6c/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off omicron zones generation 2 -> 3: diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt index 72750bc8f0..9fe4b5218b 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt @@ -55,10 +55,10 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cf32a1ce-2c9e-49f5-b1cf-4af7f2a28901/crypt/zone/oxz_crucible_c60379ba-4e30-4628-a79a-0ae509aef4c5 a6fcf496-70a1-49bf-a951-62fcec8dd5e2 none none off oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/zone/oxz_crucible_f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 c596346d-4040-4103-b036-8fafdbaada00 none none off oxp_6b2a719a-35eb-469f-aa54-114a1f21f37d/crypt/zone/oxz_crucible_f1a7b9a7-fc6a-4b23-b829-045ff33117ff c864de0d-9859-4ad1-a30b-f5ac45ba03ed none none off - oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off - oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off + oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/zone/oxz_nexus_508abd03-cbfe-4654-9a6d-7f15a1ad32e5 b781d032-3149-4c44-a7d3-5f8d80e4a607 none none off + oxp_55196665-ed61-4b23-9a74-0711bf2eaf90/crypt/zone/oxz_nexus_99f6d544-8599-4e2b-a55a-82d9e0034662 8a39677a-fbcf-4884-b000-63be3247fb63 none none off oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_a732c489-d29a-4f75-b900-5966385943af db6c139b-9028-4d8e-92c7-6cc1e9aa0131 none none off - oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off + oxp_6b2a719a-35eb-469f-aa54-114a1f21f37d/crypt/zone/oxz_nexus_c26b3bda-5561-44a1-a69f-22103fe209a1 c9c1a582-1fe0-4001-9301-97230387563a none none off oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/zone/oxz_ntp_621509d6-3772-4009-aca1-35eefd1098fb 3b5822d2-9918-4bd6-8b75-2f52bdd73189 none none off oxp_4069c804-c51a-4adc-8822-3cbbab56ed3f/crypt/debug bf9b39db-5a6a-4b45-b2da-c37425271014 100 GiB none gzip-9 oxp_5248a306-4a03-449e-a8a3-6f86d26da755/crypt/debug 1b4e8d9e-e447-4df1-8e0b-57edc318e8ad 100 GiB none gzip-9 @@ -145,10 +145,10 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 oxp_cd62306a-aedf-47e8-93d5-92a358d64c7b/crypt/zone/oxz_crucible_be920398-024a-4655-8c49-69b5ac48dfff 87f757d6-fa4c-4423-995c-1eab5e7d09a2 none none off oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/zone/oxz_crucible_d47f4996-fac0-4657-bcea-01b1fee6404d c1af262a-2595-4236-98c8-21c5b63c80c3 none none off oxp_789d607d-d196-428e-a988-f7886a327859/crypt/zone/oxz_crucible_e001fea0-6594-4ece-97e3-6198c293e931 5e27b9bc-e69f-4258-83f2-5f9a1109a625 none none off - 
oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off - oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off + oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/zone/oxz_nexus_2ec75441-3d7d-4b4b-9614-af03de5a3666 cd15e9c9-0238-493a-8b32-926d1cd1bce6 none none off + oxp_60131a33-1f12-4dbb-9435-bdd368db1f51/crypt/zone/oxz_nexus_3ca5292f-8a59-4475-bb72-0f43714d0fff 871b35e6-d234-4a96-bab4-d07314bc6ba2 none none off oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf 45d32c13-cbbb-4382-a0ed-dc6574b827b7 none none off - oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off + oxp_4fbd2fe0-2eac-41b8-8e8d-4fa46c3e8b6c/crypt/zone/oxz_nexus_59950bc8-1497-44dd-8cbf-b6502ba921b2 63ec1a21-2c77-41b5-ad3e-e7bf39207107 none none off oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/zone/oxz_ntp_bf79a56a-97af-4cc4-94a5-8b20d64c2cda a410308c-e2cb-4e4d-9da6-1879336f93f2 none none off oxp_33d48d85-751e-4982-b738-eae4d9a05f01/crypt/debug 755e24a8-67cc-44b1-8c25-2dcb3acd988f 100 GiB none gzip-9 oxp_39ca2e23-4c38-4743-afe0-26b0380b27db/crypt/debug c834f8cd-25ee-4c62-af03-49cef53fc4c1 100 GiB none gzip-9 diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index ad39777054..f107f2af71 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -825,7 +825,7 @@ impl BackgroundTasksInitializer { done", period: config.region_snapshot_replacement_finish.period_secs, task_impl: Box::new(RegionSnapshotReplacementFinishDetector::new( - datastore, + datastore, sagas, )), opctx: opctx.child(BTreeMap::new()), watchers: vec![], diff --git a/nexus/src/app/background/tasks/region_replacement.rs b/nexus/src/app/background/tasks/region_replacement.rs index f86ba8eb8f..caa262bc8c 100644 --- a/nexus/src/app/background/tasks/region_replacement.rs +++ b/nexus/src/app/background/tasks/region_replacement.rs @@ -23,6 +23,7 @@ use nexus_db_model::RegionReplacement; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::internal_api::background::RegionReplacementStatus; +use omicron_common::api::external::Error; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::TypedUuid; use serde_json::json; @@ -42,7 +43,7 @@ impl RegionReplacementDetector { &self, serialized_authn: authn::saga::Serialized, request: RegionReplacement, - ) -> Result<(), omicron_common::api::external::Error> { + ) -> Result<(), Error> { let params = sagas::region_replacement_start::Params { serialized_authn, request, @@ -68,17 +69,18 @@ impl BackgroundTask for RegionReplacementDetector { let mut status = RegionReplacementStatus::default(); - // Find regions on expunged physical disks + // Find read/write regions on expunged physical disks let regions_to_be_replaced = match self .datastore - .find_regions_on_expunged_physical_disks(opctx) + .find_read_write_regions_on_expunged_physical_disks(opctx) .await { Ok(regions) => regions, Err(e) => { let s = format!( - "find_regions_on_expunged_physical_disks failed: {e}" + "find_read_write_regions_on_expunged_physical_disks \ + failed: {e}" ); error!(&log, "{s}"); status.errors.push(s); @@ -134,15 +136,31 @@ impl BackgroundTask for RegionReplacementDetector { } Err(e) => { - let s = format!( - 
"error adding region replacement request for \ - region {} volume id {}: {e}", - region.id(), - region.volume_id(), - ); - error!(&log, "{s}"); + match e { + Error::Conflict { message } + if message.external_message() + == "volume repair lock" => + { + // This is not a fatal error! If there are + // competing region replacement and region + // snapshot replacements, then they are both + // attempting to lock volumes. + } + + _ => { + let s = format!( + "error adding region replacement \ + request for region {} volume id {}: \ + {e}", + region.id(), + region.volume_id(), + ); + error!(&log, "{s}"); + + status.errors.push(s); + } + } - status.errors.push(s); continue; } } @@ -173,7 +191,9 @@ impl BackgroundTask for RegionReplacementDetector { // If the replacement request is in the `requested` state and // the request's volume was soft-deleted or hard-deleted, avoid // sending the start request and instead transition the request - // to completed + // to completed. Note the saga will do the right thing if the + // volume is deleted, but this avoids the overhead of starting + // it. let volume_deleted = match self .datastore @@ -314,6 +334,21 @@ mod test { // Add a region replacement request for a fake region let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = RegionReplacement::new(Uuid::new_v4(), volume_id); let request_id = request.id; diff --git a/nexus/src/app/background/tasks/region_replacement_driver.rs b/nexus/src/app/background/tasks/region_replacement_driver.rs index e7fe0d6338..6cc28f9dfd 100644 --- a/nexus/src/app/background/tasks/region_replacement_driver.rs +++ b/nexus/src/app/background/tasks/region_replacement_driver.rs @@ -258,6 +258,7 @@ mod test { use omicron_uuid_kinds::UpstairsKind; use omicron_uuid_kinds::UpstairsRepairKind; use omicron_uuid_kinds::UpstairsSessionKind; + use sled_agent_client::types::VolumeConstructionRequest; use uuid::Uuid; type ControlPlaneTestContext = @@ -288,6 +289,20 @@ mod test { let new_region_id = Uuid::new_v4(); let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let request = { let mut request = RegionReplacement::new(region_id, volume_id); request.replacement_state = RegionReplacementState::Running; @@ -382,6 +397,20 @@ mod test { .unwrap(); } + datastore + .volume_create(nexus_db_model::Volume::new( + old_region.volume_id(), + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: old_region.volume_id(), + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + // Add a region replacement request for that region, and change it to // state ReplacementDone. Set the new_region_id to the region created // above. 
@@ -481,6 +510,20 @@ mod test { .unwrap(); } + datastore + .volume_create(nexus_db_model::Volume::new( + old_region.volume_id(), + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: old_region.volume_id(), + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + // Add a region replacement request for that region, and change it to // state Running. Set the new_region_id to the region created above. let request = { @@ -630,6 +673,20 @@ mod test { .unwrap(); } + datastore + .volume_create(nexus_db_model::Volume::new( + old_region.volume_id(), + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: old_region.volume_id(), + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + // Add a region replacement request for that region, and change it to // state Running. Set the new_region_id to the region created above. let request = { diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs index 0eebd37fb7..61a84c579d 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs @@ -8,9 +8,15 @@ //! Once all related region snapshot replacement steps are done, the region //! snapshot replacement can be completed. +use crate::app::authn; use crate::app::background::BackgroundTask; +use crate::app::saga::StartSaga; +use crate::app::sagas; +use crate::app::sagas::region_snapshot_replacement_finish::*; +use crate::app::sagas::NexusSaga; use futures::future::BoxFuture; use futures::FutureExt; +use nexus_db_model::RegionSnapshotReplacement; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::internal_api::background::RegionSnapshotReplacementFinishStatus; @@ -19,11 +25,31 @@ use std::sync::Arc; pub struct RegionSnapshotReplacementFinishDetector { datastore: Arc, + sagas: Arc, } impl RegionSnapshotReplacementFinishDetector { - pub fn new(datastore: Arc) -> Self { - RegionSnapshotReplacementFinishDetector { datastore } + pub fn new(datastore: Arc, sagas: Arc) -> Self { + RegionSnapshotReplacementFinishDetector { datastore, sagas } + } + + async fn send_finish_request( + &self, + opctx: &OpContext, + request: RegionSnapshotReplacement, + ) -> Result<(), omicron_common::api::external::Error> { + let params = sagas::region_snapshot_replacement_finish::Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + request, + }; + + let saga_dag = SagaRegionSnapshotReplacementFinish::prepare(¶ms)?; + + // We only care that the saga was started, and don't wish to wait for it + // to complete, so use `StartSaga::saga_start`, rather than `saga_run`. 
+ self.sagas.saga_start(saga_dag).await?; + + Ok(()) } async fn transition_requests_to_done( @@ -120,21 +146,23 @@ impl RegionSnapshotReplacementFinishDetector { } }; - // Transition region snapshot replacement to Complete - match self - .datastore - .set_region_snapshot_replacement_complete(opctx, request.id) - .await - { + let request_id = request.id; + + match self.send_finish_request(opctx, request).await { Ok(()) => { - let s = format!("set request {} to done", request.id); + let s = format!( + "region snapshot replacement finish invoked ok for \ + {request_id}" + ); + info!(&log, "{s}"); - status.records_set_to_done.push(s); + status.finish_invoked_ok.push(s); } Err(e) => { let s = format!( - "marking snapshot replacement as done failed: {e}" + "invoking region snapshot replacement finish for \ + {request_id} failed: {e}", ); error!(&log, "{s}"); status.errors.push(s); @@ -170,6 +198,7 @@ mod test { use nexus_db_queries::db::datastore::region_snapshot_replacement; use nexus_test_utils_macros::nexus_test; use omicron_uuid_kinds::DatasetUuid; + use sled_agent_client::types::VolumeConstructionRequest; use uuid::Uuid; type ControlPlaneTestContext = @@ -186,8 +215,10 @@ mod test { datastore.clone(), ); - let mut task = - RegionSnapshotReplacementFinishDetector::new(datastore.clone()); + let mut task = RegionSnapshotReplacementFinishDetector::new( + datastore.clone(), + nexus.sagas.clone(), + ); // Noop test let result: RegionSnapshotReplacementFinishStatus = @@ -208,11 +239,25 @@ mod test { let request_id = request.id; + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); @@ -232,6 +277,7 @@ mod test { .unwrap(); let new_region_id = Uuid::new_v4(); + let new_region_volume_id = Uuid::new_v4(); let old_snapshot_volume_id = Uuid::new_v4(); datastore @@ -240,6 +286,7 @@ mod test { request_id, operating_saga_id, new_region_id, + new_region_volume_id, old_snapshot_volume_id, ) .await @@ -267,14 +314,44 @@ mod test { let operating_saga_id = Uuid::new_v4(); + let step_volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut step_1 = - RegionSnapshotReplacementStep::new(request_id, Uuid::new_v4()); + RegionSnapshotReplacementStep::new(request_id, step_volume_id); step_1.replacement_state = RegionSnapshotReplacementStepState::Complete; step_1.operating_saga_id = Some(operating_saga_id); let step_1_id = step_1.id; + let step_volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut step_2 = - RegionSnapshotReplacementStep::new(request_id, Uuid::new_v4()); + RegionSnapshotReplacementStep::new(request_id, step_volume_id); step_2.replacement_state = RegionSnapshotReplacementStepState::Complete; step_2.operating_saga_id = Some(operating_saga_id); let step_2_id = step_2.id; @@ -335,8 +412,9 @@ mod test { assert_eq!( result, RegionSnapshotReplacementFinishStatus { - records_set_to_done: vec![format!( - "set request {request_id} to done" + finish_invoked_ok: vec![format!( + "region snapshot replacement finish invoked ok for \ + {request_id}" )], errors: vec![], }, diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs index db30e1d074..57bbf3741c 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs @@ -155,6 +155,7 @@ mod test { use nexus_db_model::RegionSnapshotReplacementState; use nexus_test_utils_macros::nexus_test; use omicron_uuid_kinds::DatasetUuid; + use sled_agent_client::types::VolumeConstructionRequest; use uuid::Uuid; type ControlPlaneTestContext = @@ -199,11 +200,25 @@ mod test { let request_1_id = request.id; + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); @@ -219,11 +234,25 @@ mod test { let request_2_id = request.id; + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs index 04d86a1268..f2b82a3943 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs @@ -23,6 +23,7 @@ use nexus_db_model::RegionSnapshotReplacement; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus; +use omicron_common::api::external::Error; use serde_json::json; use std::sync::Arc; @@ -40,7 +41,7 @@ impl RegionSnapshotReplacementDetector { &self, serialized_authn: authn::saga::Serialized, request: RegionSnapshotReplacement, - ) -> Result<(), omicron_common::api::external::Error> { + ) -> Result<(), Error> { let params = sagas::region_snapshot_replacement_start::Params { serialized_authn, request, @@ -138,17 +139,33 @@ impl RegionSnapshotReplacementDetector { } Err(e) => { - let s = - format!("error creating replacement request: {e}"); - - error!( - &log, - "{s}"; - "snapshot_id" => %region_snapshot.snapshot_id, - "region_id" => %region_snapshot.region_id, - "dataset_id" => %region_snapshot.dataset_id, - ); - status.errors.push(s); + match e { + Error::Conflict { message } + if message.external_message() + == "volume repair lock" => + { + // This is not a fatal error! If there are + // competing region replacement and region + // snapshot replacements, then they are both + // attempting to lock volumes. + } + + _ => { + let s = format!( + "error creating replacement request: {e}" + ); + + error!( + &log, + "{s}"; + "snapshot_id" => %region_snapshot.snapshot_id, + "region_id" => %region_snapshot.region_id, + "dataset_id" => %region_snapshot.dataset_id, + ); + + status.errors.push(s); + } + } } } } @@ -185,6 +202,67 @@ impl RegionSnapshotReplacementDetector { for request in requests { let request_id = request.id; + // If the region snapshot is gone, then there are no more references + // in any volume, and the whole region snapshot replacement can be + // fast-tracked to Complete. 
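The fast-track check described in the comment above is implemented just below. As an aside on the test changes in this diff: they repeat the same empty-volume setup before each insert of a replacement request or step. A hypothetical helper could consolidate that boilerplate; the name is made up, but every call in it already appears in the tests above:

```rust
use nexus_db_queries::db::DataStore;
use sled_agent_client::types::VolumeConstructionRequest;
use uuid::Uuid;

/// Hypothetical test helper (not part of the diff): create the empty volume
/// record that the replacement-request inserts now require, returning its id.
async fn create_blank_test_volume(datastore: &DataStore) -> Uuid {
    let volume_id = Uuid::new_v4();
    datastore
        .volume_create(nexus_db_model::Volume::new(
            volume_id,
            serde_json::to_string(&VolumeConstructionRequest::Volume {
                id: volume_id, // not required to match the record id
                block_size: 512,
                sub_volumes: vec![], // nothing needed here
                read_only_parent: None,
            })
            .unwrap(),
        ))
        .await
        .unwrap();
    volume_id
}
```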
+ + let maybe_region_snapshot = match self + .datastore + .region_snapshot_get( + request.old_dataset_id.into(), + request.old_region_id, + request.old_snapshot_id, + ) + .await + { + Ok(maybe_region_snapshot) => maybe_region_snapshot, + + Err(e) => { + let s = format!("query for region snapshot failed: {e}"); + + error!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.errors.push(s); + return; + } + }; + + if maybe_region_snapshot.is_none() { + match self + .datastore + .set_region_snapshot_replacement_complete_from_requested( + &opctx, request.id, + ) + .await + { + Ok(()) => { + let s = format!( + "region snapshot replacement {request_id} \ + completed ok" + ); + info!(&log, "{s}"); + status.requests_completed_ok.push(s); + } + + Err(e) => { + let s = format!( + "query to set region snapshot request state \ + to complete failed: {e}" + ); + + error!(&log, "{s}"; "request.id" => %request_id); + status.errors.push(s); + } + } + + continue; + } + let result = self .send_start_request( authn::saga::Serialized::for_opctx(opctx), @@ -269,6 +347,7 @@ mod test { use nexus_db_model::Snapshot; use nexus_db_model::SnapshotIdentity; use nexus_db_model::SnapshotState; + use nexus_db_model::VolumeResourceUsage; use nexus_db_queries::authz; use nexus_db_queries::db::lookup::LookupPath; use nexus_test_utils::resource_helpers::create_project; @@ -276,6 +355,8 @@ mod test { use omicron_common::api::external; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; + use sled_agent_client::types::CrucibleOpts; + use sled_agent_client::types::VolumeConstructionRequest; use std::collections::BTreeMap; use uuid::Uuid; @@ -309,19 +390,43 @@ mod test { // Add a region snapshot replacement request for a fake region snapshot - let request = RegionSnapshotReplacement::new( - DatasetUuid::new_v4(), // dataset id - Uuid::new_v4(), // region id - Uuid::new_v4(), // snapshot id + let dataset_id = DatasetUuid::new_v4(); + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + + let region_snapshot = RegionSnapshot::new( + dataset_id, + region_id, + snapshot_id, + "[::]:12345".to_string(), ); + datastore.region_snapshot_create(region_snapshot).await.unwrap(); + + let request = + RegionSnapshotReplacement::new(dataset_id, region_id, snapshot_id); + let request_id = request.id; + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); @@ -339,6 +444,7 @@ mod test { "region snapshot replacement start invoked ok for \ {request_id}" )], + requests_completed_ok: vec![], errors: vec![], }, ); @@ -407,6 +513,22 @@ mod test { .await .unwrap(); + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .project_ensure_snapshot( &opctx, @@ -426,7 +548,7 @@ mod test { project_id, disk_id: Uuid::new_v4(), - volume_id: Uuid::new_v4(), + volume_id, destination_volume_id: Uuid::new_v4(), gen: Generation::new(), @@ -508,4 +630,194 @@ mod test { dataset_to_zpool.get(&first_zpool.id.to_string()).unwrap(); assert_eq!(&request.old_dataset_id.to_string(), dataset_id); } + + #[nexus_test(server = crate::Server)] + async fn test_delete_region_snapshot_replacement_volume_causes_complete( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementDetector::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementStartStatus::default()); + assert_eq!(starter.count_reset(), 0); + + // The volume reference counting machinery needs a fake dataset to exist + // (region snapshots are joined with the dataset table when creating the + // CrucibleResources object) + + let disk_test = DiskTest::new(cptestctx).await; + + let dataset_id = disk_test.zpools().next().unwrap().datasets[0].id; + + // Add a region snapshot replacement request for a fake region snapshot + + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + + let region_snapshot = RegionSnapshot::new( + dataset_id, + region_id, + snapshot_id, + "[::1]:12345".to_string(), + ); + + datastore + .region_snapshot_create(region_snapshot.clone()) + .await + .unwrap(); + + let request = + RegionSnapshotReplacement::new(dataset_id, region_id, snapshot_id); + + let request_id = request.id; + + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 1, + extent_count: 1, + gen: 1, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![ + // the region snapshot + String::from("[::1]:12345"), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }) + .unwrap(), + )) + .await + .unwrap(); + + // Assert usage + + let records = datastore + .volume_usage_records_for_resource( + VolumeResourceUsage::RegionSnapshot { + dataset_id: region_snapshot.dataset_id.into(), + region_id: region_snapshot.region_id, + snapshot_id: region_snapshot.snapshot_id, + }, + ) + .await + .unwrap(); + + assert!(!records.is_empty()); + assert_eq!(records[0].volume_id, volume_id); + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, request, volume_id, + ) + .await + .unwrap(); + + // Before the task starts, soft-delete the volume, and delete the + // region snapshot (like the volume delete saga would do). + + let crucible_resources = + datastore.soft_delete_volume(volume_id).await.unwrap(); + + // Assert no more usage + + let records = datastore + .volume_usage_records_for_resource( + VolumeResourceUsage::RegionSnapshot { + dataset_id: region_snapshot.dataset_id.into(), + region_id: region_snapshot.region_id, + snapshot_id: region_snapshot.snapshot_id, + }, + ) + .await + .unwrap(); + + assert!(records.is_empty()); + + // The region snapshot should have been returned for deletion + + let datasets_and_snapshots = + datastore.snapshots_to_delete(&crucible_resources).await.unwrap(); + + assert!(!datasets_and_snapshots.is_empty()); + + let region_snapshot_to_delete = &datasets_and_snapshots[0].1; + + assert_eq!( + region_snapshot_to_delete.dataset_id, + region_snapshot.dataset_id, + ); + assert_eq!( + region_snapshot_to_delete.region_id, + region_snapshot.region_id, + ); + assert_eq!( + region_snapshot_to_delete.snapshot_id, + region_snapshot.snapshot_id, + ); + + // So delete it! 
+ + datastore + .region_snapshot_remove( + region_snapshot_to_delete.dataset_id.into(), + region_snapshot_to_delete.region_id, + region_snapshot_to_delete.snapshot_id, + ) + .await + .unwrap(); + + // Activate the task - it should pick the request up but not attempt to + // run the start saga + + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + assert_eq!( + result, + RegionSnapshotReplacementStartStatus { + requests_created_ok: vec![], + start_invoked_ok: vec![], + requests_completed_ok: vec![format!( + "region snapshot replacement {request_id} completed ok" + )], + errors: vec![], + }, + ); + + // Assert start saga not invoked + assert_eq!(starter.count_reset(), 0); + } } diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs index ac259ecba8..f481126312 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs @@ -32,10 +32,11 @@ use futures::future::BoxFuture; use futures::FutureExt; use nexus_db_model::RegionSnapshotReplacementStep; use nexus_db_queries::context::OpContext; -use nexus_db_queries::db::datastore::region_snapshot_replacement; +use nexus_db_queries::db::datastore::region_snapshot_replacement::*; use nexus_db_queries::db::DataStore; use nexus_types::identity::Asset; use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus; +use omicron_common::api::external::Error; use serde_json::json; use std::sync::Arc; @@ -53,7 +54,7 @@ impl RegionSnapshotReplacementFindAffected { &self, opctx: &OpContext, request: RegionSnapshotReplacementStep, - ) -> Result<(), omicron_common::api::external::Error> { + ) -> Result<(), Error> { let params = sagas::region_snapshot_replacement_step::Params { serialized_authn: authn::saga::Serialized::for_opctx(opctx), request, @@ -70,7 +71,7 @@ impl RegionSnapshotReplacementFindAffected { &self, opctx: &OpContext, request: RegionSnapshotReplacementStep, - ) -> Result<(), omicron_common::api::external::Error> { + ) -> Result<(), Error> { let Some(old_snapshot_volume_id) = request.old_snapshot_volume_id else { // This state is illegal! @@ -79,9 +80,7 @@ impl RegionSnapshotReplacementFindAffected { request.id, ); - return Err(omicron_common::api::external::Error::internal_error( - &s, - )); + return Err(Error::internal_error(&s)); }; let params = @@ -315,6 +314,21 @@ impl RegionSnapshotReplacementFindAffected { // functions execute), an indefinite amount of work would be // created, continually "moving" the snapshot_addr from // temporary volume to temporary volume. + // + // If the volume was soft deleted, then skip making a step for + // it. + + if volume.time_deleted.is_some() { + info!( + log, + "volume was soft-deleted, skipping creating a step for \ + it"; + "request id" => ?request.id, + "volume id" => ?volume.id(), + ); + + continue; + } match self .datastore @@ -326,7 +340,7 @@ impl RegionSnapshotReplacementFindAffected { .await { Ok(insertion_result) => match insertion_result { - region_snapshot_replacement::InsertStepResult::Inserted { step_id } => { + InsertStepResult::Inserted { step_id } => { let s = format!("created {step_id}"); info!( log, @@ -337,7 +351,7 @@ impl RegionSnapshotReplacementFindAffected { status.step_records_created_ok.push(s); } - region_snapshot_replacement::InsertStepResult::AlreadyHandled { .. } => { + InsertStepResult::AlreadyHandled { .. 
} => { info!( log, "step already exists for volume id"; @@ -345,17 +359,32 @@ impl RegionSnapshotReplacementFindAffected { "volume id" => ?volume.id(), ); } - } + }, Err(e) => { let s = format!("error creating step request: {e}"); - error!( + warn!( log, "{s}"; "request id" => ?request.id, "volume id" => ?volume.id(), ); - status.errors.push(s); + + match e { + Error::Conflict { message } + if message.external_message() + == "volume repair lock" => + { + // This is not a fatal error! If there are + // competing region replacement and region + // snapshot replacements, then they are both + // attempting to lock volumes. + } + + _ => { + status.errors.push(s); + } + } } } } @@ -392,13 +421,81 @@ impl RegionSnapshotReplacementFindAffected { }; for request in step_requests { - let request_id = request.id; + let request_step_id = request.id; + + // Check if the volume was deleted _after_ the replacement step was + // created. Avoid launching the region snapshot replacement step + // saga if it was deleted: the saga will do the right thing if it is + // deleted, but this avoids the overhead of starting it. + + let volume_deleted = + match self.datastore.volume_deleted(request.volume_id).await { + Ok(volume_deleted) => volume_deleted, + + Err(e) => { + let s = format!( + "error checking if volume id {} was \ + deleted: {e}", + request.volume_id, + ); + error!(&log, "{s}"); + + status.errors.push(s); + continue; + } + }; + + if volume_deleted { + // Volume was soft or hard deleted, so proceed with clean up, + // which if this is in state Requested there won't be any + // additional associated state, so transition the record to + // Completed. + + info!( + &log, + "request {} step {} volume {} was soft or hard deleted!", + request.request_id, + request_step_id, + request.volume_id, + ); + + let result = self + .datastore + .set_region_snapshot_replacement_step_volume_deleted_from_requested( + opctx, request, + ) + .await; + + match result { + Ok(()) => { + let s = format!( + "request step {request_step_id} transitioned from \ + requested to volume_deleted" + ); + + info!(&log, "{s}"); + status.step_set_volume_deleted_ok.push(s); + } + + Err(e) => { + let s = format!( + "error transitioning {request_step_id} from \ + requested to complete: {e}" + ); + + error!(&log, "{s}"); + status.errors.push(s); + } + } + + continue; + } match self.send_start_request(opctx, request.clone()).await { Ok(()) => { let s = format!( "region snapshot replacement step saga invoked ok for \ - {request_id}" + {request_step_id}" ); info!( @@ -413,7 +510,7 @@ impl RegionSnapshotReplacementFindAffected { Err(e) => { let s = format!( "invoking region snapshot replacement step saga for \ - {request_id} failed: {e}" + {request_step_id} failed: {e}" ); error!( @@ -575,11 +672,25 @@ mod test { let request_id = request.id; + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), // not required to match! 
+ block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( - &opctx, - request, - Uuid::new_v4(), + &opctx, request, volume_id, ) .await .unwrap(); @@ -599,6 +710,7 @@ mod test { .unwrap(); let new_region_id = Uuid::new_v4(); + let new_region_volume_id = Uuid::new_v4(); let old_snapshot_volume_id = Uuid::new_v4(); datastore @@ -607,6 +719,7 @@ mod test { request_id, operating_saga_id, new_region_id, + new_region_volume_id, old_snapshot_volume_id, ) .await @@ -731,11 +844,27 @@ mod test { // Now, add some Complete records and make sure the garbage collection // saga is invoked. + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let result = datastore .insert_region_snapshot_replacement_step(&opctx, { let mut record = RegionSnapshotReplacementStep::new( Uuid::new_v4(), - Uuid::new_v4(), + volume_id, ); record.replacement_state = @@ -747,16 +876,29 @@ mod test { .await .unwrap(); - assert!(matches!( - result, - region_snapshot_replacement::InsertStepResult::Inserted { .. } - )); + assert!(matches!(result, InsertStepResult::Inserted { .. })); + + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); let result = datastore .insert_region_snapshot_replacement_step(&opctx, { let mut record = RegionSnapshotReplacementStep::new( Uuid::new_v4(), - Uuid::new_v4(), + volume_id, ); record.replacement_state = @@ -768,10 +910,7 @@ mod test { .await .unwrap(); - assert!(matches!( - result, - region_snapshot_replacement::InsertStepResult::Inserted { .. } - )); + assert!(matches!(result, InsertStepResult::Inserted { .. })); // Activate the task - it should pick the complete steps up and try to // run the region snapshot replacement step garbage collect saga diff --git a/nexus/src/app/external_endpoints.rs b/nexus/src/app/external_endpoints.rs index f837edc4fb..b93b692465 100644 --- a/nexus/src/app/external_endpoints.rs +++ b/nexus/src/app/external_endpoints.rs @@ -33,15 +33,17 @@ use anyhow::Context; use nexus_db_model::AuthenticationMode; use nexus_db_model::Certificate; use nexus_db_model::DnsGroup; +use nexus_db_model::DnsZone; +use nexus_db_model::Silo; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::datastore::Discoverability; use nexus_db_queries::db::model::ServiceKind; +use nexus_db_queries::db::pagination::Paginator; use nexus_db_queries::db::DataStore; use nexus_types::identity::Resource; use nexus_types::silo::silo_dns_name; use nexus_types::silo::DEFAULT_SILO_ID; use omicron_common::api::external::http_pagination::PaginatedBy; -use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::bail_unless; use openssl::pkey::PKey; @@ -488,69 +490,61 @@ pub(crate) async fn read_all_endpoints( datastore: &DataStore, opctx: &OpContext, ) -> Result { - // We will not look for more than this number of external DNS zones, Silos, - // or certificates. 
We do not expect very many of any of these objects. - const MAX: u32 = 200; - let pagparams_id = DataPageParams { - marker: None, - limit: NonZeroU32::new(MAX).unwrap(), - direction: dropshot::PaginationOrder::Ascending, - }; - let pagbyid = PaginatedBy::Id(pagparams_id); - let pagparams_name = DataPageParams { - marker: None, - limit: NonZeroU32::new(MAX).unwrap(), - direction: dropshot::PaginationOrder::Ascending, - }; - - let silos = - datastore.silos_list(opctx, &pagbyid, Discoverability::All).await?; - let external_dns_zones = datastore - .dns_zones_list(opctx, DnsGroup::External, &pagparams_name) - .await?; + // The batch size here is pretty arbitrary. On the vast majority of + // systems, there will only ever be a handful of any of these objects. Some + // systems are known to have a few dozen silos and a few hundred TLS + // certificates. This code path is not particularly latency-sensitive. Our + // purpose in limiting the batch size is just to avoid unbounded-size + // database transactions. + // + // unwrap(): safe because 200 is non-zero. + let batch_size = NonZeroU32::new(200).unwrap(); + + // Fetch all silos. + let mut silos = Vec::new(); + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = datastore + .silos_list( + opctx, + &PaginatedBy::Id(p.current_pagparams()), + Discoverability::All, + ) + .await?; + paginator = p.found_batch(&batch, &|s: &Silo| s.id()); + silos.extend(batch.into_iter()); + } + + // Fetch all external DNS zones. We should really only ever have one, but + // we may as well paginate this. + let mut external_dns_zones = Vec::new(); + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = datastore + .dns_zones_list(opctx, DnsGroup::External, &p.current_pagparams()) + .await?; + paginator = p.found_batch(&batch, &|z: &DnsZone| z.zone_name.clone()); + external_dns_zones.extend(batch.into_iter()); + } bail_unless!( !external_dns_zones.is_empty(), "expected at least one external DNS zone" ); - let certs = datastore - .certificate_list_for(opctx, Some(ServiceKind::Nexus), &pagbyid, false) - .await?; - - // If we found too many of any of these things, complain as loudly as we - // can. Our results will be wrong. But we still don't want to fail if we - // can avoid it because we want to be able to serve as many endpoints as we - // can. - // TODO-reliability we should prevent people from creating more than this - // maximum number of Silos and certificates. - let max = usize::try_from(MAX).unwrap(); - if silos.len() >= max { - error!( - &opctx.log, - "reading endpoints: expected at most {} silos, but found at \ - least {}. TLS may not work on some Silos' external endpoints.", - MAX, - silos.len(), - ); - } - if external_dns_zones.len() >= max { - error!( - &opctx.log, - "reading endpoints: expected at most {} external DNS zones, but \ - found at least {}. TLS may not work on some Silos' external \ - endpoints.", - MAX, - external_dns_zones.len(), - ); - } - if certs.len() >= max { - error!( - &opctx.log, - "reading endpoints: expected at most {} certificates, but \ - found at least {}. TLS may not work on some Silos' external \ - endpoints.", - MAX, - certs.len(), - ); + + // Fetch all TLS certificates. 
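The rewritten `read_all_endpoints` above swaps the fixed 200-item queries for `Paginator`-driven batches, and the certificate fetch just below repeats the same shape. A self-contained toy of the marker-based batching idea, using no omicron types (the data set and `list_page` function are purely illustrative):

```rust
/// Toy illustration of marker-based batching (Paginator in the diff
/// encapsulates this bookkeeping): each page starts after the last key of the
/// previous page, and a short page signals the end.
fn list_page(data: &[(u32, &str)], marker: Option<u32>, batch_size: usize) -> Vec<(u32, &str)> {
    data.iter()
        .filter(|(id, _)| marker.map_or(true, |m| *id > m))
        .take(batch_size)
        .copied()
        .collect()
}

fn main() {
    let data = [(1, "silo-a"), (2, "silo-b"), (3, "silo-c"), (4, "silo-d"), (5, "silo-e")];
    let batch_size = 2;
    let mut all = Vec::new();
    let mut marker = None;

    loop {
        let batch = list_page(&data, marker, batch_size);
        let done = batch.len() < batch_size;
        marker = batch.last().map(|(id, _)| *id);
        all.extend(batch);
        if done {
            break;
        }
    }

    assert_eq!(all.len(), data.len());
    println!("fetched {} items in batches of {}", all.len(), batch_size);
}
```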
+ let mut certs = Vec::new(); + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = datastore + .certificate_list_for( + opctx, + Some(ServiceKind::Nexus), + &PaginatedBy::Id(p.current_pagparams()), + false, + ) + .await?; + paginator = p.found_batch(&batch, &|s: &Certificate| s.id()); + certs.extend(batch); } Ok(ExternalEndpoints::new(silos, certs, external_dns_zones)) diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 405a972976..d8ba6abbdd 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -39,6 +39,7 @@ pub mod project_create; pub mod region_replacement_drive; pub mod region_replacement_finish; pub mod region_replacement_start; +pub mod region_snapshot_replacement_finish; pub mod region_snapshot_replacement_garbage_collect; pub mod region_snapshot_replacement_start; pub mod region_snapshot_replacement_step; @@ -173,7 +174,8 @@ fn make_action_registry() -> ActionRegistry { region_snapshot_replacement_start::SagaRegionSnapshotReplacementStart, region_snapshot_replacement_garbage_collect::SagaRegionSnapshotReplacementGarbageCollect, region_snapshot_replacement_step::SagaRegionSnapshotReplacementStep, - region_snapshot_replacement_step_garbage_collect::SagaRegionSnapshotReplacementStepGarbageCollect + region_snapshot_replacement_step_garbage_collect::SagaRegionSnapshotReplacementStepGarbageCollect, + region_snapshot_replacement_finish::SagaRegionSnapshotReplacementFinish ]; #[cfg(test)] diff --git a/nexus/src/app/sagas/region_replacement_finish.rs b/nexus/src/app/sagas/region_replacement_finish.rs index c7efa2f03f..2212e6fdf3 100644 --- a/nexus/src/app/sagas/region_replacement_finish.rs +++ b/nexus/src/app/sagas/region_replacement_finish.rs @@ -311,6 +311,20 @@ pub(crate) mod test { operating_saga_id: None, }; + datastore + .volume_create(nexus_db_model::Volume::new( + new_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: new_volume_id, + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_replacement_request(&opctx, request.clone()) .await diff --git a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index ce063dc5be..aa9e83c037 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -491,7 +491,7 @@ async fn srrs_get_old_region_address( async fn srrs_replace_region_in_volume( sagactx: NexusActionContext, -) -> Result<(), ActionError> { +) -> Result { let log = sagactx.user_data().log(); let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; @@ -555,8 +555,6 @@ async fn srrs_replace_region_in_volume( .await .map_err(ActionError::action_failed)?; - debug!(log, "replacement returned {:?}", volume_replace_region_result); - match volume_replace_region_result { VolumeReplaceResult::AlreadyHappened | VolumeReplaceResult::Done => { // The replacement was done either by this run of this saga node, or @@ -565,10 +563,11 @@ async fn srrs_replace_region_in_volume( // with the rest of the saga (to properly clean up allocated // resources). - Ok(()) + Ok(volume_replace_region_result) } - VolumeReplaceResult::ExistingVolumeDeleted => { + VolumeReplaceResult::ExistingVolumeSoftDeleted + | VolumeReplaceResult::ExistingVolumeHardDeleted => { // Unwind the saga here to clean up the resources allocated during // this saga. 
The associated background task will transition this // request's state to Completed. diff --git a/nexus/src/app/sagas/region_snapshot_replacement_finish.rs b/nexus/src/app/sagas/region_snapshot_replacement_finish.rs new file mode 100644 index 0000000000..d992f753d6 --- /dev/null +++ b/nexus/src/app/sagas/region_snapshot_replacement_finish.rs @@ -0,0 +1,211 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! After the change to store a "new region volume" in the region snapshot +//! replacement request, that volume requires garbage collection before the +//! region snapshot replacement transitions to Complete. It's this saga's +//! responsibility to ensure that cleanup. This saga handles the following +//! region snapshot replacement request state transitions: +//! +//! ```text +//! Running <-- +//! | +//! | | +//! v | +//! | +//! Completing -- +//! +//! | +//! v +//! +//! Complete +//! ``` +//! +//! The first thing this saga does is set itself as the "operating saga" for the +//! request, and change the state to "Completing". Then, it performs the volume +//! delete sub-saga for the new region volume. Finally, it updates the region +//! snapshot replacement request by clearing the operating saga id and changing +//! the state to "Complete". +//! +//! Any unwind will place the state back into Running. + +use super::{ + ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, + ACTION_GENERATE_ID, +}; +use crate::app::sagas::declare_saga_actions; +use crate::app::sagas::volume_delete; +use crate::app::{authn, db}; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region snapshot replacement finish saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + pub request: db::model::RegionSnapshotReplacement, +} + +// region snapshot replacement finish saga: actions + +declare_saga_actions! { + region_snapshot_replacement_finish; + SET_SAGA_ID -> "unused_1" { + + rsrfs_set_saga_id + - rsrfs_set_saga_id_undo + } + UPDATE_REQUEST_RECORD -> "unused_4" { + + rsrfs_update_request_record + } +} + +// region snapshot replacement finish saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionSnapshotReplacementFinish; +impl NexusSaga for SagaRegionSnapshotReplacementFinish { + const NAME: &'static str = "region-snapshot-replacement-finish"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_snapshot_replacement_finish_register_actions(registry); + } + + fn make_saga_dag( + params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "saga_id", + "GenerateSagaId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(set_saga_id_action()); + + if let Some(new_region_volume_id) = params.request.new_region_volume_id + { + let subsaga_params = volume_delete::Params { + serialized_authn: params.serialized_authn.clone(), + volume_id: new_region_volume_id, + }; + + let subsaga_dag = { + let subsaga_builder = steno::DagBuilder::new( + steno::SagaName::new(volume_delete::SagaVolumeDelete::NAME), + ); + volume_delete::SagaVolumeDelete::make_saga_dag( + &subsaga_params, + subsaga_builder, + )? 
+ }; + + builder.append(Node::constant( + "params_for_volume_delete_subsaga", + serde_json::to_value(&subsaga_params).map_err(|e| { + SagaInitError::SerializeError( + "params_for_volume_delete_subsaga".to_string(), + e, + ) + })?, + )); + + builder.append(Node::subsaga( + "volume_delete_subsaga_no_result", + subsaga_dag, + "params_for_volume_delete_subsaga", + )); + } + + builder.append(update_request_record_action()); + + Ok(builder.build()?) + } +} + +// region snapshot replacement finish saga: action implementations + +async fn rsrfs_set_saga_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Change the request record here to an intermediate "completing" state to + // block out other sagas that will be triggered for the same request. + osagactx + .datastore() + .set_region_snapshot_replacement_completing( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrfs_set_saga_id_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + osagactx + .datastore() + .undo_set_region_snapshot_replacement_completing( + &opctx, + params.request.id, + saga_id, + ) + .await?; + + Ok(()) +} + +async fn rsrfs_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Update the replacement request record to 'Complete' and clear the + // operating saga id. There is no undo step for this, it should succeed + // idempotently. 
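The saga above claims the request via `set_region_snapshot_replacement_completing` and, just below, finishes it with an idempotent `set_region_snapshot_replacement_complete`. A toy, datastore-free model of that operating-saga-id guard (the enum and functions are illustrative, not the Nexus schema; the exact idempotency rules are assumptions drawn from the comments):

```rust
#[derive(Clone, Copy)]
enum State {
    Running,
    Completing { saga_id: u64 },
    Complete,
}

/// Forward action: claim the record, or fail if another saga already holds it.
fn set_completing(state: State, saga_id: u64) -> Result<State, &'static str> {
    match state {
        State::Running => Ok(State::Completing { saga_id }),
        // Replaying our own claim is fine; anyone else's claim blocks us.
        State::Completing { saga_id: holder } if holder == saga_id => {
            Ok(State::Completing { saga_id })
        }
        _ => Err("request is operated on by another saga or already complete"),
    }
}

/// Final action: only the holder completes; repeating it is a no-op, which is
/// why the node needs no undo step.
fn set_complete(state: State, saga_id: u64) -> Result<State, &'static str> {
    match state {
        State::Completing { saga_id: holder } if holder == saga_id => Ok(State::Complete),
        State::Complete => Ok(State::Complete),
        _ => Err("cannot complete: not held by this saga"),
    }
}

fn main() {
    let s = set_completing(State::Running, 1).unwrap();
    let s = set_complete(s, 1).unwrap();
    // Replaying the final transition succeeds idempotently.
    assert!(matches!(set_complete(s, 1), Ok(State::Complete)));
}
```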
+ datastore + .set_region_snapshot_replacement_complete( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} diff --git a/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs index 762182724b..675b2b0cb3 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs @@ -284,11 +284,27 @@ pub(crate) mod test { RegionSnapshotReplacementState::ReplacementDone; request.old_snapshot_volume_id = Some(old_snapshot_volume_id); + let volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + datastore .insert_region_snapshot_replacement_request_with_volume_id( &opctx, request.clone(), - Uuid::new_v4(), + volume_id, ) .await .unwrap(); diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index 4855f64ac2..b9ed75c288 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -65,6 +65,7 @@ use crate::app::{authn, db}; use nexus_types::identity::Asset; use nexus_types::identity::Resource; use omicron_common::api::external::Error; +use omicron_uuid_kinds::DatasetUuid; use serde::Deserialize; use serde::Serialize; use sled_agent_client::types::CrucibleOpts; @@ -91,6 +92,9 @@ declare_saga_actions! { + rsrss_set_saga_id - rsrss_set_saga_id_undo } + GET_CLONE_SOURCE -> "clone_source" { + + rsrss_get_clone_source + } GET_ALLOC_REGION_PARAMS -> "alloc_region_params" { + rsrss_get_alloc_region_params } @@ -101,9 +105,47 @@ declare_saga_actions! { FIND_NEW_REGION -> "new_dataset_and_region" { + rsrss_find_new_region } + // One of the common sharp edges of sagas is that the compensating action of + // a node does _not_ run if the forward action fails. Said another way, for + // this node: + // + // EXAMPLE -> "output" { + // + forward_action + // - forward_action_undo + // } + // + // If `forward_action` fails, `forward_action_undo` is never executed. + // Forward actions are therefore required to be atomic, in that they either + // fully apply or don't apply at all. + // + // Sagas with nodes that ensure multiple regions exist cannot be atomic + // because they can partially fail (for example: what if only 2 out of 3 + // ensures succeed?). In order for the compensating action to be run, it + // must exist as a separate node that has a no-op forward action: + // + // EXAMPLE_UNDO -> "not_used" { + // + noop + // - forward_action_undo + // } + // EXAMPLE -> "output" { + // + forward_action + // } + // + // This saga will only ever ensure that a single region exists, so you might + // think you could get away with a single node that combines the forward and + // compensating action - you'd be mistaken! The Crucible agent's region + // ensure is not atomic in all cases: if the region fails to create, it + // enters the `failed` state, but is not deleted. Nexus must clean these up. 
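The comment above explains why the region-ensure step must be split into an undo-only node followed by a forward-only node, which the `NEW_REGION_ENSURE_UNDO` / `NEW_REGION_ENSURE` declarations just below implement. A self-contained toy of the unwind rule it relies on, with no steno types (node and action names are made up):

```rust
/// Toy model of the saga-unwind rule: when a node's forward action fails, only
/// the undo actions of nodes that already completed are run; the failing
/// node's own undo is never invoked. Putting the cleanup on a preceding no-op
/// node guarantees it runs even if the ensure partially fails.
struct Node {
    name: &'static str,
    forward: fn() -> Result<(), ()>,
    undo: Option<fn()>,
}

fn run_saga(nodes: &[Node]) {
    let mut completed: Vec<&Node> = Vec::new();
    for node in nodes {
        match (node.forward)() {
            Ok(()) => completed.push(node),
            Err(()) => {
                println!("node '{}' failed; unwinding", node.name);
                for done in completed.iter().rev() {
                    if let Some(undo) = done.undo {
                        undo();
                    }
                }
                return;
            }
        }
    }
    println!("saga completed");
}

fn main() {
    run_saga(&[
        // Undo-only node: its forward action cannot fail.
        Node {
            name: "new_region_ensure_undo",
            forward: || Ok(()),
            undo: Some(|| println!("deleting partially-created region")),
        },
        // Forward-only node: may fail after partially creating the region.
        Node {
            name: "new_region_ensure",
            forward: || Err(()),
            undo: None,
        },
    ]);
}
```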
+ NEW_REGION_ENSURE_UNDO -> "not_used" { + + rsrss_noop + - rsrss_new_region_ensure_undo + } NEW_REGION_ENSURE -> "ensured_dataset_and_region" { + rsrss_new_region_ensure - - rsrss_new_region_ensure_undo + } + NEW_REGION_VOLUME_CREATE -> "new_region_volume" { + + rsrss_new_region_volume_create + - rsrss_new_region_volume_create_undo } GET_OLD_SNAPSHOT_VOLUME_ID -> "old_snapshot_volume_id" { + rsrss_get_old_snapshot_volume_id @@ -149,11 +191,20 @@ impl NexusSaga for SagaRegionSnapshotReplacementStart { ACTION_GENERATE_ID.as_ref(), )); + builder.append(Node::action( + "new_region_volume_id", + "GenerateNewRegionVolumeId", + ACTION_GENERATE_ID.as_ref(), + )); + builder.append(set_saga_id_action()); + builder.append(get_clone_source_action()); builder.append(get_alloc_region_params_action()); builder.append(alloc_new_region_action()); builder.append(find_new_region_action()); + builder.append(new_region_ensure_undo_action()); builder.append(new_region_ensure_action()); + builder.append(new_region_volume_create_action()); builder.append(get_old_snapshot_volume_id_action()); builder.append(create_fake_volume_action()); builder.append(replace_snapshot_in_volume_action()); @@ -219,6 +270,169 @@ async fn rsrss_set_saga_id_undo( Ok(()) } +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] +enum CloneSource { + RegionSnapshot { dataset_id: DatasetUuid, region_id: Uuid }, + Region { region_id: Uuid }, +} + +async fn rsrss_get_clone_source( + sagactx: NexusActionContext, +) -> Result { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let log = osagactx.log(); + + // Find either a region snapshot or a read-only region that is associated + // with the request snapshot that has not been expunged, and return that as + // the source to be used to populate the read-only region that will replace + // the request's region snapshot. + // + // Importantly, determine the clone source before new region alloc step in + // this saga, otherwise the query that searches for read-only region + // candidates will match the newly allocated region (that is not created + // yet!). + // + // Choose a clone source based on the following policy: + // + // - choose a region snapshot associated with the one being replaced + // + // - choose a read-only region from the associated snapshot volume + // + // - choose the region snapshot being replaced (only if it is not expunged! + // if the downstairs being cloned from is on an expunged dataset, we have + // to assume that the clone will never succeed, even if the expunged + // thing is still there) + // + // The policy here prefers to choose a clone source that isn't the region + // snapshot in the request: if it's flaky, it shouldn't be used as a clone + // source! This function does not know _why_ the replacement request was + // created for that region snapshot, and assumes that there may be a problem + // with it and will choose it as a last resort (if no other candidate clone + // source is found and the request's region snapshot is not on an expunged + // dataset, then it has to be chosen as a clone source, as the alternative + // is lost data). The request's region snapshot may also be completely fine, + // for example if a scrub is being requested. + // + // Also, the policy also prefers to choose to clone from a region snapshot + // instead of a read-only region: this is an arbitrary order, there is no + // reason behind this. The region snapshots and read-only regions will have + // identical contents. 
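Editor's note: a compact sketch of the selection policy spelled out in the comment above, written over invented stand-in types (Candidate, SketchCloneSource). The real implementation that follows queries the datastore for candidates; this is only a condensed illustration of the preference order.

    // Stand-in types for the candidates; the real code deals in dataset, region,
    // and snapshot UUIDs pulled from the datastore.
    #[derive(Debug, Clone, Copy, PartialEq)]
    struct Candidate(u32);

    #[derive(Debug, PartialEq)]
    enum SketchCloneSource {
        RegionSnapshot(Candidate),
        Region(Candidate),
    }

    // Preference order: another region snapshot first, then a read-only region of
    // the snapshot volume, and the request's own region snapshot only as a last
    // resort (and only if its dataset is still in service).
    fn pick_clone_source(
        other_region_snapshots: &[Candidate],
        read_only_regions: &[Candidate],
        request_region_snapshot: Candidate,
        request_dataset_in_service: bool,
    ) -> Option<SketchCloneSource> {
        if let Some(&candidate) = other_region_snapshots.first() {
            return Some(SketchCloneSource::RegionSnapshot(candidate));
        }

        if let Some(&candidate) = read_only_regions.first() {
            return Some(SketchCloneSource::Region(candidate));
        }

        if request_dataset_in_service {
            return Some(SketchCloneSource::RegionSnapshot(request_region_snapshot));
        }

        // Every possible source is on an expunged dataset: the data is gone, and
        // no clone source can be selected.
        None
    }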
+ + // First, try to select another region snapshot that's part of this + // snapshot. + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let mut non_expunged_region_snapshots = osagactx + .datastore() + .find_non_expunged_region_snapshots( + &opctx, + params.request.old_snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; + + // Filter out the request's region snapshot - if there are no other + // candidates, this could be chosen later in this function. + + non_expunged_region_snapshots.retain(|rs| { + !(rs.dataset_id == params.request.old_dataset_id + && rs.region_id == params.request.old_region_id + && rs.snapshot_id == params.request.old_snapshot_id) + }); + + if let Some(candidate) = non_expunged_region_snapshots.pop() { + info!( + log, + "found another non-expunged region snapshot"; + "snapshot_id" => %params.request.old_snapshot_id, + "dataset_id" => %candidate.dataset_id, + "region_id" => %candidate.region_id, + ); + + return Ok(CloneSource::RegionSnapshot { + dataset_id: candidate.dataset_id.into(), + region_id: candidate.region_id, + }); + } + + // Next, try to select a read-only region that's associated with the + // snapshot volume + + info!( + log, + "no region snapshot clone source candidates"; + "snapshot_id" => %params.request.old_snapshot_id, + ); + + // Look up the existing snapshot + let maybe_db_snapshot = osagactx + .datastore() + .snapshot_get(&opctx, params.request.old_snapshot_id) + .await + .map_err(ActionError::action_failed)?; + + let Some(db_snapshot) = maybe_db_snapshot else { + return Err(ActionError::action_failed(Error::internal_error( + &format!( + "snapshot {} was hard deleted!", + params.request.old_snapshot_id + ), + ))); + }; + + let mut non_expunged_read_only_regions = osagactx + .datastore() + .find_non_expunged_regions(&opctx, db_snapshot.volume_id) + .await + .map_err(ActionError::action_failed)?; + + if let Some(candidate) = non_expunged_read_only_regions.pop() { + info!( + log, + "found region clone source candidate"; + "snapshot_id" => %params.request.old_snapshot_id, + "dataset_id" => %candidate.dataset_id(), + "region_id" => %candidate.id(), + ); + + return Ok(CloneSource::Region { region_id: candidate.id() }); + } + + // If no other non-expunged region snapshot or read-only region exists, then + // check if the request's region snapshot is non-expunged. This will use the + // region snapshot that is being replaced as a clone source, which may not + // work if there's a problem with that region snapshot that this replacement + // request is meant to fix! + + let request_dataset_on_in_service_physical_disk = osagactx + .datastore() + .dataset_physical_disk_in_service(params.request.old_dataset_id.into()) + .await + .map_err(ActionError::action_failed)?; + + if request_dataset_on_in_service_physical_disk { + // If the request region snapshot's dataset has not been expunged, it + // can be used + return Ok(CloneSource::RegionSnapshot { + dataset_id: params.request.old_dataset_id.into(), + region_id: params.request.old_region_id, + }); + } + + // If all targets of a Volume::Region are on expunged datasets, then the + // user's data is gone, and this code will fail to select a clone source. 
+ + return Err(ActionError::action_failed(format!( + "no clone source candidate for {}!", + params.request.old_snapshot_id, + ))); +} + #[derive(Debug, Deserialize, Serialize)] struct AllocRegionParams { block_size: u64, @@ -380,6 +594,10 @@ async fn rsrss_find_new_region( Ok(dataset_and_region) } +async fn rsrss_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { + Ok(()) +} + async fn rsrss_new_region_ensure( sagactx: NexusActionContext, ) -> Result< @@ -390,55 +608,72 @@ async fn rsrss_new_region_ensure( let osagactx = sagactx.user_data(); let log = osagactx.log(); - // With a list of datasets and regions to ensure, other sagas need to have a - // separate no-op forward step for the undo action to ensure that the undo - // step occurs in the case that the ensure partially fails. Here this is not - // required, there's only one dataset and region. let new_dataset_and_region = sagactx .lookup::<(db::model::Dataset, db::model::Region)>( "new_dataset_and_region", )?; - let region_snapshot = osagactx - .datastore() - .region_snapshot_get( - params.request.old_dataset_id.into(), - params.request.old_region_id, - params.request.old_snapshot_id, - ) - .await - .map_err(ActionError::action_failed)?; + let clone_source = sagactx.lookup::("clone_source")?; + + let mut source_repair_addr: SocketAddrV6 = match clone_source { + CloneSource::RegionSnapshot { dataset_id, region_id } => { + let region_snapshot = osagactx + .datastore() + .region_snapshot_get( + dataset_id, + region_id, + params.request.old_snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; - let Some(region_snapshot) = region_snapshot else { - return Err(ActionError::action_failed(format!( - "region snapshot {} {} {} deleted!", - params.request.old_dataset_id, - params.request.old_region_id, - params.request.old_snapshot_id, - ))); - }; + let Some(region_snapshot) = region_snapshot else { + return Err(ActionError::action_failed(format!( + "region snapshot {} {} {} deleted!", + dataset_id, region_id, params.request.old_snapshot_id, + ))); + }; - let (new_dataset, new_region) = new_dataset_and_region; + match region_snapshot.snapshot_addr.parse() { + Ok(addr) => addr, - // Currently, the repair port is set using a fixed offset above the - // downstairs port. Once this goes away, Nexus will require a way to query - // for the repair port! + Err(e) => { + return Err(ActionError::action_failed(format!( + "error parsing region_snapshot.snapshot_addr: {e}" + ))); + } + } + } - let mut source_repair_addr: SocketAddrV6 = - match region_snapshot.snapshot_addr.parse() { - Ok(addr) => addr, + CloneSource::Region { region_id } => { + let maybe_addr = osagactx + .datastore() + .region_addr(region_id) + .await + .map_err(ActionError::action_failed)?; - Err(e) => { - return Err(ActionError::action_failed(format!( - "error parsing region_snapshot.snapshot_addr: {e}" - ))); + match maybe_addr { + Some(addr) => addr, + + None => { + return Err(ActionError::action_failed(format!( + "region clone source {region_id} has no port!" + ))); + } } - }; + } + }; + + // Currently, the repair port is set using a fixed offset above the + // downstairs port. Once this goes away, Nexus will require a way to query + // for the repair port! 
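Editor's note: for the fixed repair-port offset noted just above, here is a small illustration using only std::net. The offset value below is a placeholder chosen for the example; the real constant is crucible_common::REPAIR_PORT_OFFSET.

    use std::net::SocketAddrV6;

    // Placeholder value for illustration; real code uses
    // crucible_common::REPAIR_PORT_OFFSET.
    const EXAMPLE_REPAIR_PORT_OFFSET: u16 = 4000;

    fn repair_addr(downstairs: SocketAddrV6) -> SocketAddrV6 {
        // The repair service listens on the same IPv6 address as the downstairs,
        // at a fixed port offset above the downstairs port.
        let mut addr = downstairs;
        addr.set_port(addr.port() + EXAMPLE_REPAIR_PORT_OFFSET);
        addr
    }

    // e.g. "[fd00:1122:3344:101::5]:19000" maps to "[fd00:1122:3344:101::5]:23000"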
source_repair_addr.set_port( source_repair_addr.port() + crucible_common::REPAIR_PORT_OFFSET, ); + let (new_dataset, new_region) = new_dataset_and_region; + let ensured_region = osagactx .nexus() .ensure_region_in_dataset( @@ -474,6 +709,94 @@ async fn rsrss_new_region_ensure_undo( Ok(()) } +async fn rsrss_new_region_volume_create( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + let new_region_volume_id = + sagactx.lookup::("new_region_volume_id")?; + + let (new_dataset, ensured_region) = sagactx.lookup::<( + db::model::Dataset, + crucible_agent_client::types::Region, + )>( + "ensured_dataset_and_region", + )?; + + let Some(new_dataset_address) = new_dataset.address() else { + return Err(ActionError::action_failed(format!( + "dataset {} does not have an address!", + new_dataset.id(), + ))); + }; + + let new_region_address = SocketAddrV6::new( + *new_dataset_address.ip(), + ensured_region.port_number, + 0, + 0, + ); + + // Create a volume to inflate the reference count of the newly created + // read-only region. If this is not done it's possible that a user could + // delete the snapshot volume _after_ the new read-only region was swapped + // in, removing the last reference to it and causing garbage collection. + + let volume_construction_request = VolumeConstructionRequest::Volume { + id: new_region_volume_id, + block_size: 0, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: new_region_volume_id, + target: vec![new_region_address.to_string()], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }], + read_only_parent: None, + }; + + let volume_data = serde_json::to_string(&volume_construction_request) + .map_err(|e| { + ActionError::action_failed(Error::internal_error(&e.to_string())) + })?; + + let volume = db::model::Volume::new(new_region_volume_id, volume_data); + + osagactx + .datastore() + .volume_create(volume) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrss_new_region_volume_create_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + + // Delete the volume. + + let new_region_volume_id = + sagactx.lookup::("new_region_volume_id")?; + osagactx.datastore().volume_hard_delete(new_region_volume_id).await?; + + Ok(()) +} + async fn rsrss_get_old_snapshot_volume_id( sagactx: NexusActionContext, ) -> Result { @@ -660,7 +983,7 @@ async fn get_replace_params( async fn rsrss_replace_snapshot_in_volume( sagactx: NexusActionContext, -) -> Result<(), ActionError> { +) -> Result { let log = sagactx.user_data().log(); let osagactx = sagactx.user_data(); @@ -694,10 +1017,11 @@ async fn rsrss_replace_snapshot_in_volume( // if the transaction occurred on the non-deleted volume so proceed // with the rest of the saga. - Ok(()) + Ok(volume_replace_snapshot_result) } - VolumeReplaceResult::ExistingVolumeDeleted => { + VolumeReplaceResult::ExistingVolumeSoftDeleted + | VolumeReplaceResult::ExistingVolumeHardDeleted => { // If the snapshot volume was deleted, we still want to proceed with // replacing the rest of the uses of the region snapshot. Note this // also covers the case where this saga node runs (performing the @@ -706,7 +1030,7 @@ async fn rsrss_replace_snapshot_in_volume( // deleted. 
If this saga unwound here, that would violate the // property of idempotency. - Ok(()) + Ok(volume_replace_snapshot_result) } } } @@ -780,6 +1104,9 @@ async fn rsrss_update_request_record( let old_region_volume_id = sagactx.lookup::("new_volume_id")?; + let new_region_volume_id = + sagactx.lookup::("new_region_volume_id")?; + // Now that the region has been ensured and the construction request has // been updated, update the replacement request record to 'ReplacementDone' // and clear the operating saga id. There is no undo step for this, it @@ -790,6 +1117,7 @@ async fn rsrss_update_request_record( params.request.id, saga_id, new_region_id, + new_region_volume_id, old_region_volume_id, ) .await @@ -806,6 +1134,7 @@ pub(crate) mod test { app::sagas::region_snapshot_replacement_start::*, app::sagas::test_helpers::test_opctx, app::RegionAllocationStrategy, }; + use nexus_db_model::PhysicalDiskPolicy; use nexus_db_model::RegionSnapshotReplacement; use nexus_db_model::RegionSnapshotReplacementState; use nexus_db_model::Volume; @@ -815,9 +1144,11 @@ pub(crate) mod test { use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::create_snapshot; use nexus_test_utils::resource_helpers::DiskTest; + use nexus_test_utils::resource_helpers::DiskTestBuilder; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::views; use nexus_types::identity::Asset; + use omicron_uuid_kinds::GenericUuid; use sled_agent_client::types::VolumeConstructionRequest; type ControlPlaneTestContext = @@ -830,7 +1161,7 @@ pub(crate) mod test { /// Create four zpools, a disk, and a snapshot of that disk async fn prepare_for_test( cptestctx: &ControlPlaneTestContext, - ) -> PrepareResult { + ) -> PrepareResult<'_> { let client = &cptestctx.external_client; let nexus = &cptestctx.server.server_context().nexus; let datastore = nexus.datastore(); @@ -876,20 +1207,21 @@ pub(crate) mod test { panic!("test snapshot {:?} should exist", snapshot_id) }); - PrepareResult { db_disk, snapshot, db_snapshot } + PrepareResult { db_disk, snapshot, db_snapshot, disk_test } } - struct PrepareResult { + struct PrepareResult<'a> { db_disk: nexus_db_model::Disk, snapshot: views::Snapshot, db_snapshot: nexus_db_model::Snapshot, + disk_test: DiskTest<'a, crate::Server>, } #[nexus_test(server = crate::Server)] async fn test_region_snapshot_replacement_start_saga( cptestctx: &ControlPlaneTestContext, ) { - let PrepareResult { db_disk, snapshot, db_snapshot } = + let PrepareResult { db_disk, snapshot, db_snapshot, .. 
} = prepare_for_test(cptestctx).await; let nexus = &cptestctx.server.server_context().nexus; @@ -990,8 +1322,10 @@ pub(crate) mod test { .await .unwrap(); - assert_eq!(volumes.len(), 1); - assert_eq!(volumes[0].id(), db_snapshot.volume_id); + assert!(volumes + .iter() + .map(|v| v.id()) + .any(|vid| vid == db_snapshot.volume_id)); } fn new_test_params( @@ -1009,9 +1343,11 @@ pub(crate) mod test { pub(crate) async fn verify_clean_slate( cptestctx: &ControlPlaneTestContext, + test: &DiskTest<'_, crate::Server>, request: &RegionSnapshotReplacement, affected_volume_original: &Volume, ) { + let sled_agent = &cptestctx.sled_agent.sled_agent; let datastore = cptestctx.server.server_context().nexus.datastore(); crate::app::sagas::test_helpers::assert_no_failed_undo_steps( @@ -1024,6 +1360,10 @@ pub(crate) mod test { // original disk, and three for the (currently unused) snapshot // destination volume assert_eq!(region_allocations(&datastore).await, 6); + + // Assert that only those six provisioned regions are non-destroyed + assert_no_other_ensured_regions(sled_agent, test, &datastore).await; + assert_region_snapshot_replacement_request_untouched( cptestctx, &datastore, &request, ) @@ -1031,11 +1371,12 @@ pub(crate) mod test { assert_volume_untouched(&datastore, &affected_volume_original).await; } - async fn region_allocations(datastore: &DataStore) -> usize { + async fn regions(datastore: &DataStore) -> Vec { use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use async_bb8_diesel::AsyncSimpleConnection; use diesel::QueryDsl; + use diesel::SelectableHelper; use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_db_queries::db::schema::region::dsl; @@ -1047,15 +1388,56 @@ pub(crate) mod test { conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await.unwrap(); dsl::region - .count() - .get_result_async(&conn) + .select(db::model::Region::as_select()) + .get_results_async(&conn) .await - .map(|x: i64| x as usize) }) .await .unwrap() } + async fn region_allocations(datastore: &DataStore) -> usize { + regions(datastore).await.len() + } + + async fn assert_no_other_ensured_regions( + sled_agent: &omicron_sled_agent::sim::SledAgent, + test: &DiskTest<'_, crate::Server>, + datastore: &DataStore, + ) { + let mut non_destroyed_regions_from_agent = vec![]; + + for zpool in test.zpools() { + for dataset in &zpool.datasets { + let crucible_dataset = + sled_agent.get_crucible_dataset(zpool.id, dataset.id).await; + for region in crucible_dataset.list().await { + match region.state { + crucible_agent_client::types::State::Tombstoned + | crucible_agent_client::types::State::Destroyed => { + // ok + } + + _ => { + non_destroyed_regions_from_agent + .push(region.clone()); + } + } + } + } + } + + let db_regions = regions(datastore).await; + let db_region_ids: Vec = + db_regions.iter().map(|x| x.id()).collect(); + + for region in non_destroyed_regions_from_agent { + let region_id = region.id.0.parse().unwrap(); + let contains = db_region_ids.contains(®ion_id); + assert!(contains, "db does not have {:?}", region_id); + } + } + async fn assert_region_snapshot_replacement_request_untouched( cptestctx: &ControlPlaneTestContext, datastore: &DataStore, @@ -1094,11 +1476,78 @@ pub(crate) mod test { assert_eq!(actual, expected); } + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + let PrepareResult { db_disk, snapshot, db_snapshot, disk_test } = + prepare_for_test(cptestctx).await; + + let log = 
&cptestctx.logctx.log; + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_opctx(cptestctx); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + let region: &nexus_db_model::Region = &disk_allocated_regions[0].1; + let snapshot_id = snapshot.identity.id; + + let region_snapshot = datastore + .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id) + .await + .unwrap() + .unwrap(); + + let request = + RegionSnapshotReplacement::for_region_snapshot(®ion_snapshot); + + datastore + .insert_region_snapshot_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + let affected_volume_original = + datastore.volume_get(db_snapshot.volume_id).await.unwrap().unwrap(); + + verify_clean_slate( + &cptestctx, + &disk_test, + &request, + &affected_volume_original, + ) + .await; + + crate::app::sagas::test_helpers::action_failure_can_unwind::< + SagaRegionSnapshotReplacementStart, + _, + _, + >( + nexus, + || Box::pin(async { new_test_params(&opctx, &request) }), + || { + Box::pin(async { + verify_clean_slate( + &cptestctx, + &disk_test, + &request, + &affected_volume_original, + ) + .await; + }) + }, + log, + ) + .await; + } + #[nexus_test(server = crate::Server)] async fn test_action_failure_can_unwind_idempotently( cptestctx: &ControlPlaneTestContext, ) { - let PrepareResult { db_disk, snapshot, db_snapshot } = + let PrepareResult { db_disk, snapshot, db_snapshot, disk_test } = prepare_for_test(cptestctx).await; let log = &cptestctx.logctx.log; @@ -1130,8 +1579,13 @@ pub(crate) mod test { let affected_volume_original = datastore.volume_get(db_snapshot.volume_id).await.unwrap().unwrap(); - verify_clean_slate(&cptestctx, &request, &affected_volume_original) - .await; + verify_clean_slate( + &cptestctx, + &disk_test, + &request, + &affected_volume_original, + ) + .await; crate::app::sagas::test_helpers::action_failure_can_unwind_idempotently::< SagaRegionSnapshotReplacementStart, @@ -1143,6 +1597,7 @@ pub(crate) mod test { || Box::pin(async { verify_clean_slate( &cptestctx, + &disk_test, &request, &affected_volume_original, ).await; @@ -1155,7 +1610,7 @@ pub(crate) mod test { async fn test_actions_succeed_idempotently( cptestctx: &ControlPlaneTestContext, ) { - let PrepareResult { db_disk, snapshot, db_snapshot: _ } = + let PrepareResult { db_disk, snapshot, .. } = prepare_for_test(cptestctx).await; let nexus = &cptestctx.server.server_context().nexus; @@ -1192,4 +1647,384 @@ pub(crate) mod test { ) .await; } + + /// Assert this saga does not leak regions if the replacement read-only + /// region cannot be created. 
+ #[nexus_test(server = crate::Server)] + async fn test_no_leak_region(cptestctx: &ControlPlaneTestContext) { + let PrepareResult { db_disk, snapshot, db_snapshot, disk_test } = + prepare_for_test(cptestctx).await; + + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_opctx(cptestctx); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + let region: &nexus_db_model::Region = &disk_allocated_regions[0].1; + let snapshot_id = snapshot.identity.id; + + let region_snapshot = datastore + .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id) + .await + .unwrap() + .unwrap(); + + let request = + RegionSnapshotReplacement::for_region_snapshot(®ion_snapshot); + + datastore + .insert_region_snapshot_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + let affected_volume_original = + datastore.volume_get(db_snapshot.volume_id).await.unwrap().unwrap(); + + disk_test.set_always_fail_callback().await; + + // Run the region snapshot replacement start saga + let dag = + create_saga_dag::(Params { + serialized_authn: Serialized::for_opctx(&opctx), + request: request.clone(), + allocation_strategy: RegionAllocationStrategy::Random { + seed: None, + }, + }) + .unwrap(); + + let runnable_saga = nexus.sagas.saga_prepare(dag).await.unwrap(); + + // Actually run the saga + runnable_saga.run_to_completion().await.unwrap(); + + verify_clean_slate( + &cptestctx, + &disk_test, + &request, + &affected_volume_original, + ) + .await; + } + + /// Tests that the region snapshot replacement start saga will not choose + /// the request's region snapshot, but instead will choose the other + /// non-expunged one. + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_start_prefer_not_self( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + // Create four zpools, each with one dataset. This is required for + // region and region snapshot replacement to have somewhere to move the + // data, and for this test we're doing one expungements. 
+ let sled_id = cptestctx.first_sled(); + + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(4) + .build() + .await; + + // Any volumes sent to the Pantry for reconciliation should return + // active for this test + + cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .set_auto_activate_volumes() + .await; + + // Create a disk and a snapshot + let client = &cptestctx.external_client; + let _project_id = + create_project(&client, PROJECT_NAME).await.identity.id; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + let snapshot = + create_snapshot(&client, PROJECT_NAME, "disk", "snap").await; + + // Before expunging any physical disk, save some DB models + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert_eq!(snapshot_allocated_regions.len(), 0); + + // Expunge one physical disk + { + let (dataset, _) = &disk_allocated_regions[0]; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + } + + // Request that the second region snapshot be replaced + + let region_snapshot = datastore + .region_snapshot_get( + disk_allocated_regions[1].0.id(), // dataset id + disk_allocated_regions[1].1.id(), // region id + snapshot.identity.id, + ) + .await + .unwrap() + .unwrap(); + + let request_id = datastore + .create_region_snapshot_replacement_request( + &opctx, + ®ion_snapshot, + ) + .await + .unwrap(); + + // Manually invoke the region snapshot replacement start saga + + let saga_outputs = nexus + .sagas + .saga_execute::(Params { + serialized_authn: Serialized::for_opctx(&opctx), + + request: datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, request_id, + ) + .await + .unwrap(), + + allocation_strategy: RegionAllocationStrategy::Random { + seed: None, + }, + }) + .await + .unwrap(); + + // The third region snapshot should have been selected as the clone + // source + + let selected_clone_source = saga_outputs + .lookup_node_output::("clone_source") + .unwrap(); + + assert_eq!( + selected_clone_source, + CloneSource::RegionSnapshot { + dataset_id: disk_allocated_regions[2].0.id(), + region_id: disk_allocated_regions[2].1.id(), + }, + ); + + let snapshot_allocated_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + + assert_eq!(snapshot_allocated_regions.len(), 1); + assert!(snapshot_allocated_regions.iter().all(|(_, r)| r.read_only())); + } + + /// Tests that a region snapshot replacement request can select the region + /// snapshot being replaced as a clone source (but only if it is not + /// expunged!) 
+ #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_start_hail_mary( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + // Create five zpools, each with one dataset. This is required for + // region and region snapshot replacement to have somewhere to move the + // data, and for this test we're doing two expungements. + let sled_id = cptestctx.first_sled(); + + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(5) + .build() + .await; + + // Any volumes sent to the Pantry for reconciliation should return + // active for this test + + cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .set_auto_activate_volumes() + .await; + + // Create a disk and a snapshot + let client = &cptestctx.external_client; + let _project_id = + create_project(&client, PROJECT_NAME).await.identity.id; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + let snapshot = + create_snapshot(&client, PROJECT_NAME, "disk", "snap").await; + + // Before expunging any physical disk, save some DB models + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert_eq!(snapshot_allocated_regions.len(), 0); + + // Expunge two physical disks + for i in [0, 1] { + let (dataset, _) = &disk_allocated_regions[i]; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + } + + // Request that the third region snapshot be replaced + + let region_snapshot = datastore + .region_snapshot_get( + disk_allocated_regions[2].0.id(), // dataset id + disk_allocated_regions[2].1.id(), // region id + snapshot.identity.id, + ) + .await + .unwrap() + .unwrap(); + + let request_id = datastore + .create_region_snapshot_replacement_request( + &opctx, + ®ion_snapshot, + ) + .await + .unwrap(); + + // Manually invoke the region snapshot replacement start saga + + let saga_outputs = nexus + .sagas + .saga_execute::(Params { + serialized_authn: Serialized::for_opctx(&opctx), + + request: datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, request_id, + ) + .await + .unwrap(), + + allocation_strategy: RegionAllocationStrategy::Random { + seed: None, + }, + }) + .await + .unwrap(); + + // This should have chosen the request's region snapshot as a clone + // source, and replaced it with a read-only region + + let selected_clone_source = saga_outputs + .lookup_node_output::("clone_source") + .unwrap(); + + assert_eq!( + selected_clone_source, + CloneSource::RegionSnapshot { + dataset_id: disk_allocated_regions[2].0.id(), + 
region_id: disk_allocated_regions[2].1.id(), + }, + ); + + let snapshot_allocated_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + + assert_eq!(snapshot_allocated_regions.len(), 1); + assert!(snapshot_allocated_regions.iter().all(|(_, r)| r.read_only())); + } } diff --git a/nexus/src/app/sagas/region_snapshot_replacement_step.rs b/nexus/src/app/sagas/region_snapshot_replacement_step.rs index 7b1d598861..a236fcf62c 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_step.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_step.rs @@ -375,7 +375,8 @@ async fn rsrss_replace_snapshot_in_volume( // with the saga. } - VolumeReplaceResult::ExistingVolumeDeleted => { + VolumeReplaceResult::ExistingVolumeSoftDeleted + | VolumeReplaceResult::ExistingVolumeHardDeleted => { // Proceed with the saga but skip the notification step. } } @@ -423,6 +424,20 @@ async fn rsrss_notify_upstairs( let params = sagactx.saga_params::()?; let log = sagactx.user_data().log(); + // If the associated volume was deleted, then skip this notification step as + // there is no Upstairs to talk to. Continue with the saga to transition the + // step request to Complete, and then perform the associated clean up. + + let volume_replace_snapshot_result = sagactx + .lookup::("volume_replace_snapshot_result")?; + if matches!( + volume_replace_snapshot_result, + VolumeReplaceResult::ExistingVolumeSoftDeleted + | VolumeReplaceResult::ExistingVolumeHardDeleted + ) { + return Ok(()); + } + // Make an effort to notify a Propolis if one was booted for this volume. // This is best effort: if there is a failure, this saga will unwind and be // triggered again for the same request. If there is no Propolis booted for diff --git a/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs b/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs index b83f917a70..15c6a39651 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs @@ -187,8 +187,24 @@ pub(crate) mod test { .await .unwrap(); + let step_volume_id = Uuid::new_v4(); + + datastore + .volume_create(nexus_db_model::Volume::new( + step_volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), + block_size: 512, + sub_volumes: vec![], // nothing needed here + read_only_parent: None, + }) + .unwrap(), + )) + .await + .unwrap(); + let mut request = - RegionSnapshotReplacementStep::new(Uuid::new_v4(), Uuid::new_v4()); + RegionSnapshotReplacementStep::new(Uuid::new_v4(), step_volume_id); request.replacement_state = RegionSnapshotReplacementStepState::Complete; request.old_snapshot_volume_id = Some(old_snapshot_volume_id); diff --git a/nexus/src/app/sagas/volume_delete.rs b/nexus/src/app/sagas/volume_delete.rs index a9be1f34ee..a8ded4e33c 100644 --- a/nexus/src/app/sagas/volume_delete.rs +++ b/nexus/src/app/sagas/volume_delete.rs @@ -27,12 +27,9 @@ use super::ActionRegistry; use super::NexusActionContext; use super::NexusSaga; use crate::app::sagas::declare_saga_actions; -use nexus_db_model::Dataset; -use nexus_db_model::Region; -use nexus_db_model::Volume; use nexus_db_queries::authn; use nexus_db_queries::db::datastore::CrucibleResources; -use nexus_types::identity::Asset; +use nexus_db_queries::db::datastore::FreedCrucibleResources; use serde::Deserialize; use serde::Serialize; use steno::ActionError; @@ -330,8 +327,6 @@ async fn 
svd_delete_crucible_snapshot_records( Ok(()) } -type FreedCrucibleRegions = Vec<(Dataset, Region, Option)>; - /// Deleting region snapshots in a previous saga node may have freed up regions /// that were deleted in the DB but couldn't be deleted by the Crucible Agent /// because a snapshot existed. Look for those here. These will be a different @@ -417,7 +412,7 @@ type FreedCrucibleRegions = Vec<(Dataset, Region, Option)>; /// another snapshot delete. async fn svd_find_freed_crucible_regions( sagactx: NexusActionContext, -) -> Result { +) -> Result { let osagactx = sagactx.user_data(); // Find regions freed up for deletion by a previous saga node deleting the @@ -432,11 +427,7 @@ async fn svd_find_freed_crucible_regions( }, )?; - // Don't serialize the whole Volume, as the data field contains key material! - Ok(freed_datasets_regions_and_volumes - .into_iter() - .map(|x| (x.0, x.1, x.2.map(|v: Volume| v.id()))) - .collect()) + Ok(freed_datasets_regions_and_volumes) } async fn svd_delete_freed_crucible_regions( @@ -448,9 +439,11 @@ async fn svd_delete_freed_crucible_regions( // Find regions freed up for deletion by a previous saga node deleting the // region snapshots. let freed_datasets_regions_and_volumes = - sagactx.lookup::("freed_crucible_regions")?; + sagactx.lookup::("freed_crucible_regions")?; - for (dataset, region, volume_id) in freed_datasets_regions_and_volumes { + for (dataset, region) in + &freed_datasets_regions_and_volumes.datasets_and_regions + { // Send DELETE calls to the corresponding Crucible agents osagactx .nexus() @@ -477,18 +470,17 @@ async fn svd_delete_freed_crucible_regions( e, )) })?; + } - // Remove volume DB record - if let Some(volume_id) = volume_id { - osagactx.datastore().volume_hard_delete(volume_id).await.map_err( - |e| { - ActionError::action_failed(format!( - "failed to volume_hard_delete {}: {:?}", - volume_id, e, - )) - }, - )?; - } + for volume_id in &freed_datasets_regions_and_volumes.volumes { + osagactx.datastore().volume_hard_delete(*volume_id).await.map_err( + |e| { + ActionError::action_failed(format!( + "failed to volume_hard_delete {}: {:?}", + volume_id, e, + )) + }, + )?; } Ok(()) diff --git a/nexus/test-utils/src/background.rs b/nexus/test-utils/src/background.rs index 32a2f24d9d..7c8857123c 100644 --- a/nexus/test-utils/src/background.rs +++ b/nexus/test-utils/src/background.rs @@ -305,7 +305,7 @@ pub async fn run_region_snapshot_replacement_finish( assert!(status.errors.is_empty()); - status.records_set_to_done.len() + status.finish_invoked_ok.len() } /// Run all replacement related background tasks until they aren't doing diff --git a/nexus/tests/integration_tests/crucible_replacements.rs b/nexus/tests/integration_tests/crucible_replacements.rs index 5f7e92c393..57dc624187 100644 --- a/nexus/tests/integration_tests/crucible_replacements.rs +++ b/nexus/tests/integration_tests/crucible_replacements.rs @@ -5,9 +5,12 @@ //! 
Tests related to region and region snapshot replacement use dropshot::test_util::ClientTestContext; +use nexus_client::types::LastResult; use nexus_db_model::PhysicalDiskPolicy; use nexus_db_model::RegionReplacementState; +use nexus_db_model::RegionSnapshotReplacementState; use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::datastore::region_snapshot_replacement::*; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::DataStore; use nexus_test_utils::background::*; @@ -15,11 +18,22 @@ use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_disk; +use nexus_test_utils::resource_helpers::create_disk_from_snapshot; use nexus_test_utils::resource_helpers::create_project; +use nexus_test_utils::resource_helpers::create_snapshot; +use nexus_test_utils::resource_helpers::object_create; use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params; +use nexus_types::external_api::views; +use nexus_types::identity::Asset; +use nexus_types::internal_api::background::*; +use omicron_common::api::external; +use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use omicron_uuid_kinds::GenericUuid; use slog::Logger; +use std::collections::HashSet; +use std::net::SocketAddr; use std::sync::Arc; use uuid::Uuid; @@ -40,12 +54,37 @@ fn get_disk_url(disk_name: &str) -> String { format!("/v1/disks/{disk_name}?project={}", PROJECT_NAME) } +fn get_disks_url() -> String { + format!("/v1/disks?project={}", PROJECT_NAME) +} + +fn get_snapshot_url(snapshot_name: &str) -> String { + format!("/v1/snapshots/{snapshot_name}?project={}", PROJECT_NAME) +} + +fn get_snapshots_url() -> String { + format!("/v1/snapshots?project={}", PROJECT_NAME) +} + async fn create_project_and_pool(client: &ClientTestContext) -> Uuid { create_default_ip_pool(client).await; let project = create_project(client, PROJECT_NAME).await; project.identity.id } +async fn collection_list( + client: &ClientTestContext, + list_url: &str, +) -> Vec +where + T: Clone + serde::de::DeserializeOwned, +{ + NexusRequest::iter_collection_authn(client, list_url, "", None) + .await + .expect("failed to list") + .all_items +} + /// Assert that the first part of region replacement does not create a freed /// crucible region (that would be picked up by a volume delete saga) #[nexus_test] @@ -118,280 +157,355 @@ async fn test_region_replacement_does_not_create_freed_region( assert!(datastore.find_deleted_volume_regions().await.unwrap().is_empty()); } -struct RegionReplacementDeletedVolumeTest<'a> { - log: Logger, - datastore: Arc, - disk_test: DiskTest<'a>, - client: ClientTestContext, - internal_client: ClientTestContext, - replacement_request_id: Uuid, -} - -#[derive(Debug)] -struct ExpectedEndState(pub RegionReplacementState); - -#[derive(Debug)] -struct ExpectedIntermediateState(pub RegionReplacementState); - -impl<'a> RegionReplacementDeletedVolumeTest<'a> { - pub async fn new(cptestctx: &'a ControlPlaneTestContext) -> Self { - let nexus = &cptestctx.server.server_context().nexus; - - // Create four zpools, each with one dataset. This is required for - // region and region snapshot replacement to have somewhere to move the - // data. 
- let disk_test = DiskTestBuilder::new(&cptestctx) - .on_specific_sled(cptestctx.first_sled()) - .with_zpool_count(4) - .build() - .await; - - let client = &cptestctx.external_client; - let internal_client = &cptestctx.internal_client; - let datastore = nexus.datastore().clone(); +mod region_replacement { + use super::*; - let opctx = OpContext::for_tests( - cptestctx.logctx.log.new(o!()), - datastore.clone(), - ); + #[derive(Debug)] + struct ExpectedEndState(pub RegionReplacementState); - // Create a disk - let _project_id = create_project_and_pool(client).await; + #[derive(Debug)] + struct ExpectedIntermediateState(pub RegionReplacementState); - let disk = create_disk(&client, PROJECT_NAME, "disk").await; + #[derive(Debug)] + struct ExpectedStartState(pub RegionReplacementState); - // Manually create the region replacement request for the first - // allocated region of that disk - - let (.., db_disk) = LookupPath::new(&opctx, &datastore) - .disk_id(disk.identity.id) - .fetch() - .await - .unwrap(); - - assert_eq!(db_disk.id(), disk.identity.id); - - let disk_allocated_regions = - datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); - let (_, region) = &disk_allocated_regions[0]; - - let replacement_request_id = datastore - .create_region_replacement_request_for_region(&opctx, ®ion) - .await - .unwrap(); - - // Assert the request is in state Requested + pub(super) struct DeletedVolumeTest<'a> { + log: Logger, + datastore: Arc, + disk_test: DiskTest<'a>, + client: ClientTestContext, + internal_client: ClientTestContext, + replacement_request_id: Uuid, + } - let region_replacement = datastore - .get_region_replacement_request_by_id( - &opctx, + impl<'a> DeletedVolumeTest<'a> { + pub async fn new(cptestctx: &'a ControlPlaneTestContext) -> Self { + let nexus = &cptestctx.server.server_context().nexus; + + // Create four zpools, each with one dataset. This is required for + // region and region snapshot replacement to have somewhere to move + // the data. 
+ let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(cptestctx.first_sled()) + .with_zpool_count(4) + .build() + .await; + + let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; + let datastore = nexus.datastore().clone(); + + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + // Create a disk + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + + // Manually create the region replacement request for the first + // allocated region of that disk + + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + assert_eq!(db_disk.id(), disk.identity.id); + + let disk_allocated_regions = datastore + .get_allocated_regions(db_disk.volume_id) + .await + .unwrap(); + let (_, region) = &disk_allocated_regions[0]; + + let replacement_request_id = datastore + .create_region_replacement_request_for_region(&opctx, ®ion) + .await + .unwrap(); + + // Assert the request is in state Requested + + let region_replacement = datastore + .get_region_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + assert_eq!( + region_replacement.replacement_state, + RegionReplacementState::Requested, + ); + + DeletedVolumeTest { + log: cptestctx.logctx.log.new(o!()), + datastore, + disk_test, + client: client.clone(), + internal_client: internal_client.clone(), replacement_request_id, - ) - .await - .unwrap(); - - assert_eq!( - region_replacement.replacement_state, - RegionReplacementState::Requested, - ); - - RegionReplacementDeletedVolumeTest { - log: cptestctx.logctx.log.new(o!()), - datastore, - disk_test, - client: client.clone(), - internal_client: internal_client.clone(), - replacement_request_id, + } } - } - pub fn opctx(&self) -> OpContext { - OpContext::for_tests(self.log.clone(), self.datastore.clone()) - } + pub fn opctx(&self) -> OpContext { + OpContext::for_tests(self.log.clone(), self.datastore.clone()) + } - pub async fn delete_the_disk(&self) { - let disk_url = get_disk_url("disk"); - NexusRequest::object_delete(&self.client, &disk_url) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("failed to delete disk"); - } + pub async fn delete_the_disk(&self) { + let disk_url = get_disk_url("disk"); + NexusRequest::object_delete(&self.client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + } - /// Make sure: - /// - /// - all region replacement related background tasks run to completion - /// - this harness' region replacement request has transitioned to Complete - /// - no Crucible resources are leaked - pub async fn finish_test(&self) { - // Make sure that all the background tasks can run to completion. + /// Make sure: + /// + /// - all region replacement related background tasks run to completion + /// - this harness' region replacement request has transitioned to + /// Complete + /// - no Crucible resources are leaked + pub async fn finish_test(&self) { + // Make sure that all the background tasks can run to completion. 
- run_replacement_tasks_to_completion(&self.internal_client).await; + run_replacement_tasks_to_completion(&self.internal_client).await; - // Assert the request is in state Complete + // Assert the request is in state Complete - let region_replacement = self - .datastore - .get_region_replacement_request_by_id( - &self.opctx(), - self.replacement_request_id, - ) - .await - .unwrap(); + let region_replacement = self + .datastore + .get_region_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); - assert_eq!( - region_replacement.replacement_state, - RegionReplacementState::Complete, - ); + assert_eq!( + region_replacement.replacement_state, + RegionReplacementState::Complete, + ); - // Assert there are no more Crucible resources + // Assert there are no more Crucible resources - assert!(self.disk_test.crucible_resources_deleted().await); - } + assert!(self.disk_test.crucible_resources_deleted().await); + } - async fn wait_for_request_state( - &self, - expected_end_state: ExpectedEndState, - expected_intermediate_state: ExpectedIntermediateState, - ) { - wait_for_condition( - || { - let datastore = self.datastore.clone(); - let opctx = self.opctx(); - let replacement_request_id = self.replacement_request_id; - - async move { - let region_replacement = datastore - .get_region_replacement_request_by_id( - &opctx, - replacement_request_id, - ) - .await - .unwrap(); - - let state = region_replacement.replacement_state; - - if state == expected_end_state.0 { - // The saga transitioned the request ok - Ok(()) - } else if state == expected_intermediate_state.0 { - // The saga is still running - Err(CondCheckError::<()>::NotYet) - } else { - // Any other state is not expected - panic!("unexpected state {state:?}!"); + async fn wait_for_request_state( + &self, + expected_end_state: ExpectedEndState, + expected_intermediate_state: ExpectedIntermediateState, + expected_start_state: ExpectedStartState, + ) { + wait_for_condition( + || { + let datastore = self.datastore.clone(); + let opctx = self.opctx(); + let replacement_request_id = self.replacement_request_id; + + async move { + let region_replacement = datastore + .get_region_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + let state = region_replacement.replacement_state; + + // If the expected start and end state are the same + // (i.e. there's a back edge in the associated request's + // state machine), then it's impossible to determine + // when the saga starts and stops based on the state. + if expected_end_state.0 == expected_start_state.0 { + if state == expected_end_state.0 { + // The saga transitioned the request ok, or + // hasn't started yet. Either way we have to + // return here, and the call site should perform + // an additional check for some associated + // expected result. + Ok(()) + } else if state == expected_intermediate_state.0 { + // The saga is still running + Err(CondCheckError::<()>::NotYet) + } else { + // Any other state is not expected + panic!("unexpected state {state:?}!"); + } + } else { + if state == expected_end_state.0 { + // The saga transitioned the request ok + Ok(()) + } else if state == expected_intermediate_state.0 + || state == expected_start_state.0 + { + // The saga is still running, or hasn't started + // yet. 
+ Err(CondCheckError::<()>::NotYet) + } else { + // Any other state is not expected + panic!("unexpected state {state:?}!"); + } + } } - } - }, - &std::time::Duration::from_millis(500), - &std::time::Duration::from_secs(60), - ) - .await - .expect("request transitioned to expected state"); - - // Assert the request state - - let region_replacement = self - .datastore - .get_region_replacement_request_by_id( - &self.opctx(), - self.replacement_request_id, + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(60), ) .await - .unwrap(); - - assert_eq!(region_replacement.replacement_state, expected_end_state.0); - } - - /// Run the "region replacement" task to transition the request to Running. - pub async fn transition_request_to_running(&self) { - // Activate the "region replacement" background task - - run_region_replacement(&self.internal_client).await; + .expect("request transitioned to expected state"); + + // Assert the request state + + let region_replacement = self + .datastore + .get_region_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); + + assert_eq!( + region_replacement.replacement_state, + expected_end_state.0 + ); + } - // The activation above could only have started the associated saga, so - // wait until the request is in state Running. + /// Run the "region replacement" task to transition the request to + /// Running. + pub async fn transition_request_to_running(&self) { + // Activate the "region replacement" background task - self.wait_for_request_state( - ExpectedEndState(RegionReplacementState::Running), - ExpectedIntermediateState(RegionReplacementState::Allocating), - ) - .await; - } + run_region_replacement(&self.internal_client).await; - /// Call the region replacement drive task to attach the associated volume - /// to the simulated pantry, ostensibly for reconciliation - pub async fn attach_request_volume_to_pantry(&self) { - // Run the "region replacement driver" task to attach the associated - // volume to the simulated pantry. + // The activation above could only have started the associated saga, + // so wait until the request is in state Running. - run_region_replacement_driver(&self.internal_client).await; + self.wait_for_request_state( + ExpectedEndState(RegionReplacementState::Running), + ExpectedIntermediateState(RegionReplacementState::Allocating), + ExpectedStartState(RegionReplacementState::Requested), + ) + .await; + } - // The activation above could only have started the associated saga, so - // wait until the request is in the expected end state. + /// Call the region replacement drive task to attach the associated volume + /// to the simulated pantry, ostensibly for reconciliation + pub async fn attach_request_volume_to_pantry(&self) { + // Run the "region replacement driver" task to attach the associated + // volume to the simulated pantry. - self.wait_for_request_state( - ExpectedEndState(RegionReplacementState::Running), - ExpectedIntermediateState(RegionReplacementState::Driving), - ) - .await; + run_region_replacement_driver(&self.internal_client).await; - // Additionally, assert that the drive saga recorded that it sent the - // attachment request to the simulated pantry + // The activation above could only have started the associated saga, + // so wait until the request is in the expected end state. 
- let most_recent_step = self - .datastore - .current_region_replacement_request_step( - &self.opctx(), - self.replacement_request_id, + self.wait_for_request_state( + ExpectedEndState(RegionReplacementState::Running), + ExpectedIntermediateState(RegionReplacementState::Driving), + ExpectedStartState(RegionReplacementState::Running), ) - .await - .unwrap() - .unwrap(); - - assert!(most_recent_step.pantry_address().is_some()); - } + .await; - /// Manually activate the background attachment for the request volume - pub async fn manually_activate_attached_volume( - &self, - cptestctx: &'a ControlPlaneTestContext, - ) { - let pantry = - cptestctx.sled_agent.pantry_server.as_ref().unwrap().pantry.clone(); - - let region_replacement = self - .datastore - .get_region_replacement_request_by_id( - &self.opctx(), - self.replacement_request_id, + // Additionally, assert that the drive saga recorded that it sent + // the attachment request to the simulated pantry. + // + // If `wait_for_request_state` has the same expected start and end + // state (as it does above), it's possible to exit that function + // having not yet started the saga yet, and this requires an + // additional `wait_for_condition` to wait for the expected recorded + // step. + + let most_recent_step = wait_for_condition( + || { + let datastore = self.datastore.clone(); + let opctx = self.opctx(); + let replacement_request_id = self.replacement_request_id; + + async move { + match datastore + .current_region_replacement_request_step( + &opctx, + replacement_request_id, + ) + .await + .unwrap() + { + Some(step) => Ok(step), + + None => { + // The saga either has not started yet or is + // still running - see the comment before this + // check for more info. + Err(CondCheckError::<()>::NotYet) + } + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(10), ) .await - .unwrap(); + .expect("most recent step"); - pantry - .activate_background_attachment( - region_replacement.volume_id.to_string(), - ) - .await - .unwrap(); - } + assert!(most_recent_step.pantry_address().is_some()); + } - /// Transition request to ReplacementDone via the region replacement drive - /// saga - pub async fn transition_request_to_replacement_done(&self) { - // Run the "region replacement driver" task + /// Manually activate the background attachment for the request volume + pub async fn manually_activate_attached_volume( + &self, + cptestctx: &'a ControlPlaneTestContext, + ) { + let pantry = cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .clone(); + + let region_replacement = self + .datastore + .get_region_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); + + pantry + .activate_background_attachment( + region_replacement.volume_id.to_string(), + ) + .await + .unwrap(); + } - run_region_replacement_driver(&self.internal_client).await; + /// Transition request to ReplacementDone via the region replacement + /// drive saga + pub async fn transition_request_to_replacement_done(&self) { + // Run the "region replacement driver" task - // The activation above could only have started the associated saga, so - // wait until the request is in the expected end state. 
+ run_region_replacement_driver(&self.internal_client).await; - self.wait_for_request_state( - ExpectedEndState(RegionReplacementState::ReplacementDone), - ExpectedIntermediateState(RegionReplacementState::Driving), - ) - .await; + // The activation above could only have started the associated saga, + // so wait until the request is in the expected end state. + + self.wait_for_request_state( + ExpectedEndState(RegionReplacementState::ReplacementDone), + ExpectedIntermediateState(RegionReplacementState::Driving), + ExpectedStartState(RegionReplacementState::Running), + ) + .await; + } } } @@ -401,7 +515,8 @@ impl<'a> RegionReplacementDeletedVolumeTest<'a> { async fn test_delete_volume_region_replacement_state_requested( cptestctx: &ControlPlaneTestContext, ) { - let test_harness = RegionReplacementDeletedVolumeTest::new(cptestctx).await; + let test_harness = + region_replacement::DeletedVolumeTest::new(cptestctx).await; // The request leaves the `new` function in state Requested: delete the // disk, then finish the test. @@ -417,7 +532,8 @@ async fn test_delete_volume_region_replacement_state_requested( async fn test_delete_volume_region_replacement_state_running( cptestctx: &ControlPlaneTestContext, ) { - let test_harness = RegionReplacementDeletedVolumeTest::new(cptestctx).await; + let test_harness = + region_replacement::DeletedVolumeTest::new(cptestctx).await; // The request leaves the `new` function in state Requested: // - transition the request to "Running" @@ -437,7 +553,8 @@ async fn test_delete_volume_region_replacement_state_running( async fn test_delete_volume_region_replacement_state_running_on_pantry( cptestctx: &ControlPlaneTestContext, ) { - let test_harness = RegionReplacementDeletedVolumeTest::new(cptestctx).await; + let test_harness = + region_replacement::DeletedVolumeTest::new(cptestctx).await; // The request leaves the `new` function in state Requested: // - transition the request to "Running" @@ -459,7 +576,8 @@ async fn test_delete_volume_region_replacement_state_running_on_pantry( async fn test_delete_volume_region_replacement_state_replacement_done( cptestctx: &ControlPlaneTestContext, ) { - let test_harness = RegionReplacementDeletedVolumeTest::new(cptestctx).await; + let test_harness = + region_replacement::DeletedVolumeTest::new(cptestctx).await; // The request leaves the `new` function in state Requested: // - transition the request to "Running" @@ -481,3 +599,1478 @@ async fn test_delete_volume_region_replacement_state_replacement_done( test_harness.finish_test().await; } + +/// Assert that the problem experienced in issue 6353 is fixed +#[nexus_test] +async fn test_racing_replacements_for_soft_deleted_disk_volume( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create four zpools, each with one dataset. This is required for region + // and region snapshot replacement to have somewhere to move the data. 
+ let sled_id = cptestctx.first_sled(); + let mut disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(4) + .build() + .await; + + // Create a disk, then a snapshot of that disk + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + + let snapshots_url = format!("/v1/snapshots?project={}", PROJECT_NAME); + + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "snapshot".parse().unwrap(), + description: String::from("a snapshot"), + }, + disk: disk.identity.name.into(), + }, + ) + .await; + + // Before deleting the disk, save the DB model + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + assert_eq!(db_disk.id(), disk.identity.id); + + // Next, expunge a physical disk that contains a region snapshot (which + // means it'll have the region too) + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let (dataset, region) = &disk_allocated_regions[0]; + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + + // Only one region snapshot should be been returned by the following call + // due to the allocation policy. + + let expunged_region_snapshots = datastore + .find_region_snapshots_on_expunged_physical_disks(&opctx) + .await + .unwrap(); + + assert_eq!(expunged_region_snapshots.len(), 1); + + for expunged_region_snapshot in expunged_region_snapshots { + assert_eq!(expunged_region_snapshot.snapshot_id, snapshot.identity.id); + } + + // Either one or two read/write regions will be returned: + // + // - one for the disk, and + // - one for the snapshot destination volume, depending on if it was + // allocated on to the physical disk that was expunged. + + let expunged_regions = datastore + .find_read_write_regions_on_expunged_physical_disks(&opctx) + .await + .unwrap(); + + match expunged_regions.len() { + 1 => { + assert_eq!(expunged_regions[0].id(), region.id()); + } + + 2 => { + assert!(expunged_regions.iter().any(|r| r.id() == region.id())); + + let (.., db_snapshot) = LookupPath::new(&opctx, datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let snapshot_allocated_datasets_and_regions = datastore + .get_allocated_regions(db_snapshot.destination_volume_id) + .await + .unwrap(); + + let snapshot_allocated_regions: Vec = + snapshot_allocated_datasets_and_regions + .into_iter() + .map(|(_, r)| r.id()) + .collect(); + + assert!(expunged_regions.iter().any(|region| { + snapshot_allocated_regions.contains(®ion.id()) + })); + } + + _ => { + panic!("unexpected number of expunged regions!"); + } + } + + // Now, race the region replacement with the region snapshot replacement: + // + // 1) region replacement will allocate a new region and swap it into the + // disk volume. 
+ + let internal_client = &cptestctx.internal_client; + + let _ = + activate_background_task(&internal_client, "region_replacement").await; + + // After that task invocation, there should be one running region + // replacement for the disk's region. Filter out the replacement request for + // the snapshot destination volume if it's there. The above background task + // only starts the associated saga, so wait for it to complete. + + wait_for_condition( + || { + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + async move { + let region_replacements: Vec<_> = datastore + .get_running_region_replacements(&opctx) + .await + .unwrap() + .into_iter() + .filter(|x| x.old_region_id == region.id()) + .collect(); + + if region_replacements.len() == 1 { + // The saga transitioned the request ok + Ok(()) + } else { + // The saga is still running + Err(CondCheckError::<()>::NotYet) + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(20), + ) + .await + .expect("request transitioned to expected state"); + + let region_replacements: Vec<_> = datastore + .get_running_region_replacements(&opctx) + .await + .unwrap() + .into_iter() + .filter(|x| x.old_region_id == region.id()) + .collect(); + + assert_eq!(region_replacements.len(), 1); + + // 2) region snapshot replacement start will replace the region snapshot in + // the snapshot volume + + let _ = activate_background_task( + &internal_client, + "region_snapshot_replacement_start", + ) + .await; + + // After that, there should be one "replacement done" region snapshot + // replacement for the associated region snapshot. The above background task + // only starts the associated saga, so wait for it to complete. + wait_for_condition( + || { + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + async move { + let region_snapshot_replacements = datastore + .get_replacement_done_region_snapshot_replacements(&opctx) + .await + .unwrap(); + + if region_snapshot_replacements.len() == 1 { + // The saga transitioned the request ok + Ok(()) + } else { + // The saga is still running + Err(CondCheckError::<()>::NotYet) + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(20), + ) + .await + .expect("request transitioned to expected state"); + + let region_snapshot_replacements = datastore + .get_replacement_done_region_snapshot_replacements(&opctx) + .await + .unwrap(); + + assert_eq!(region_snapshot_replacements.len(), 1); + assert_eq!( + region_snapshot_replacements[0].old_dataset_id, + dataset.id().into() + ); + assert_eq!(region_snapshot_replacements[0].old_region_id, region.id()); + assert_eq!( + region_snapshot_replacements[0].old_snapshot_id, + snapshot.identity.id + ); + assert_eq!( + region_snapshot_replacements[0].replacement_state, + RegionSnapshotReplacementState::ReplacementDone, + ); + + assert!(datastore.find_deleted_volume_regions().await.unwrap().is_empty()); + + // 3) Delete the disk + let disk_url = get_disk_url("disk"); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // The volume should be soft-deleted now. The region snapshot replacement + // swapped out the region snapshot from the snapshot volume to the temporary + // volume for later deletion, but has not actually deleted that temporary + // volume yet, so the count will not have gone to 0. 
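For readers new to this area: "soft-deleted" in the check that follows means the volume row is still present in the database but its `time_deleted` column has been set; the row is only removed later, once the resource accounting described above reaches zero. A hedged sketch of that predicate as a standalone helper — the helper name is hypothetical and the `DataStore`/ID parameter types are approximate:

    // Hypothetical helper (not part of this change): true if the volume
    // row is still present but marked deleted, i.e. soft-deleted.
    async fn volume_is_soft_deleted(
        datastore: &nexus_db_queries::db::DataStore,
        volume_id: uuid::Uuid,
    ) -> bool {
        match datastore.volume_get(volume_id).await.unwrap() {
            // Row present with `time_deleted` set: soft-deleted.
            Some(volume) => volume.time_deleted.is_some(),
            // Row gone entirely: hard-deleted, not soft-deleted.
            None => false,
        }
    }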
+ + let volume = datastore.volume_get(db_disk.volume_id).await.unwrap(); + assert!(volume.is_some()); + assert!(volume.unwrap().time_deleted.is_some()); + + // 4) region snapshot replacement garbage collect will delete the temporary + // volume with the stashed reference to the region snapshot, bringing the + // reference count to zero. + + let _ = activate_background_task( + &internal_client, + "region_snapshot_replacement_garbage_collection", + ) + .await; + + // Assert the region snapshot was deleted. + wait_for_condition( + || { + let dataset_id = dataset.id(); + let region_id = region.id(); + let snapshot_id = snapshot.identity.id; + + async move { + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap(); + + match region_snapshot { + Some(_) => { + // Region snapshot not garbage collected yet + Err(CondCheckError::<()>::NotYet) + } + + None => { + // Region snapshot garbage collected ok + Ok(()) + } + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(60), + ) + .await + .expect("region snapshot garbage collected"); + + // Assert that the disk's volume is still only soft-deleted, because the two + // other associated region snapshots still exist. + let volume = datastore.volume_get(db_disk.volume_id).await.unwrap(); + assert!(volume.is_some()); + + // Check on the old region id - it should not be deleted + let maybe_region = + datastore.get_region_optional(region.id()).await.unwrap(); + + eprintln!("old_region_id: {:?}", &maybe_region); + assert!(maybe_region.is_some()); + + // But the new region id will be! + let maybe_region = datastore + .get_region_optional(region_replacements[0].new_region_id.unwrap()) + .await + .unwrap(); + + eprintln!("new region id: {:?}", &maybe_region); + assert!(maybe_region.is_none()); + + // The region_replacement drive task should invoke the drive saga now, which + // will skip over all notification steps and transition the request to + // ReplacementDone + + let last_background_task = + activate_background_task(&internal_client, "region_replacement_driver") + .await; + + assert!(match last_background_task.last { + LastResult::Completed(last_result_completed) => { + match serde_json::from_value::( + last_result_completed.details, + ) { + Err(e) => { + eprintln!("{e}"); + false + } + + Ok(v) => !v.drive_invoked_ok.is_empty(), + } + } + + _ => { + false + } + }); + + // wait for the drive saga to complete here + wait_for_condition( + || { + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + let replacement_request_id = region_replacements[0].id; + + async move { + let region_replacement = datastore + .get_region_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + let state = region_replacement.replacement_state; + + if state == RegionReplacementState::ReplacementDone { + // The saga transitioned the request ok + Ok(()) + } else if state == RegionReplacementState::Driving { + // The drive saga is still running + Err(CondCheckError::<()>::NotYet) + } else if state == RegionReplacementState::Running { + // The drive saga hasn't started yet + Err(CondCheckError::<()>::NotYet) + } else if state == RegionReplacementState::Completing { + // The saga transitioned the request ok, and it's now being + // finished by the region replacement finish saga + Ok(()) + } else if state == RegionReplacementState::Complete { + // The saga transitioned the request ok, and it was finished + // by the 
region replacement finish saga + Ok(()) + } else { + // Any other state is not expected + panic!("unexpected state {state:?}!"); + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(60), + ) + .await + .expect("request transitioned to expected state"); + + // After the region snapshot replacement process runs to completion, there + // should be no more crucible resources left. Run the "region snapshot + // replacement step" background task until there's nothing left, then the + // "region snapshot replacement finish", then make sure there are no + // crucible resources left. + + let mut count = 0; + loop { + let actions_taken = + run_region_snapshot_replacement_step(&internal_client).await; + + if actions_taken == 0 { + break; + } + + count += 1; + + if count > 20 { + assert!(false); + } + } + + let _ = activate_background_task( + &internal_client, + "region_snapshot_replacement_finish", + ) + .await; + + // Ensure the region snapshot replacement request went to Complete + + wait_for_condition( + || { + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + let request_id = region_snapshot_replacements[0].id; + + async move { + let region_snapshot_replacement = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, request_id, + ) + .await + .unwrap(); + + let state = region_snapshot_replacement.replacement_state; + + if state == RegionSnapshotReplacementState::Complete { + Ok(()) + } else { + // Any other state is not expected + Err(CondCheckError::<()>::NotYet) + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(60), + ) + .await + .expect("request transitioned to expected state"); + + // Delete the snapshot + + let snapshot_url = get_snapshot_url("snapshot"); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // and now there should be no higher level resources left + + let disks_url = get_disks_url(); + assert_eq!( + collection_list::(&client, &disks_url).await.len(), + 0 + ); + + let snapshots_url = get_snapshots_url(); + assert_eq!( + collection_list::(&client, &snapshots_url).await.len(), + 0 + ); + + // Make sure that all the background tasks can run to completion. + + run_replacement_tasks_to_completion(&internal_client).await; + + // The disk volume should be deleted by the snapshot delete: wait until this + // happens + + wait_for_condition( + || { + let datastore = datastore.clone(); + let volume_id = db_disk.volume_id; + + async move { + let volume = datastore.volume_get(volume_id).await.unwrap(); + if volume.is_none() { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(10), + ) + .await + .expect("disk volume deleted"); + + // There should be no more crucible resources left. Don't just check for + // `crucible_resources_deleted` here! We have set one of the physical disk + // policies to expunged, so Nexus will not attempt to clean up any resources + // on that physical disk. 
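One aside on the step-driving loop earlier in this hunk: it caps the number of "region snapshot replacement step" activations at roughly 20 and fails with a bare `assert!(false)` when the cap is hit. An equivalent phrasing with a bounded `for` loop and a descriptive failure message, shown here as a sketch rather than a proposed change:

    // Drive the "step" task until it reports no work, giving up with a
    // message after 20 activations (roughly mirroring the cap above).
    let mut converged = false;
    for _ in 0..20 {
        let actions_taken =
            run_region_snapshot_replacement_step(&internal_client).await;
        if actions_taken == 0 {
            converged = true;
            break;
        }
    }
    assert!(converged, "region snapshot replacement steps did not converge");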
+ + disk_test.remove_zpool(db_zpool.id()).await; + + // Now, assert that all crucible resources are cleaned up + + assert!(disk_test.crucible_resources_deleted().await); +} + +mod region_snapshot_replacement { + use super::*; + + #[derive(Debug)] + struct ExpectedEndState(pub RegionSnapshotReplacementState); + + #[derive(Debug)] + struct ExpectedIntermediateState(pub RegionSnapshotReplacementState); + + #[derive(Debug)] + struct ExpectedStartState(pub RegionSnapshotReplacementState); + + pub(super) struct DeletedVolumeTest<'a> { + log: Logger, + datastore: Arc, + disk_test: DiskTest<'a>, + client: ClientTestContext, + internal_client: ClientTestContext, + replacement_request_id: Uuid, + snapshot_socket_addr: SocketAddr, + } + + impl<'a> DeletedVolumeTest<'a> { + pub async fn new(cptestctx: &'a ControlPlaneTestContext) -> Self { + let nexus = &cptestctx.server.server_context().nexus; + + // Create four zpools, each with one dataset. This is required for + // region and region snapshot replacement to have somewhere to move + // the data. + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(cptestctx.first_sled()) + .with_zpool_count(4) + .build() + .await; + + let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; + let datastore = nexus.datastore().clone(); + + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + + // Create a disk, a snapshot of that disk, and a disk from that + // snapshot + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + + let snapshot = + create_snapshot(&client, PROJECT_NAME, "disk", "snapshot") + .await; + + let disk_from_snapshot = create_disk_from_snapshot( + &client, + PROJECT_NAME, + "disk-from-snapshot", + snapshot.identity.id, + ) + .await; + + // Manually create the region snapshot replacement request for the + // first allocated region of that disk + + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + assert_eq!(db_disk.id(), disk.identity.id); + + let disk_allocated_regions = datastore + .get_allocated_regions(db_disk.volume_id) + .await + .unwrap(); + let (_, region) = &disk_allocated_regions[0]; + + let region_snapshot = datastore + .region_snapshot_get( + region.dataset_id(), + region.id(), + snapshot.identity.id, + ) + .await + .expect("found region snapshot without error") + .unwrap(); + + let replacement_request_id = datastore + .create_region_snapshot_replacement_request( + &opctx, + ®ion_snapshot, + ) + .await + .unwrap(); + + // Assert the request is in state Requested + + let region_snapshot_replacement = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + assert_eq!( + region_snapshot_replacement.replacement_state, + RegionSnapshotReplacementState::Requested, + ); + + // Assert two volumes reference the snapshot addr + + let snapshot_socket_addr = + region_snapshot.snapshot_addr.parse().unwrap(); + + let volumes = datastore + .find_volumes_referencing_socket_addr( + &opctx, + snapshot_socket_addr, + ) + .await + .unwrap(); + + assert_eq!(volumes.len(), 2); + + // Validate that they are snapshot and disk from snapshot + + let volumes_set: HashSet = + volumes.into_iter().map(|v| v.id()).collect(); + + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + 
let (.., db_disk_from_snapshot) = + LookupPath::new(&opctx, &datastore) + .disk_id(disk_from_snapshot.identity.id) + .fetch() + .await + .unwrap(); + + assert!(volumes_set.contains(&db_snapshot.volume_id)); + assert!(volumes_set.contains(&db_disk_from_snapshot.volume_id)); + + DeletedVolumeTest { + log: cptestctx.logctx.log.new(o!()), + datastore, + disk_test, + client: client.clone(), + internal_client: internal_client.clone(), + replacement_request_id, + snapshot_socket_addr, + } + } + + pub fn opctx(&self) -> OpContext { + OpContext::for_tests(self.log.clone(), self.datastore.clone()) + } + + pub async fn delete_the_disk(&self) { + let disk_url = get_disk_url("disk"); + NexusRequest::object_delete(&self.client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + } + + pub async fn delete_the_snapshot(&self) { + let snapshot_url = get_snapshot_url("snapshot"); + NexusRequest::object_delete(&self.client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + } + + pub async fn delete_the_disk_from_snapshot(&self) { + let disk_url = get_disk_url("disk-from-snapshot"); + NexusRequest::object_delete(&self.client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk-from-snapshot"); + } + + /// Make sure: + /// + /// - all region snapshot replacement related background tasks run to + /// completion + /// - this harness' region snapshot replacement request has transitioned + /// to Complete + /// - there are no more volumes that reference the request's region + /// snapshot + pub async fn finish_test(&self) { + // Make sure that all the background tasks can run to completion. + + run_replacement_tasks_to_completion(&self.internal_client).await; + + // Assert the request is in state Complete + + wait_for_condition( + || { + let datastore = self.datastore.clone(); + let opctx = self.opctx(); + let replacement_request_id = self.replacement_request_id; + + async move { + let region_replacement = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + let state = region_replacement.replacement_state; + + if state == RegionSnapshotReplacementState::Complete { + // The saga transitioned the request ok + Ok(()) + } else { + // The saga is still running + Err(CondCheckError::<()>::NotYet) + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(10), + ) + .await + .expect("request transitioned to expected state"); + + let region_snapshot_replacement = self + .datastore + .get_region_snapshot_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); + + assert_eq!( + region_snapshot_replacement.replacement_state, + RegionSnapshotReplacementState::Complete, + ); + + // Assert no volumes are referencing the snapshot address + + let volumes = self + .datastore + .find_volumes_referencing_socket_addr( + &self.opctx(), + self.snapshot_socket_addr, + ) + .await + .unwrap(); + + if !volumes.is_empty() { + eprintln!("{:?}", volumes); + } + + assert!(volumes.is_empty()); + } + + /// Assert no Crucible resources are leaked + pub async fn assert_no_crucible_resources_leaked(&self) { + assert!(self.disk_test.crucible_resources_deleted().await); + } + + async fn wait_for_request_state( + &self, + expected_end_state: ExpectedEndState, + expected_intermediate_state: ExpectedIntermediateState, + 
expected_start_state: ExpectedStartState, + ) { + wait_for_condition( + || { + let datastore = self.datastore.clone(); + let opctx = self.opctx(); + let replacement_request_id = self.replacement_request_id; + + async move { + let request = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, + replacement_request_id, + ) + .await + .unwrap(); + + let state = request.replacement_state; + + if state == expected_end_state.0 { + // The saga transitioned the request ok + Ok(()) + } else if state == expected_intermediate_state.0 { + // The saga is still running + Err(CondCheckError::<()>::NotYet) + } else if state == expected_start_state.0 { + // The saga hasn't started yet + Err(CondCheckError::<()>::NotYet) + } else { + // Any other state is not expected + panic!("unexpected state {state:?}!"); + } + } + }, + &std::time::Duration::from_millis(500), + &std::time::Duration::from_secs(60), + ) + .await + .expect("request transitioned to expected state"); + + // Assert the request state + + let region_snapshot_replacement = self + .datastore + .get_region_snapshot_replacement_request_by_id( + &self.opctx(), + self.replacement_request_id, + ) + .await + .unwrap(); + + assert_eq!( + region_snapshot_replacement.replacement_state, + expected_end_state.0, + ); + } + + /// Run the "region snapshot replacement" task to transition the request + /// to ReplacementDone. + pub async fn transition_request_to_replacement_done(&self) { + // Activate the "region snapshot replacement start" background task + + run_region_snapshot_replacement_start(&self.internal_client).await; + + // The activation above could only have started the associated saga, + // so wait until the request is in state Running. + + self.wait_for_request_state( + ExpectedEndState( + RegionSnapshotReplacementState::ReplacementDone, + ), + ExpectedIntermediateState( + RegionSnapshotReplacementState::Allocating, + ), + ExpectedStartState(RegionSnapshotReplacementState::Requested), + ) + .await; + } + + /// Run the "region snapshot replacement garbage collection" task to + /// transition the request to Running. + pub async fn transition_request_to_running(&self) { + // Activate the "region snapshot replacement garbage collection" + // background task + + run_region_snapshot_replacement_garbage_collection( + &self.internal_client, + ) + .await; + + // The activation above could only have started the associated saga, + // so wait until the request is in state Running. 
+ + self.wait_for_request_state( + ExpectedEndState(RegionSnapshotReplacementState::Running), + ExpectedIntermediateState( + RegionSnapshotReplacementState::DeletingOldVolume, + ), + ExpectedStartState( + RegionSnapshotReplacementState::ReplacementDone, + ), + ) + .await; + } + + /// Manually create a region snapshot replacement step for the disk + /// created from the snapshot + pub async fn create_manual_region_snapshot_replacement_step(&self) { + let disk_url = get_disk_url("disk-from-snapshot"); + + let disk_from_snapshot: external::Disk = + NexusRequest::object_get(&self.client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + let (.., db_disk_from_snapshot) = + LookupPath::new(&self.opctx(), &self.datastore) + .disk_id(disk_from_snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let result = self + .datastore + .create_region_snapshot_replacement_step( + &self.opctx(), + self.replacement_request_id, + db_disk_from_snapshot.volume_id, + ) + .await + .unwrap(); + + match result { + InsertStepResult::Inserted { .. } => {} + + _ => { + assert!(false, "bad result from create_region_snapshot_replacement_step"); + } + } + } + } +} + +/// Assert that a region snapshot replacement request in state "Requested" can +/// have its snapshot deleted and still transition to Complete +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_state_requested( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state Requested: delete + // the snapshot, then finish the test. + + test_harness.delete_the_snapshot().await; + + test_harness.finish_test().await; + + // Delete all the non-deleted resources + + test_harness.delete_the_disk().await; + test_harness.delete_the_disk_from_snapshot().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Assert that a region snapshot replacement request in state "Requested" can +/// have its snapshot deleted, and the snapshot's source disk can be deleted, +/// and the request will still transition to Complete +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_state_requested_2( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state Requested: + // - delete the snapshot + // - delete the snapshot's source disk + // - the only thing that will remain is the disk-from-snap that was created + // - finally, call finish_test + + test_harness.delete_the_snapshot().await; + test_harness.delete_the_disk().await; + + test_harness.finish_test().await; + + // Delete all the non-deleted resources + + test_harness.delete_the_disk_from_snapshot().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Assert that a region snapshot replacement request in state "Requested" can +/// have everything be deleted, and the request will still transition to +/// Complete +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_state_requested_3( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state 
Requested: + // - delete the snapshot + // - delete the snapshot's source disk + // - delete the disk created from the snapshot + // - finally, call finish_test + + test_harness.delete_the_snapshot().await; + test_harness.delete_the_disk().await; + test_harness.delete_the_disk_from_snapshot().await; + + test_harness.finish_test().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Assert that a region snapshot replacement request in state "ReplacementDone" +/// can have its snapshot deleted, and the request will still transition to +/// Complete +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_state_replacement_done( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state Requested: + // - transition the request to "ReplacementDone" + // - delete the snapshot + // - finally, call finish_test + + test_harness.transition_request_to_replacement_done().await; + + test_harness.delete_the_snapshot().await; + test_harness.delete_the_disk().await; + + test_harness.finish_test().await; + + // Delete all the non-deleted resources + + test_harness.delete_the_disk_from_snapshot().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Assert that a region snapshot replacement request in state "Running" +/// can have its snapshot deleted, and the request will still transition to +/// Complete +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_state_running( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state Requested: + // - transition the request to "ReplacementDone" + // - transition the request to "Running" + // - delete the snapshot + // - finally, call finish_test + + test_harness.transition_request_to_replacement_done().await; + test_harness.transition_request_to_running().await; + + test_harness.delete_the_snapshot().await; + test_harness.delete_the_disk().await; + + test_harness.finish_test().await; + + // Delete all the non-deleted resources + + test_harness.delete_the_disk_from_snapshot().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Assert that a region snapshot replacement step can have its associated +/// volume deleted and still transition to VolumeDeleted +#[nexus_test] +async fn test_delete_volume_region_snapshot_replacement_step( + cptestctx: &ControlPlaneTestContext, +) { + let test_harness = + region_snapshot_replacement::DeletedVolumeTest::new(cptestctx).await; + + // The request leaves the above `new` function in state Requested: + // - transition the request to "ReplacementDone" + // - transition the request to "Running" + // - manually create a region snapshot replacement step for the disk created + // from the snapshot + // - delete the disk created from the snapshot + // - finally, call finish_test + + test_harness.transition_request_to_replacement_done().await; + test_harness.transition_request_to_running().await; + + test_harness.create_manual_region_snapshot_replacement_step().await; + test_harness.delete_the_disk_from_snapshot().await; + + test_harness.finish_test().await; + + // Delete all the non-deleted resources + 
+ test_harness.delete_the_disk().await; + test_harness.delete_the_snapshot().await; + + // Assert there are no more Crucible resources + + test_harness.assert_no_crucible_resources_leaked().await; +} + +/// Tests that replacement can occur until completion +#[nexus_test] +async fn test_replacement_sanity(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create four zpools, each with one dataset. This is required for region + // and region snapshot replacement to have somewhere to move the data. + let sled_id = cptestctx.first_sled(); + + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(4) + .build() + .await; + + // Create a disk and a snapshot and a disk from that snapshot + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + let snapshot = create_snapshot(&client, PROJECT_NAME, "disk", "snap").await; + let _disk_from_snapshot = create_disk_from_snapshot( + &client, + PROJECT_NAME, + "disk-from-snap", + snapshot.identity.id, + ) + .await; + + // Before expunging the physical disk, save the DB model + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + assert_eq!(db_disk.id(), disk.identity.id); + + // Next, expunge a physical disk that contains a region + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let (dataset, _) = &disk_allocated_regions[0]; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + + // Any volumes sent to the Pantry for reconciliation should return active + // for this test + + cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .set_auto_activate_volumes() + .await; + + // Now, run all replacement tasks to completion + let internal_client = &cptestctx.internal_client; + run_replacement_tasks_to_completion(&internal_client).await; +} + +/// Tests that multiple replacements can occur until completion +#[nexus_test] +async fn test_region_replacement_triple_sanity( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create five zpools, each with one dataset. This is required for region + // and region snapshot replacement to have somewhere to move the data, and + // for this test we're doing two expungements. 
+ let sled_id = cptestctx.first_sled(); + + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(6) + .build() + .await; + + // Any volumes sent to the Pantry for reconciliation should return active + // for this test + + cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .set_auto_activate_volumes() + .await; + + // Create a disk and a snapshot and a disk from that snapshot + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + let snapshot = create_snapshot(&client, PROJECT_NAME, "disk", "snap").await; + let _disk_from_snapshot = create_disk_from_snapshot( + &client, + PROJECT_NAME, + "disk-from-snap", + snapshot.identity.id, + ) + .await; + + // Before expunging any physical disk, save some DB models + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let internal_client = &cptestctx.internal_client; + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = + datastore.get_allocated_regions(db_snapshot.volume_id).await.unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert_eq!(snapshot_allocated_regions.len(), 0); + + for i in disk_allocated_regions { + let (dataset, _) = &i; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + + // Now, run all replacement tasks to completion + run_replacement_tasks_to_completion(&internal_client).await; + } + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = + datastore.get_allocated_regions(db_snapshot.volume_id).await.unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert!(disk_allocated_regions.iter().all(|(_, r)| !r.read_only())); + + // Assert region snapshots replaced with three read-only regions + assert_eq!(snapshot_allocated_regions.len(), 3); + assert!(snapshot_allocated_regions.iter().all(|(_, r)| r.read_only())); +} + +/// Tests that multiple replacements can occur until completion, after expunging +/// two physical disks before any replacements occur (aka we can lose two +/// physical disks and still recover) +#[nexus_test] +async fn test_region_replacement_triple_sanity_2( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create five zpools, each with one dataset. This is required for region + // and region snapshot replacement to have somewhere to move the data, and + // for this test we're doing two expungements. 
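The lookup-zpool-then-expunge-its-physical-disk block above appears near-verbatim in several of these tests. A hedged sketch of what factoring it into a shared helper might look like — the helper name is hypothetical and the `DiskTest`/`Dataset` parameter types are approximate:

    // Hypothetical helper: expunge the physical disk backing the zpool
    // that holds `dataset`, mirroring the repeated block in the tests
    // above.
    async fn expunge_backing_physical_disk(
        opctx: &OpContext,
        datastore: &DataStore,
        disk_test: &DiskTest<'_>,
        dataset: &Dataset,
    ) {
        let zpool = disk_test
            .zpools()
            .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id)
            .expect("Expected at least one zpool");

        let (_, db_zpool) = LookupPath::new(opctx, datastore)
            .zpool_id(zpool.id.into_untyped_uuid())
            .fetch()
            .await
            .unwrap();

        datastore
            .physical_disk_update_policy(
                opctx,
                db_zpool.physical_disk_id.into(),
                PhysicalDiskPolicy::Expunged,
            )
            .await
            .unwrap();
    }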
+ let sled_id = cptestctx.first_sled(); + + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(sled_id) + .with_zpool_count(6) + .build() + .await; + + // Any volumes sent to the Pantry for reconciliation should return active + // for this test + + cptestctx + .sled_agent + .pantry_server + .as_ref() + .unwrap() + .pantry + .set_auto_activate_volumes() + .await; + + // Create a disk and a snapshot and a disk from that snapshot + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, "disk").await; + let snapshot = create_snapshot(&client, PROJECT_NAME, "disk", "snap").await; + let _disk_from_snapshot = create_disk_from_snapshot( + &client, + PROJECT_NAME, + "disk-from-snap", + snapshot.identity.id, + ) + .await; + + // Before expunging any physical disk, save some DB models + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot.identity.id) + .fetch() + .await + .unwrap(); + + let internal_client = &cptestctx.internal_client; + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = + datastore.get_allocated_regions(db_snapshot.volume_id).await.unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert_eq!(snapshot_allocated_regions.len(), 0); + + // Expunge two physical disks before any replacements occur + for i in [0, 1] { + let (dataset, _) = &disk_allocated_regions[i]; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + } + + // Now, run all replacement tasks to completion + run_replacement_tasks_to_completion(&internal_client).await; + + // Expunge the last physical disk + { + let (dataset, _) = &disk_allocated_regions[2]; + + let zpool = disk_test + .zpools() + .find(|x| *x.id.as_untyped_uuid() == dataset.pool_id) + .expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id.into(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + } + + // Now, run all replacement tasks to completion + run_replacement_tasks_to_completion(&internal_client).await; + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + let snapshot_allocated_regions = + datastore.get_allocated_regions(db_snapshot.volume_id).await.unwrap(); + + assert_eq!(disk_allocated_regions.len(), 3); + assert!(disk_allocated_regions.iter().all(|(_, r)| !r.read_only())); + + // Assert region snapshots replaced with three read-only regions + assert_eq!(snapshot_allocated_regions.len(), 3); + assert!(snapshot_allocated_regions.iter().all(|(_, r)| r.read_only())); +} diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index d9888f9ccd..db16113dd7 100644 --- a/nexus/tests/integration_tests/disks.rs +++ 
b/nexus/tests/integration_tests/disks.rs @@ -769,10 +769,9 @@ async fn test_disk_region_creation_failure( .await .unwrap(); - // After the failed allocation, the disk should be Faulted + // After the failed allocation, the disk creation should have unwound let disks = disks_list(&client, &disks_url).await; - assert_eq!(disks.len(), 1); - assert_eq!(disks[0].state, DiskState::Faulted); + assert_eq!(disks.len(), 0); } // Tests that invalid block sizes are rejected @@ -2578,7 +2577,7 @@ async fn test_disk_expunge(cptestctx: &ControlPlaneTestContext) { // All three regions should be returned let expunged_regions = datastore - .find_regions_on_expunged_physical_disks(&opctx) + .find_read_write_regions_on_expunged_physical_disks(&opctx) .await .unwrap(); diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 6a9ce28389..f0eb294e58 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -3764,12 +3764,13 @@ impl TestReadOnlyRegionReferenceUsage { // read-only regions should never be returned by find_deleted_volume_regions pub async fn region_not_returned_by_find_deleted_volume_regions(&self) { - let deleted_volume_regions = + let freed_crucible_resources = self.datastore.find_deleted_volume_regions().await.unwrap(); - assert!(!deleted_volume_regions + assert!(!freed_crucible_resources + .datasets_and_regions .into_iter() - .any(|(_, r, _)| r.id() == self.region.id())); + .any(|(_, r)| r.id() == self.region.id())); } pub async fn create_first_volume_region_in_rop(&self) { diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index ea41375d78..8915ac8746 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -248,11 +248,13 @@ impl Blueprint { pub fn all_omicron_datasets( &self, filter: BlueprintDatasetFilter, - ) -> impl Iterator { + ) -> impl Iterator { self.blueprint_datasets .iter() - .flat_map(move |(_, datasets)| datasets.datasets.values()) - .filter(move |d| d.disposition.matches(filter)) + .flat_map(move |(sled_id, datasets)| { + datasets.datasets.values().map(|dataset| (*sled_id, dataset)) + }) + .filter(move |(_, d)| d.disposition.matches(filter)) } /// Iterate over the [`BlueprintZoneConfig`] instances in the blueprint @@ -637,7 +639,16 @@ fn zone_sort_key(z: &T) -> impl Ord { /// /// Part of [`BlueprintZonesConfig`]. #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct BlueprintZoneConfig { /// The disposition (desired state) of this zone recorded in the blueprint. @@ -1010,7 +1021,16 @@ impl BlueprintDatasetDisposition { /// Information about a dataset as recorded in a blueprint #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct BlueprintDatasetConfig { // TODO: Display this in diffs - leave for now, for backwards compat diff --git a/nexus/types/src/deployment/network_resources.rs b/nexus/types/src/deployment/network_resources.rs index 358e7c229d..b08e5a2582 100644 --- a/nexus/types/src/deployment/network_resources.rs +++ b/nexus/types/src/deployment/network_resources.rs @@ -148,7 +148,18 @@ impl OmicronZoneNetworkResources { } /// External IP variants possible for Omicron-managed zones. 
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize)] +#[derive( + Debug, + Clone, + Copy, + Hash, + PartialOrd, + Ord, + PartialEq, + Eq, + Serialize, + Deserialize, +)] pub enum OmicronZoneExternalIp { Floating(OmicronZoneExternalFloatingIp), Snat(OmicronZoneExternalSnatIp), @@ -204,6 +215,8 @@ pub enum OmicronZoneExternalIpKey { Clone, Copy, Hash, + PartialOrd, + Ord, PartialEq, Eq, JsonSchema, @@ -223,6 +236,8 @@ pub struct OmicronZoneExternalFloatingIp { Copy, PartialEq, Eq, + PartialOrd, + Ord, JsonSchema, Serialize, Deserialize, @@ -249,6 +264,8 @@ impl OmicronZoneExternalFloatingAddr { Clone, Copy, Hash, + PartialOrd, + Ord, PartialEq, Eq, JsonSchema, diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs index bb5225986c..c1ecd18158 100644 --- a/nexus/types/src/deployment/zone_type.rs +++ b/nexus/types/src/deployment/zone_type.rs @@ -23,7 +23,16 @@ use std::net::Ipv6Addr; use std::net::SocketAddrV6; #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] #[serde(tag = "type", rename_all = "snake_case")] pub enum BlueprintZoneType { @@ -339,7 +348,16 @@ pub mod blueprint_zone_type { use std::net::SocketAddrV6; #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct BoundaryNtp { pub address: SocketAddrV6, @@ -353,7 +371,16 @@ pub mod blueprint_zone_type { /// Used in single-node clickhouse setups #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct Clickhouse { pub address: SocketAddrV6, @@ -361,7 +388,16 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct ClickhouseKeeper { pub address: SocketAddrV6, @@ -370,7 +406,16 @@ pub mod blueprint_zone_type { /// Used in replicated clickhouse setups #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct ClickhouseServer { pub address: SocketAddrV6, @@ -378,7 +423,16 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct CockroachDb { pub address: SocketAddrV6, @@ -386,7 +440,16 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct Crucible { pub address: SocketAddrV6, @@ -394,14 +457,32 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct CruciblePantry { pub address: SocketAddrV6, } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, 
Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct ExternalDns { pub dataset: OmicronZoneDataset, @@ -414,7 +495,16 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct InternalDns { pub dataset: OmicronZoneDataset, @@ -434,14 +524,32 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct InternalNtp { pub address: SocketAddrV6, } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct Nexus { /// The address at which the internal nexus server is reachable. @@ -457,7 +565,16 @@ pub mod blueprint_zone_type { } #[derive( - Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffus, + Debug, + Clone, + Eq, + PartialEq, + Ord, + PartialOrd, + JsonSchema, + Deserialize, + Serialize, + Diffus, )] pub struct Oximeter { pub address: SocketAddrV6, diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index cf3d652587..bee2f56c34 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -37,6 +37,7 @@ pub struct LookupRegionPortStatus { pub struct RegionSnapshotReplacementStartStatus { pub requests_created_ok: Vec, pub start_invoked_ok: Vec, + pub requests_completed_ok: Vec, pub errors: Vec, } @@ -55,13 +56,14 @@ pub struct RegionSnapshotReplacementStepStatus { pub step_records_created_ok: Vec, pub step_garbage_collect_invoked_ok: Vec, pub step_invoked_ok: Vec, + pub step_set_volume_deleted_ok: Vec, pub errors: Vec, } /// The status of a `region_snapshot_replacement_finish` background task activation #[derive(Serialize, Deserialize, Default, Debug, PartialEq, Eq)] pub struct RegionSnapshotReplacementFinishStatus { - pub records_set_to_done: Vec, + pub finish_invoked_ok: Vec, pub errors: Vec, } diff --git a/openapi/clickhouse-admin-server.json b/openapi/clickhouse-admin-server.json index c82c7c0d8e..50c526569e 100644 --- a/openapi/clickhouse-admin-server.json +++ b/openapi/clickhouse-admin-server.json @@ -616,7 +616,7 @@ "type": "object", "properties": { "time": { - "$ref": "#/components/schemas/Timestamp" + "type": "string" }, "value": { "type": "number", @@ -628,17 +628,6 @@ "value" ] }, - "Timestamp": { - "anyOf": [ - { - "type": "string", - "format": "date-time" - }, - { - "type": "string" - } - ] - }, "SystemTable": { "description": "Available metrics tables in the `system` database", "type": "string", diff --git a/openapi/clickhouse-admin-single.json b/openapi/clickhouse-admin-single.json index b00bf56314..c6b99da245 100644 --- a/openapi/clickhouse-admin-single.json +++ b/openapi/clickhouse-admin-single.json @@ -131,7 +131,7 @@ "type": "object", "properties": { "time": { - "$ref": "#/components/schemas/Timestamp" + "type": "string" }, "value": { "type": "number", @@ -143,17 +143,6 @@ "value" ] }, - "Timestamp": { - "anyOf": [ - { - "type": "string", - "format": "date-time" - }, - { - "type": "string" - } - ] - }, "SystemTable": { "description": "Available 
metrics tables in the `system` database", "type": "string", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 14a4c92692..c7d2b36de4 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -785,9 +785,9 @@ } }, "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}": { - "get": { - "summary": "Fetch a support bundle from a particular dataset", - "operationId": "support_bundle_get", + "post": { + "summary": "Create a support bundle within a particular dataset", + "operationId": "support_bundle_create", "parameters": [ { "in": "path", @@ -815,32 +815,50 @@ "schema": { "$ref": "#/components/schemas/TypedUuidForZpoolKind" } + }, + { + "in": "query", + "name": "hash", + "required": true, + "schema": { + "type": "string", + "format": "hex string (32 bytes)" + } } ], "requestBody": { "content": { - "application/json": { + "application/octet-stream": { "schema": { - "$ref": "#/components/schemas/SupportBundleGetQueryParams" + "type": "string", + "format": "binary" } } }, "required": true }, "responses": { - "default": { - "description": "", + "201": { + "description": "successful creation", "content": { - "*/*": { - "schema": {} + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleMetadata" + } } } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" } } }, - "post": { - "summary": "Create a support bundle within a particular dataset", - "operationId": "support_bundle_create", + "delete": { + "summary": "Delete a support bundle from a particular dataset", + "operationId": "support_bundle_delete", "parameters": [ { "in": "path", @@ -868,50 +886,165 @@ "schema": { "$ref": "#/components/schemas/TypedUuidForZpoolKind" } + } + ], + "responses": { + "204": { + "description": "successful deletion" }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download": { + "get": { + "summary": "Fetch a support bundle from a particular dataset", + "operationId": "support_bundle_download", + "parameters": [ { - "in": "query", - "name": "hash", + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", "required": true, "schema": { - "type": "string", - "format": "hex string (32 bytes)" + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" } } ], - "requestBody": { - "content": { - "application/octet-stream": { - "schema": { - "type": "string", - "format": "binary" + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} } } + } + } + }, + "head": { + "summary": "Fetch metadata about a support bundle from a particular dataset", + "operationId": "support_bundle_head", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } }, - "required": true - }, + { + 
"in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" + } + } + ], "responses": { - "201": { - "description": "successful creation", + "default": { + "description": "", "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SupportBundleMetadata" - } + "*/*": { + "schema": {} } } + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download/{file}": { + "get": { + "summary": "Fetch a file within a support bundle from a particular dataset", + "operationId": "support_bundle_download_file", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } }, - "4XX": { - "$ref": "#/components/responses/Error" + { + "in": "path", + "name": "file", + "description": "The path of the file within the support bundle to query", + "required": true, + "schema": { + "type": "string" + } }, - "5XX": { - "$ref": "#/components/responses/Error" + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } } } }, - "delete": { - "summary": "Delete a support bundle from a particular dataset", - "operationId": "support_bundle_delete", + "head": { + "summary": "Fetch metadata about a file within a support bundle from a particular dataset", + "operationId": "support_bundle_head_file", "parameters": [ { "in": "path", @@ -922,6 +1055,15 @@ "$ref": "#/components/schemas/TypedUuidForDatasetKind" } }, + { + "in": "path", + "name": "file", + "description": "The path of the file within the support bundle to query", + "required": true, + "schema": { + "type": "string" + } + }, { "in": "path", "name": "support_bundle_id", @@ -942,20 +1084,64 @@ } ], "responses": { - "204": { - "description": "successful deletion" + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/index": { + "get": { + "summary": "Fetch the index (list of files within a support bundle)", + "operationId": "support_bundle_index", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } }, - "4XX": { - "$ref": "#/components/responses/Error" + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForSupportBundleKind" + } }, - "5XX": { - "$ref": "#/components/responses/Error" + { + "in": "path", + "name": 
"zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } } } }, "head": { - "summary": "Fetch a support bundle from a particular dataset", - "operationId": "support_bundle_head", + "summary": "Fetch metadata about the list of files within a support bundle", + "operationId": "support_bundle_head_index", "parameters": [ { "in": "path", @@ -985,16 +1171,6 @@ } } ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SupportBundleGetQueryParams" - } - } - }, - "required": true - }, "responses": { "default": { "description": "", @@ -5613,18 +5789,6 @@ "format": "uint8", "minimum": 0 }, - "SupportBundleGetQueryParams": { - "description": "Query parameters for reading the support bundle", - "type": "object", - "properties": { - "query_type": { - "$ref": "#/components/schemas/SupportBundleQueryType" - } - }, - "required": [ - "query_type" - ] - }, "SupportBundleMetadata": { "description": "Metadata about a support bundle", "type": "object", @@ -5641,60 +5805,6 @@ "support_bundle_id" ] }, - "SupportBundleQueryType": { - "description": "Describes the type of access to the support bundle", - "oneOf": [ - { - "description": "Access the whole support bundle", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "whole" - ] - } - }, - "required": [ - "type" - ] - }, - { - "description": "Access the names of all files within the support bundle", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "index" - ] - } - }, - "required": [ - "type" - ] - }, - { - "description": "Access a specific file within the support bundle", - "type": "object", - "properties": { - "file_path": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "path" - ] - } - }, - "required": [ - "file_path", - "type" - ] - } - ] - }, "SupportBundleState": { "type": "string", "enum": [ diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index e924cb2ee3..7e28831fa0 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -745,9 +745,9 @@ mod tests { let count = stats.collections.datum.value() as usize; assert!(count != 0); - assert_eq!( - count, - collection_count.load(Ordering::SeqCst), + let server_count = collection_count.load(Ordering::SeqCst); + assert!( + count == server_count || count - 1 == server_count, "number of collections reported by the collection \ task differs from the number reported by the empty \ producer server itself" @@ -892,9 +892,16 @@ mod tests { assert_eq!(stats.collections.datum.value(), 0); assert!(count != 0); - assert_eq!( - count, - collection_count.load(Ordering::SeqCst), + + // The server may have handled a request that we've not yet recorded on + // our collection task side, so we allow the server count to be greater + // than our own. But since the collection task is single-threaded, it + // cannot ever be more than _one_ greater than our count, since we + // should increment that counter before making another request to the + // server. 
+ let server_count = collection_count.load(Ordering::SeqCst); + assert!( + count == server_count || count - 1 == server_count, "number of collections reported by the collection \ task differs from the number reported by the always-ded \ producer server itself" diff --git a/package-manifest.toml b/package-manifest.toml index 83b1ba8168..b28ac7d59f 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -166,7 +166,8 @@ source.packages = [ "internal-dns-cli.tar.gz", "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", - "zone-network-install.tar.gz" + "zone-network-install.tar.gz", + "clickana.tar.gz" ] output.type = "zone" @@ -197,7 +198,8 @@ source.packages = [ "internal-dns-cli.tar.gz", "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", - "zone-network-install.tar.gz" + "zone-network-install.tar.gz", + "clickana.tar.gz" ] output.type = "zone" @@ -924,3 +926,12 @@ service_name = "probe" source.type = "composite" source.packages = ["thundermuffin.tar.gz"] output.type = "zone" + +[package.clickana] +service_name = "clickana" +only_for_targets.image = "standard" +source.type = "local" +source.rust.binary_names = ["clickana"] +source.rust.release = true +output.type = "zone" +output.intermediate_only = true \ No newline at end of file diff --git a/package/Cargo.toml b/package/Cargo.toml index b63a5ed96f..ccc768bb8e 100644 --- a/package/Cargo.toml +++ b/package/Cargo.toml @@ -25,6 +25,7 @@ reqwest = { workspace = true, features = [ "rustls-tls" ] } ring.workspace = true semver.workspace = true serde.workspace = true +shell-words.workspace = true sled-hardware.workspace = true slog.workspace = true slog-async.workspace = true diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index cc4050cbce..f4bda47e2c 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -4,31 +4,29 @@ //! Utility for bundling target binaries as tarfiles. 
-use anyhow::{anyhow, bail, ensure, Context, Result}; +use anyhow::{anyhow, bail, Context, Result}; use camino::{Utf8Path, Utf8PathBuf}; use clap::{Parser, Subcommand}; use futures::stream::{self, StreamExt, TryStreamExt}; use illumos_utils::{zfs, zone}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; +use omicron_package::cargo_plan::build_cargo_plan; +use omicron_package::config::{Config, ConfigArgs}; use omicron_package::target::KnownTarget; use omicron_package::{parse, BuildCommand, DeployCommand, TargetCommand}; -use omicron_zone_package::config::{Config as PackageConfig, PackageMap}; +use omicron_zone_package::config::Config as PackageConfig; use omicron_zone_package::package::{Package, PackageOutput, PackageSource}; use omicron_zone_package::progress::Progress; use omicron_zone_package::target::Target; use rayon::prelude::*; use ring::digest::{Context as DigestContext, Digest, SHA256}; use sled_hardware::cleanup::cleanup_networking_resources; -use slog::debug; use slog::o; use slog::Drain; use slog::Logger; use slog::{info, warn}; -use std::collections::{BTreeMap, BTreeSet}; use std::env; use std::fs::create_dir_all; -use std::io::Write; -use std::str::FromStr; use std::sync::{Arc, OnceLock}; use std::time::Duration; use tokio::io::{AsyncReadExt, AsyncWriteExt, BufReader}; @@ -43,11 +41,6 @@ enum SubCommand { Deploy(DeployCommand), } -fn parse_duration_ms(arg: &str) -> Result { - let ms = arg.parse()?; - Ok(std::time::Duration::from_millis(ms)) -} - #[derive(Debug, Parser)] #[clap(name = "packaging tool")] struct Args { @@ -63,170 +56,70 @@ struct Args { )] manifest: Utf8PathBuf, - #[clap( - short, - long, - help = "The name of the build target to use for this command", - default_value_t = ACTIVE.to_string(), - )] - target: String, - /// The output directory, where artifacts should be built and staged #[clap(long = "artifacts", default_value = "out/")] - artifact_dir: Utf8PathBuf, - - #[clap( - short, - long, - help = "Skip confirmation prompt for destructive operations", - action, - default_value_t = false - )] - force: bool, - - #[clap( - long, - help = "Number of retries to use when re-attempting failed package downloads", - action, - default_value_t = 10 - )] - retry_count: usize, + pub artifact_dir: Utf8PathBuf, - #[clap( - long, - help = "Duration, in ms, to wait before re-attempting failed package downloads", - action, - value_parser = parse_duration_ms, - default_value = "1000", - )] - retry_duration: std::time::Duration, + #[clap(flatten)] + config_args: ConfigArgs, #[clap(subcommand)] subcommand: SubCommand, } -#[derive(Debug, Default)] -struct CargoPlan<'a> { - command: &'a str, - packages: BTreeSet<&'a String>, - bins: BTreeSet<&'a String>, - features: BTreeSet<&'a String>, - release: bool, -} +async fn do_show_cargo_commands(config: &Config) -> Result<()> { + let metadata = cargo_metadata::MetadataCommand::new().no_deps().exec()?; + let features = config.cargo_features(); + let cargo_plan = + build_cargo_plan(&metadata, config.packages_to_build(), &features)?; -impl<'a> CargoPlan<'a> { - async fn run(&self, log: &Logger) -> Result<()> { - if self.bins.is_empty() { - return Ok(()); - } + let release_command = cargo_plan.release.build_command("build"); + let debug_command = cargo_plan.debug.build_command("build"); - let mut cmd = Command::new("cargo"); - // We rely on the rust-toolchain.toml file for toolchain information, - // rather than specifying one within the packaging tool. 
- cmd.arg(self.command); - // We specify _both_ --package and --bin; --bin does not imply - // --package, and without any --package options Cargo unifies features - // across all workspace default members. See rust-lang/cargo#8157. - for package in &self.packages { - cmd.arg("--package").arg(package); - } - for bin in &self.bins { - cmd.arg("--bin").arg(bin); - } - if !self.features.is_empty() { - cmd.arg("--features").arg(self.features.iter().fold( - String::new(), - |mut acc, s| { - if !acc.is_empty() { - acc.push(' '); - } - acc.push_str(s); - acc - }, - )); - } - if self.release { - cmd.arg("--release"); - } - info!(log, "running: {:?}", cmd.as_std()); - let status = cmd - .status() - .await - .context(format!("Failed to run command: ({:?})", cmd))?; - if !status.success() { - bail!("Failed to build packages"); - } + print!("release command: "); + if let Some(command) = release_command { + println!("{}", command_to_string(&command)); + } else { + println!("(none)"); + } - Ok(()) + print!("debug command: "); + if let Some(command) = debug_command { + println!("{}", command_to_string(&command)); + } else { + println!("(none)"); } + + Ok(()) +} + +fn command_to_string(command: &Command) -> String { + // Use shell-words to join the command and arguments into a single string. + let mut v = vec![command + .as_std() + .get_program() + .to_str() + .expect("program is valid UTF-8")]; + v.extend( + command + .as_std() + .get_args() + .map(|arg| arg.to_str().expect("argument is valid UTF-8")), + ); + + shell_words::join(&v) } async fn do_for_all_rust_packages( config: &Config, command: &str, ) -> Result<()> { - // Collect a map of all of the workspace packages - let workspace = cargo_metadata::MetadataCommand::new().no_deps().exec()?; - let workspace_pkgs = workspace - .packages - .into_iter() - .filter_map(|package| { - workspace - .workspace_members - .contains(&package.id) - .then_some((package.name.clone(), package)) - }) - .collect::>(); - - // Generate a list of all features we might want to request - let features = config - .target - .0 - .iter() - .map(|(name, value)| format!("{name}-{value}")) - .collect::>(); - - // We split the packages to be built into "release" and "debug" lists - let mut release = - CargoPlan { command, release: true, ..Default::default() }; - let mut debug = CargoPlan { command, release: false, ..Default::default() }; - - for (name, pkg) in config.packages_to_build().0 { - // If this is a Rust package, `name` (the map key) is the name of the - // corresponding Rust crate. - if let PackageSource::Local { rust: Some(rust_pkg), .. 
} = &pkg.source { - let plan = if rust_pkg.release { &mut release } else { &mut debug }; - // Add the package name to the plan - plan.packages.insert(name); - // Get the package metadata - let metadata = workspace_pkgs.get(name).with_context(|| { - format!("package '{name}' is not a workspace package") - })?; - // Add the binaries we want to build to the plan - let bins = metadata - .targets - .iter() - .filter_map(|target| target.is_bin().then_some(&target.name)) - .collect::>(); - for bin in &rust_pkg.binary_names { - ensure!( - bins.contains(bin), - "bin target '{bin}' does not belong to package '{name}'" - ); - plan.bins.insert(bin); - } - // Add all features we want to request to the plan - plan.features.extend( - features - .iter() - .filter(|feature| metadata.features.contains_key(*feature)), - ); - } - } + let metadata = cargo_metadata::MetadataCommand::new().no_deps().exec()?; + let features = config.cargo_features(); + let cargo_plan = + build_cargo_plan(&metadata, config.packages_to_build(), &features)?; - release.run(&config.log).await?; - debug.run(&config.log).await?; - Ok(()) + cargo_plan.run(command, config.log()).await } async fn do_check(config: &Config) -> Result<()> { @@ -240,7 +133,7 @@ async fn do_build(config: &Config) -> Result<()> { async fn do_dot(config: &Config) -> Result<()> { println!( "{}", - omicron_package::dot::do_dot(&config.target, &config.package_config)? + omicron_package::dot::do_dot(config.target(), config.package_config())? ); Ok(()) } @@ -262,9 +155,6 @@ async fn do_list_outputs( Ok(()) } -// The name reserved for the currently-in-use build target. -const ACTIVE: &str = "active"; - async fn do_target( artifact_dir: &Utf8Path, name: &str, @@ -302,7 +192,8 @@ async fn do_target( println!("Created new build target '{name}' and set it as active"); } TargetCommand::List => { - let active = tokio::fs::read_link(target_dir.join(ACTIVE)).await?; + let active = + tokio::fs::read_link(target_dir.join(Config::ACTIVE)).await?; let active = Utf8PathBuf::try_from(active)?; for entry in walkdir::WalkDir::new(&target_dir) .max_depth(1) @@ -341,7 +232,7 @@ async fn get_single_target( target_dir: impl AsRef, name: &str, ) -> Result { - if name == ACTIVE { + if name == Config::ACTIVE { bail!( "The name '{name}' is reserved, please try another (e.g. 
'default')\n\ Usage: '{} -t target ...'", @@ -358,7 +249,7 @@ async fn replace_active_link( let src = src.as_ref(); let target_dir = target_dir.as_ref(); - let dst = target_dir.join(ACTIVE); + let dst = target_dir.join(Config::ACTIVE); if !target_dir.join(src).exists() { bail!("Target file {} does not exist", src); } @@ -467,7 +358,7 @@ async fn ensure_package( output_directory: &Utf8Path, disable_cache: bool, ) -> Result<()> { - let target = &config.target; + let target = config.target(); let progress = ui.add_package(package_name.to_string()); match &package.source { PackageSource::Prebuilt { repo, commit, sha256 } => { @@ -484,7 +375,7 @@ async fn ensure_package( }; if should_download { - let mut attempts_left = config.retry_count + 1; + let mut attempts_left = config.retry_count() + 1; loop { match download_prebuilt( &progress, @@ -504,7 +395,7 @@ async fn ensure_package( if attempts_left == 0 { return Err(err); } - tokio::time::sleep(config.retry_duration).await; + tokio::time::sleep(config.retry_duration()).await; progress.reset(); } } @@ -555,7 +446,7 @@ async fn do_package( create_dir_all(&output_directory) .map_err(|err| anyhow!("Cannot create output directory: {}", err))?; - let ui = ProgressUI::new(&config.log); + let ui = ProgressUI::new(config.log()); do_build(&config).await?; @@ -596,8 +487,8 @@ async fn do_stamp( ) -> Result<()> { // Find the package which should be stamped let (_name, package) = config - .package_config - .packages_to_deploy(&config.target) + .package_config() + .packages_to_deploy(config.target()) .0 .into_iter() .find(|(name, _pkg)| name.as_str() == package_name) @@ -620,7 +511,8 @@ async fn do_unpack( })?; // Copy all packages to the install location in parallel. - let packages = config.package_config.packages_to_deploy(&config.target).0; + let packages = + config.package_config().packages_to_deploy(&config.target()).0; packages.par_iter().try_for_each( |(package_name, package)| -> Result<()> { @@ -629,7 +521,7 @@ async fn do_unpack( let dst = package.get_output_path(&package.service_name, install_dir); info!( - &config.log, + config.log(), "Installing service"; "src" => %src, "dst" => %dst, @@ -661,7 +553,7 @@ async fn do_unpack( let tar_path = install_dir.join(format!("{}.tar", service_name)); let service_path = install_dir.join(service_name); info!( - &config.log, + config.log(), "Unpacking service tarball"; "tar_path" => %tar_path, "service_path" => %service_path, @@ -681,14 +573,14 @@ fn do_activate(config: &Config, install_dir: &Utf8Path) -> Result<()> { // Install the bootstrap service, which itself extracts and // installs other services. 
if let Some(package) = - config.package_config.packages.get("omicron-sled-agent") + config.package_config().packages.get("omicron-sled-agent") { let manifest_path = install_dir .join(&package.service_name) .join("pkg") .join("manifest.xml"); info!( - config.log, + config.log(), "Installing bootstrap service from {}", manifest_path ); @@ -722,7 +614,7 @@ async fn uninstall_all_omicron_zones() -> Result<()> { fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { let datasets = match zfs::get_all_omicron_datasets_for_delete() { Err(e) => { - warn!(config.log, "Failed to get omicron datasets: {}", e); + warn!(config.log(), "Failed to get omicron datasets: {}", e); return Err(e); } Ok(datasets) => datasets, @@ -737,7 +629,7 @@ fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { datasets ))?; for dataset in &datasets { - info!(config.log, "Deleting dataset: {dataset}"); + info!(config.log(), "Deleting dataset: {dataset}"); zfs::Zfs::destroy_dataset(dataset)?; } @@ -747,8 +639,8 @@ fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { // Attempts to both disable and delete all requested packages. fn uninstall_all_packages(config: &Config) { for (_, package) in config - .package_config - .packages_to_deploy(&config.target) + .package_config() + .packages_to_deploy(config.target()) .0 .into_iter() .filter(|(_, package)| matches!(package.output, PackageOutput::Tarball)) @@ -812,18 +704,18 @@ fn remove_all_except>( } async fn do_deactivate(config: &Config) -> Result<()> { - info!(&config.log, "Removing all Omicron zones"); + info!(config.log(), "Removing all Omicron zones"); uninstall_all_omicron_zones().await?; - info!(config.log, "Uninstalling all packages"); + info!(config.log(), "Uninstalling all packages"); uninstall_all_packages(config); - info!(config.log, "Removing networking resources"); - cleanup_networking_resources(&config.log).await?; + info!(config.log(), "Removing networking resources"); + cleanup_networking_resources(config.log()).await?; Ok(()) } async fn do_uninstall(config: &Config) -> Result<()> { do_deactivate(config).await?; - info!(config.log, "Removing datasets"); + info!(config.log(), "Removing datasets"); uninstall_all_omicron_datasets(config)?; Ok(()) } @@ -834,7 +726,7 @@ async fn do_clean( install_dir: &Utf8Path, ) -> Result<()> { do_uninstall(&config).await?; - info!(config.log, "Removing artifacts from {}", artifact_dir); + info!(config.log(), "Removing artifacts from {}", artifact_dir); const ARTIFACTS_TO_KEEP: &[&str] = &[ "clickhouse", "cockroachdb", @@ -843,10 +735,10 @@ async fn do_clean( "downloads", "softnpu", ]; - remove_all_except(artifact_dir, ARTIFACTS_TO_KEEP, &config.log)?; - info!(config.log, "Removing installed objects in: {}", install_dir); + remove_all_except(artifact_dir, ARTIFACTS_TO_KEEP, config.log())?; + info!(config.log(), "Removing installed objects in: {}", install_dir); const INSTALLED_OBJECTS_TO_KEEP: &[&str] = &["opte"]; - remove_all_except(install_dir, INSTALLED_OBJECTS_TO_KEEP, &config.log)?; + remove_all_except(install_dir, INSTALLED_OBJECTS_TO_KEEP, config.log())?; Ok(()) } @@ -957,102 +849,6 @@ impl Progress for PackageProgress { } } -struct Config { - log: Logger, - // Description of all possible packages. - package_config: PackageConfig, - // Description of the target we're trying to operate on. - target: Target, - // The list of packages the user wants us to build (all, if empty) - only: Vec, - // True if we should skip confirmations for destructive operations. 
- force: bool, - // Number of times to retry failed downloads. - retry_count: usize, - // Duration to wait before retrying failed downloads. - retry_duration: std::time::Duration, -} - -impl Config { - /// Prompts the user for input before proceeding with an operation. - fn confirm(&self, prompt: &str) -> Result<()> { - if self.force { - return Ok(()); - } - - print!("{prompt}\n[yY to confirm] >> "); - let _ = std::io::stdout().flush(); - - let mut input = String::new(); - std::io::stdin().read_line(&mut input)?; - match input.as_str().trim() { - "y" | "Y" => Ok(()), - _ => bail!("Aborting"), - } - } - - /// Returns target packages to be assembled on the builder machine, limited - /// to those specified in `only` (if set). - fn packages_to_build(&self) -> PackageMap<'_> { - let packages = self.package_config.packages_to_build(&self.target); - if self.only.is_empty() { - return packages; - } - - let mut filtered_packages = PackageMap(BTreeMap::new()); - let mut to_walk = PackageMap(BTreeMap::new()); - // add the requested packages to `to_walk` - for package_name in &self.only { - to_walk.0.insert( - package_name, - packages.0.get(package_name).unwrap_or_else(|| { - panic!( - "Explicitly-requested package '{}' does not exist", - package_name - ) - }), - ); - } - // dependencies are listed by output name, so create a lookup table to - // get a package by its output name. - let lookup_by_output = packages - .0 - .iter() - .map(|(name, package)| { - (package.get_output_file(name), (*name, *package)) - }) - .collect::>(); - // packages yet to be walked are added to `to_walk`. pop each entry and - // add its dependencies to `to_walk`, then add the package we finished - // walking to `filtered_packages`. - while let Some((package_name, package)) = to_walk.0.pop_first() { - if let PackageSource::Composite { packages } = &package.source { - for output in packages { - // find the package by output name - let (dep_name, dep_package) = - lookup_by_output.get(output).unwrap_or_else(|| { - panic!( - "Could not find a package which creates '{}'", - output - ) - }); - if dep_name.as_str() == package_name { - panic!("'{}' depends on itself", package_name); - } - // if we've seen this package already, it will be in - // `filtered_packages`. otherwise, add it to `to_walk`. - if !filtered_packages.0.contains_key(dep_name) { - to_walk.0.insert(dep_name, dep_package); - } - } - } - // we're done looking at this package's deps - filtered_packages.0.insert(package_name, package); - } - filtered_packages - } -} - #[tokio::main] async fn main() -> Result<()> { let args = Args::try_parse()?; @@ -1069,43 +865,13 @@ async fn main() -> Result<()> { let drain = slog_async::Async::new(drain).build().fuse(); let log = Logger::root(drain, o!()); - let target_help_str = || -> String { - format!( - "Try calling: '{} -t default target create' to create a new build target", - env::current_exe().unwrap().display() - ) - }; - let get_config = || -> Result { - let target_path = args.artifact_dir.join("target").join(&args.target); - let raw_target = - std::fs::read_to_string(&target_path).inspect_err(|_| { - eprintln!( - "Failed to read build target: {}\n{}", - target_path, - target_help_str() - ); - })?; - let target: Target = KnownTarget::from_str(&raw_target) - .inspect_err(|_| { - eprintln!( - "Failed to parse {} as target\n{}", - target_path, - target_help_str() - ); - })? 
- .into(); - debug!(log, "target[{}]: {:?}", args.target, target); - - Ok(Config { - log: log.clone(), + Config::get_config( + &log, package_config, - target, - only: Vec::new(), - force: args.force, - retry_count: args.retry_count, - retry_duration: args.retry_duration, - }) + &args.config_args, + &args.artifact_dir, + ) }; // Use a CWD that is the root of the Omicron repository. @@ -1119,7 +885,12 @@ async fn main() -> Result<()> { match args.subcommand { SubCommand::Build(BuildCommand::Target { subcommand }) => { - do_target(&args.artifact_dir, &args.target, &subcommand).await?; + do_target( + &args.artifact_dir, + &args.config_args.target, + &subcommand, + ) + .await?; } SubCommand::Build(BuildCommand::Dot) => { do_dot(&get_config()?).await?; @@ -1130,7 +901,7 @@ async fn main() -> Result<()> { } SubCommand::Build(BuildCommand::Package { disable_cache, only }) => { let mut config = get_config()?; - config.only = only; + config.set_only(only); do_package(&config, &args.artifact_dir, disable_cache).await?; } SubCommand::Build(BuildCommand::Stamp { package_name, version }) => { @@ -1142,6 +913,9 @@ async fn main() -> Result<()> { ) .await?; } + SubCommand::Build(BuildCommand::ShowCargoCommands) => { + do_show_cargo_commands(&get_config()?).await?; + } SubCommand::Build(BuildCommand::Check) => { do_check(&get_config()?).await? } diff --git a/package/src/cargo_plan.rs b/package/src/cargo_plan.rs new file mode 100644 index 0000000000..1a32b199fb --- /dev/null +++ b/package/src/cargo_plan.rs @@ -0,0 +1,172 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::BTreeMap; +use std::collections::BTreeSet; + +use anyhow::bail; +use anyhow::ensure; +use anyhow::Context; +use anyhow::Result; +use cargo_metadata::Metadata; +use omicron_zone_package::config::PackageMap; +use omicron_zone_package::package::PackageSource; +use slog::info; +use slog::Logger; +use tokio::process::Command; + +/// For a configuration, build a plan: the set of packages, binaries, and +/// features to operate on in release and debug modes. +pub fn build_cargo_plan<'a>( + metadata: &Metadata, + package_map: PackageMap<'a>, + features: &'a [String], +) -> Result> { + // Collect a map of all of the workspace packages + let workspace_pkgs = metadata + .packages + .iter() + .filter_map(|package| { + metadata + .workspace_members + .contains(&package.id) + .then_some((package.name.clone(), package)) + }) + .collect::>(); + + let mut release = CargoTargets::new(BuildKind::Release); + let mut debug = CargoTargets::new(BuildKind::Debug); + + for (name, pkg) in package_map.0 { + // If this is a Rust package, `name` (the map key) is the name of the + // corresponding Rust crate. + if let PackageSource::Local { rust: Some(rust_pkg), .. 
} = &pkg.source { + let plan = if rust_pkg.release { &mut release } else { &mut debug }; + // Add the package name to the plan + plan.packages.insert(name); + // Get the package metadata + let metadata = workspace_pkgs.get(name).with_context(|| { + format!("package '{name}' is not a workspace package") + })?; + // Add the binaries we want to build to the plan + let bins = metadata + .targets + .iter() + .filter_map(|target| target.is_bin().then_some(&target.name)) + .collect::>(); + for bin in &rust_pkg.binary_names { + ensure!( + bins.contains(bin), + "bin target '{bin}' does not belong to package '{name}'" + ); + plan.bins.insert(bin); + } + // Add all features we want to request to the plan + plan.features.extend( + features + .iter() + .filter(|feature| metadata.features.contains_key(*feature)), + ); + } + } + + Ok(CargoPlan { release, debug }) +} + +#[derive(Debug)] +pub struct CargoPlan<'a> { + pub release: CargoTargets<'a>, + pub debug: CargoTargets<'a>, +} + +impl CargoPlan<'_> { + pub async fn run(&self, command: &str, log: &Logger) -> Result<()> { + self.release.run(command, log).await?; + self.debug.run(command, log).await?; + Ok(()) + } +} + +/// A set of packages, binaries, and features to operate on. +#[derive(Debug)] +pub struct CargoTargets<'a> { + pub kind: BuildKind, + pub packages: BTreeSet<&'a String>, + pub bins: BTreeSet<&'a String>, + pub features: BTreeSet<&'a String>, +} + +impl CargoTargets<'_> { + fn new(kind: BuildKind) -> Self { + Self { + kind, + packages: BTreeSet::new(), + bins: BTreeSet::new(), + features: BTreeSet::new(), + } + } + + pub fn build_command(&self, command: &str) -> Option { + if self.bins.is_empty() { + return None; + } + + let mut cmd = Command::new("cargo"); + // We rely on the rust-toolchain.toml file for toolchain information, + // rather than specifying one within the packaging tool. + cmd.arg(command); + // We specify _both_ --package and --bin; --bin does not imply + // --package, and without any --package options Cargo unifies features + // across all workspace default members. See rust-lang/cargo#8157. + for package in &self.packages { + cmd.arg("--package").arg(package); + } + for bin in &self.bins { + cmd.arg("--bin").arg(bin); + } + if !self.features.is_empty() { + cmd.arg("--features").arg(self.features.iter().fold( + String::new(), + |mut acc, s| { + if !acc.is_empty() { + acc.push(' '); + } + acc.push_str(s); + acc + }, + )); + } + match self.kind { + BuildKind::Release => { + cmd.arg("--release"); + } + BuildKind::Debug => {} + } + + Some(cmd) + } + + pub async fn run(&self, command: &str, log: &Logger) -> Result<()> { + let Some(mut cmd) = self.build_command(command) else { + return Ok(()); + }; + + info!(log, "running: {:?}", cmd.as_std()); + let status = cmd + .status() + .await + .context(format!("Failed to run command: ({:?})", cmd))?; + if !status.success() { + bail!("Failed to build packages"); + } + + Ok(()) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BuildKind { + Release, + Debug, +} diff --git a/package/src/config.rs b/package/src/config.rs new file mode 100644 index 0000000000..f80bd36057 --- /dev/null +++ b/package/src/config.rs @@ -0,0 +1,246 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
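The `cargo_plan` module introduced above exposes `CargoTargets::build_command`, so the exact invocation it produces can be inspected directly. A hedged sketch of what that looks like for a single release package; the package and feature names here are illustrative only:

```rust
use std::collections::BTreeSet;

use omicron_package::cargo_plan::{BuildKind, CargoTargets};

fn main() {
    // Hypothetical package/bin/feature names, just to show the shape of the
    // generated invocation.
    let package = "clickana".to_string();
    let feature = "switch-asic".to_string();

    let targets = CargoTargets {
        kind: BuildKind::Release,
        packages: BTreeSet::from([&package]),
        bins: BTreeSet::from([&package]),
        features: BTreeSet::from([&feature]),
    };

    // Passing --package alongside --bin matters: --bin alone lets Cargo unify
    // features across all workspace default members (rust-lang/cargo#8157).
    let cmd = targets
        .build_command("build")
        .expect("at least one binary was requested");

    // Roughly: "cargo" "build" "--package" "clickana" "--bin" "clickana"
    //          "--features" "switch-asic" "--release"
    println!("{:?}", cmd.as_std());
}
```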
+ +use anyhow::{bail, Result}; +use camino::Utf8Path; +use clap::Args; +use omicron_zone_package::{ + config::{Config as PackageConfig, PackageMap}, + package::PackageSource, + target::Target, +}; +use slog::{debug, Logger}; +use std::{ + collections::BTreeMap, env, io::Write, str::FromStr, time::Duration, +}; + +use crate::target::KnownTarget; + +#[derive(Debug, Args)] +pub struct ConfigArgs { + /// The name of the build target to use for this command + #[clap( + short, + long, + default_value_t = Config::ACTIVE.to_string(), + )] + pub target: String, + + /// Skip confirmation prompt for destructive operations + #[clap(short, long, action, default_value_t = false)] + pub force: bool, + + /// Number of retries to use when re-attempting failed package downloads + #[clap(long, action, default_value_t = 10)] + pub retry_count: usize, + + /// Duration, in ms, to wait before re-attempting failed package downloads + #[clap( + long, + action, + value_parser = parse_duration_ms, + default_value = "1000", + )] + pub retry_duration: Duration, +} + +fn parse_duration_ms(arg: &str) -> Result { + let ms = arg.parse()?; + Ok(Duration::from_millis(ms)) +} + +#[derive(Debug)] +pub struct Config { + log: Logger, + // Description of all possible packages. + package_config: PackageConfig, + // Description of the target we're trying to operate on. + target: Target, + // The list of packages the user wants us to build (all, if empty) + only: Vec, + // True if we should skip confirmations for destructive operations. + force: bool, + // Number of times to retry failed downloads. + retry_count: usize, + // Duration to wait before retrying failed downloads. + retry_duration: Duration, +} + +impl Config { + /// The name reserved for the currently-in-use build target. + pub const ACTIVE: &str = "active"; + + /// Builds a new configuration. + pub fn get_config( + log: &Logger, + package_config: PackageConfig, + args: &ConfigArgs, + artifact_dir: &Utf8Path, + ) -> Result { + let target_help_str = || -> String { + format!( + "Try calling: '{} -t default target create' to create a new build target", + env::current_exe().unwrap().display() + ) + }; + + let target_path = artifact_dir.join("target").join(&args.target); + let raw_target = + std::fs::read_to_string(&target_path).inspect_err(|_| { + eprintln!( + "Failed to read build target: {}\n{}", + target_path, + target_help_str() + ); + })?; + let target: Target = KnownTarget::from_str(&raw_target) + .inspect_err(|_| { + eprintln!( + "Failed to parse {} as target\n{}", + target_path, + target_help_str() + ); + })? + .into(); + debug!(log, "target[{}]: {:?}", args.target, target); + + Ok(Config { + log: log.clone(), + package_config, + target, + only: Vec::new(), + force: args.force, + retry_count: args.retry_count, + retry_duration: args.retry_duration, + }) + } + + /// Sets the `only` field. + #[inline] + pub fn set_only(&mut self, only: Vec) -> &mut Self { + self.only = only; + self + } + + /// Returns the logger. + #[inline] + pub fn log(&self) -> &Logger { + &self.log + } + + /// Returns the target currently being operated on. + #[inline] + pub fn target(&self) -> &Target { + &self.target + } + + /// Returns the underlying package configuration. + #[inline] + pub fn package_config(&self) -> &PackageConfig { + &self.package_config + } + + /// Returns the retry count. + #[inline] + pub fn retry_count(&self) -> usize { + self.retry_count + } + + /// Returns the retry duration. 
+ #[inline] + pub fn retry_duration(&self) -> Duration { + self.retry_duration + } + + /// Prompts the user for input before proceeding with an operation. + pub fn confirm(&self, prompt: &str) -> Result<()> { + if self.force { + return Ok(()); + } + + print!("{prompt}\n[yY to confirm] >> "); + let _ = std::io::stdout().flush(); + + let mut input = String::new(); + std::io::stdin().read_line(&mut input)?; + match input.as_str().trim() { + "y" | "Y" => Ok(()), + _ => bail!("Aborting"), + } + } + + /// Returns target packages to be assembled on the builder machine, limited + /// to those specified in `only` (if set). + pub fn packages_to_build(&self) -> PackageMap<'_> { + let packages = self.package_config.packages_to_build(&self.target); + if self.only.is_empty() { + return packages; + } + + let mut filtered_packages = PackageMap(BTreeMap::new()); + let mut to_walk = PackageMap(BTreeMap::new()); + // add the requested packages to `to_walk` + for package_name in &self.only { + to_walk.0.insert( + package_name, + packages.0.get(package_name).unwrap_or_else(|| { + panic!( + "Explicitly-requested package '{}' does not exist", + package_name + ) + }), + ); + } + // dependencies are listed by output name, so create a lookup table to + // get a package by its output name. + let lookup_by_output = packages + .0 + .iter() + .map(|(name, package)| { + (package.get_output_file(name), (*name, *package)) + }) + .collect::>(); + // packages yet to be walked are added to `to_walk`. pop each entry and + // add its dependencies to `to_walk`, then add the package we finished + // walking to `filtered_packages`. + while let Some((package_name, package)) = to_walk.0.pop_first() { + if let PackageSource::Composite { packages } = &package.source { + for output in packages { + // find the package by output name + let (dep_name, dep_package) = + lookup_by_output.get(output).unwrap_or_else(|| { + panic!( + "Could not find a package which creates '{}'", + output + ) + }); + if dep_name.as_str() == package_name { + panic!("'{}' depends on itself", package_name); + } + // if we've seen this package already, it will be in + // `filtered_packages`. otherwise, add it to `to_walk`. + if !filtered_packages.0.contains_key(dep_name) { + to_walk.0.insert(dep_name, dep_package); + } + } + } + // we're done looking at this package's deps + filtered_packages.0.insert(package_name, package); + } + filtered_packages + } + + /// Return a list of all possible Cargo features that could be requested for + /// the packages being built. + /// + /// Out of these, the features that actually get requested are determined by + /// which features are available for the list of packages being built. + pub fn cargo_features(&self) -> Vec { + self.target + .0 + .iter() + .map(|(name, value)| format!("{name}-{value}")) + .collect::>() + } +} diff --git a/package/src/lib.rs b/package/src/lib.rs index b37c1774fd..8ef9a4c951 100644 --- a/package/src/lib.rs +++ b/package/src/lib.rs @@ -5,6 +5,8 @@ use clap::Subcommand; use serde::de::DeserializeOwned; use thiserror::Error; +pub mod cargo_plan; +pub mod config; pub mod dot; pub mod target; @@ -130,6 +132,8 @@ pub enum BuildCommand { /// The version to be stamped onto the package. version: semver::Version, }, + /// Show the Cargo commands that would be run to build the packages. + ShowCargoCommands, /// Checks the packages specified in a manifest, without building them. 
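The new `omicron_package::config` module above exports both the clap argument block and the runtime `Config`, so other packaging entry points can reuse the same flags. A hedged sketch of that wiring follows; the binary name is a placeholder, and construction of the logger and `PackageConfig` is deliberately elided rather than invented:

```rust
use camino::Utf8PathBuf;
use clap::Parser;
use omicron_package::config::ConfigArgs;

/// Hypothetical downstream tool reusing the shared packaging flags
/// (-t/--target, --force, --retry-count, --retry-duration).
#[derive(Debug, Parser)]
#[clap(name = "example packaging helper")]
struct Cli {
    /// Where artifacts are built and staged, as in omicron-package.
    #[clap(long = "artifacts", default_value = "out/")]
    artifact_dir: Utf8PathBuf,

    #[clap(flatten)]
    config_args: ConfigArgs,
}

fn main() {
    let cli = Cli::parse();

    // With a slog::Logger `log` and an omicron_zone_package `PackageConfig`
    // `package_config` in hand, the runtime configuration is built the same
    // way the omicron-package binary does it:
    //
    //     let config = omicron_package::config::Config::get_config(
    //         &log, package_config, &cli.config_args, &cli.artifact_dir)?;
    //     let features = config.cargo_features();
    //
    // (Obtaining `log` and `package_config` is left out of this sketch.)
    println!("target flag: {}", cli.config_args.target);
}
```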
Check, } diff --git a/schema/crdb/add-completing-and-new-region-volume/up01.sql b/schema/crdb/add-completing-and-new-region-volume/up01.sql new file mode 100644 index 0000000000..6a973eb3c3 --- /dev/null +++ b/schema/crdb/add-completing-and-new-region-volume/up01.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.region_snapshot_replacement_state ADD VALUE IF NOT EXISTS 'completing' AFTER 'complete'; diff --git a/schema/crdb/add-completing-and-new-region-volume/up02.sql b/schema/crdb/add-completing-and-new-region-volume/up02.sql new file mode 100644 index 0000000000..42c0028ff5 --- /dev/null +++ b/schema/crdb/add-completing-and-new-region-volume/up02.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.region_snapshot_replacement ADD COLUMN IF NOT EXISTS new_region_volume_id UUID; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 75b7dbaf08..67b8ab0041 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -4427,7 +4427,8 @@ CREATE TYPE IF NOT EXISTS omicron.public.region_snapshot_replacement_state AS EN 'replacement_done', 'deleting_old_volume', 'running', - 'complete' + 'complete', + 'completing' ); CREATE TABLE IF NOT EXISTS omicron.public.region_snapshot_replacement ( @@ -4445,7 +4446,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.region_snapshot_replacement ( replacement_state omicron.public.region_snapshot_replacement_state NOT NULL, - operating_saga_id UUID + operating_saga_id UUID, + + new_region_volume_id UUID ); CREATE INDEX IF NOT EXISTS lookup_region_snapshot_replacement_by_state on omicron.public.region_snapshot_replacement (replacement_state); @@ -4694,7 +4697,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '116.0.0', NULL) + (TRUE, NOW(), NOW(), '117.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index 634640079a..56c6760be7 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -184,23 +184,61 @@ pub trait SledAgentApi { /// Fetch a support bundle from a particular dataset #[endpoint { method = GET, - path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}" + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download" }] - async fn support_bundle_get( + async fn support_bundle_download( rqctx: RequestContext, path_params: Path, - body: TypedBody, ) -> Result, HttpError>; - /// Fetch a support bundle from a particular dataset + /// Fetch a file within a support bundle from a particular dataset + #[endpoint { + method = GET, + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download/{file}" + }] + async fn support_bundle_download_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Fetch the index (list of files within a support bundle) + #[endpoint { + method = GET, + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/index" + }] + async fn support_bundle_index( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Fetch metadata about a support bundle from a particular dataset #[endpoint { method = HEAD, - path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}" + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download" }] async fn support_bundle_head( rqctx: RequestContext, path_params: Path, - body: TypedBody, + ) -> Result, HttpError>; + + /// Fetch metadata about a file within a support bundle from a particular dataset + #[endpoint { + method = HEAD, + 
path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download/{file}" + }] + async fn support_bundle_head_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Fetch metadata about the list of files within a support bundle + #[endpoint { + method = HEAD, + path = "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/index" + }] + async fn support_bundle_head_index( + rqctx: RequestContext, + path_params: Path, ) -> Result, HttpError>; /// Delete a support bundle from a particular dataset @@ -689,6 +727,9 @@ pub struct SupportBundlePathParam { pub struct SupportBundleFilePathParam { #[serde(flatten)] pub parent: SupportBundlePathParam, + + /// The path of the file within the support bundle to query + pub file: String, } /// Metadata about a support bundle @@ -702,24 +743,6 @@ pub struct SupportBundleGetHeaders { range: String, } -/// Query parameters for reading the support bundle -#[derive(Deserialize, Serialize, JsonSchema)] -pub struct SupportBundleGetQueryParams { - pub query_type: SupportBundleQueryType, -} - -/// Describes the type of access to the support bundle -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum SupportBundleQueryType { - /// Access the whole support bundle - Whole, - /// Access the names of all files within the support bundle - Index, - /// Access a specific file within the support bundle - Path { file_path: String }, -} - #[derive(Deserialize, Debug, Serialize, JsonSchema, PartialEq)] #[serde(rename_all = "snake_case")] pub enum SupportBundleState { diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 88259537d2..edba1c47b1 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -6,6 +6,7 @@ use super::sled_agent::SledAgent; use crate::sled_agent::Error as SledAgentError; +use crate::support_bundle::storage::SupportBundleQueryType; use crate::zone_bundle::BundleError; use bootstore::schemes::v0::NetworkConfig; use camino::Utf8PathBuf; @@ -267,37 +268,135 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseCreated(metadata)) } - async fn support_bundle_get( + async fn support_bundle_download( rqctx: RequestContext, path_params: Path, - body: TypedBody, ) -> Result, HttpError> { let sa = rqctx.context(); let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = path_params.into_inner(); let range = rqctx.range(); - let query = body.into_inner().query_type; Ok(sa .as_support_bundle_storage() - .get(zpool_id, dataset_id, support_bundle_id, range, query) + .get( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Whole, + ) + .await?) + } + + async fn support_bundle_download_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundleFilePathParam { + parent: + SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id }, + file, + } = path_params.into_inner(); + + let range = rqctx.range(); + Ok(sa + .as_support_bundle_storage() + .get( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Path { file_path: file }, + ) + .await?) 
+ } + + async fn support_bundle_index( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + + let range = rqctx.range(); + Ok(sa + .as_support_bundle_storage() + .get( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Index, + ) .await?) } async fn support_bundle_head( rqctx: RequestContext, path_params: Path, - body: TypedBody, ) -> Result, HttpError> { let sa = rqctx.context(); let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = path_params.into_inner(); let range = rqctx.range(); - let query = body.into_inner().query_type; Ok(sa .as_support_bundle_storage() - .head(zpool_id, dataset_id, support_bundle_id, range, query) + .head( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Whole, + ) + .await?) + } + + async fn support_bundle_head_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundleFilePathParam { + parent: + SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id }, + file, + } = path_params.into_inner(); + + let range = rqctx.range(); + Ok(sa + .as_support_bundle_storage() + .head( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Path { file_path: file }, + ) + .await?) + } + + async fn support_bundle_head_index( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + + let range = rqctx.range(); + Ok(sa + .as_support_bundle_storage() + .head( + zpool_id, + dataset_id, + support_bundle_id, + range, + SupportBundleQueryType::Index, + ) .await?) 
} diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index db250eb914..7c5fb44d13 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -76,6 +76,7 @@ use omicron_common::address::WICKETD_NEXUS_PROXY_PORT; use omicron_common::address::WICKETD_PORT; use omicron_common::address::{ get_internal_dns_server_addresses, CLICKHOUSE_ADMIN_PORT, + CLICKHOUSE_TCP_PORT, }; use omicron_common::address::{Ipv6Subnet, NEXUS_TECHPORT_EXTERNAL_PORT}; use omicron_common::address::{BOOTSTRAP_ARTIFACT_PORT, COCKROACH_ADMIN_PORT}; @@ -1597,13 +1598,20 @@ impl ServiceManager { addr.to_string() }; + // The ClickHouse client connects via the TCP port + let ch_address = { + let mut addr = *address; + addr.set_port(CLICKHOUSE_TCP_PORT); + addr.to_string() + }; + let clickhouse_admin_config = PropertyGroupBuilder::new("config") .add_property("http_address", "astring", admin_address) .add_property( "ch_address", "astring", - address.to_string(), + ch_address.to_string(), ) .add_property( "ch_binary", @@ -1668,13 +1676,20 @@ impl ServiceManager { addr.to_string() }; + // The ClickHouse client connects via the TCP port + let ch_address = { + let mut addr = *address; + addr.set_port(CLICKHOUSE_TCP_PORT); + addr.to_string() + }; + let clickhouse_admin_config = PropertyGroupBuilder::new("config") .add_property("http_address", "astring", admin_address) .add_property( "ch_address", "astring", - address.to_string(), + ch_address.to_string(), ) .add_property( "ch_binary", diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 2d23f9150b..60dcb1be31 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -438,10 +438,9 @@ impl SledAgentApi for SledAgentSimImpl { )) } - async fn support_bundle_get( + async fn support_bundle_download( rqctx: RequestContext, path_params: Path, - _body: TypedBody, ) -> Result, HttpError> { let sa = rqctx.context(); let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = @@ -458,10 +457,95 @@ impl SledAgentApi for SledAgentSimImpl { .unwrap()) } + async fn support_bundle_download_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundleFilePathParam { + parent: + SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id }, + file: _, + } = path_params.into_inner(); + + sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; + + Ok(http::Response::builder() + .status(http::StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/html") + .body(dropshot::Body::with_content( + "simulated support bundle file; do not eat", + )) + .unwrap()) + } + + async fn support_bundle_index( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + + sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; + + Ok(http::Response::builder() + .status(http::StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/html") + .body(dropshot::Body::with_content( + "simulated support bundle index; do not eat", + )) + .unwrap()) + } + async fn support_bundle_head( rqctx: RequestContext, path_params: Path, - _body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = + path_params.into_inner(); + + sa.support_bundle_get(zpool_id, dataset_id, 
support_bundle_id).await?; + + let fictional_length = 10000; + + Ok(http::Response::builder() + .status(http::StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/html") + .header(hyper::header::ACCEPT_RANGES, "bytes") + .header(hyper::header::CONTENT_LENGTH, fictional_length) + .body(dropshot::Body::empty()) + .unwrap()) + } + + async fn support_bundle_head_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let SupportBundleFilePathParam { + parent: + SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id }, + file: _, + } = path_params.into_inner(); + + sa.support_bundle_get(zpool_id, dataset_id, support_bundle_id).await?; + + let fictional_length = 10000; + + Ok(http::Response::builder() + .status(http::StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/html") + .header(hyper::header::ACCEPT_RANGES, "bytes") + .header(hyper::header::CONTENT_LENGTH, fictional_length) + .body(dropshot::Body::empty()) + .unwrap()) + } + + async fn support_bundle_head_index( + rqctx: RequestContext, + path_params: Path, ) -> Result, HttpError> { let sa = rqctx.context(); let SupportBundlePathParam { zpool_id, dataset_id, support_bundle_id } = diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index dc8cf63fe4..2299ba9db9 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -117,6 +117,8 @@ impl CrucibleDataInner { bail!("region creation error!"); } + let read_only = params.source.is_some(); + let region = Region { id: params.id, block_size: params.block_size, @@ -129,8 +131,8 @@ impl CrucibleDataInner { cert_pem: None, key_pem: None, root_pem: None, - source: None, - read_only: params.source.is_some(), + source: params.source, + read_only, }; let old = self.regions.insert(id, region.clone()); @@ -168,12 +170,6 @@ impl CrucibleDataInner { let id = Uuid::from_str(&id.0).unwrap(); if let Some(region) = self.regions.get_mut(&id) { - if region.state == State::Failed { - // The real Crucible agent would not let a Failed region be - // deleted - bail!("cannot delete in state Failed"); - } - region.state = State::Destroyed; self.used_ports.remove(®ion.port_number); Ok(Some(region.clone())) @@ -1370,29 +1366,41 @@ pub struct PantryVolume { activate_job: Option, } +pub struct PantryInner { + /// Map Volume UUID to PantryVolume struct + volumes: HashMap, + + jobs: HashSet, + + /// Auto activate volumes attached in the background + auto_activate_volumes: bool, +} + /// Simulated crucible pantry pub struct Pantry { pub id: OmicronZoneUuid, - /// Map Volume UUID to PantryVolume struct - volumes: Mutex>, sled_agent: Arc, - jobs: Mutex>, + inner: Mutex, } impl Pantry { pub fn new(sled_agent: Arc) -> Self { Self { id: OmicronZoneUuid::new_v4(), - volumes: Mutex::new(HashMap::default()), sled_agent, - jobs: Mutex::new(HashSet::default()), + inner: Mutex::new(PantryInner { + volumes: HashMap::default(), + jobs: HashSet::default(), + auto_activate_volumes: false, + }), } } pub async fn status(&self) -> Result { + let inner = self.inner.lock().await; Ok(PantryStatus { - volumes: self.volumes.lock().await.keys().cloned().collect(), - num_job_handles: self.jobs.lock().await.len(), + volumes: inner.volumes.keys().cloned().collect(), + num_job_handles: inner.jobs.len(), }) } @@ -1400,8 +1408,9 @@ impl Pantry { &self, volume_id: String, ) -> Result { - let volumes = self.volumes.lock().await; - match volumes.get(&volume_id) { + let inner = self.inner.lock().await; + + match 
inner.volumes.get(&volume_id) { Some(entry) => Ok(entry.vcr.clone()), None => Err(HttpError::for_not_found(None, volume_id)), @@ -1413,9 +1422,9 @@ impl Pantry { volume_id: String, volume_construction_request: VolumeConstructionRequest, ) -> Result<()> { - let mut volumes = self.volumes.lock().await; + let mut inner = self.inner.lock().await; - volumes.insert( + inner.volumes.insert( volume_id, PantryVolume { vcr: volume_construction_request, @@ -1431,29 +1440,34 @@ impl Pantry { Ok(()) } + pub async fn set_auto_activate_volumes(&self) { + self.inner.lock().await.auto_activate_volumes = true; + } + pub async fn attach_activate_background( &self, volume_id: String, activate_job_id: String, volume_construction_request: VolumeConstructionRequest, ) -> Result<(), HttpError> { - let mut volumes = self.volumes.lock().await; - let mut jobs = self.jobs.lock().await; + let mut inner = self.inner.lock().await; + + let auto_activate_volumes = inner.auto_activate_volumes; - volumes.insert( + inner.volumes.insert( volume_id, PantryVolume { vcr: volume_construction_request, status: VolumeStatus { - active: false, - seen_active: false, + active: auto_activate_volumes, + seen_active: auto_activate_volumes, num_job_handles: 1, }, activate_job: Some(activate_job_id.clone()), }, ); - jobs.insert(activate_job_id); + inner.jobs.insert(activate_job_id); Ok(()) } @@ -1463,8 +1477,8 @@ impl Pantry { volume_id: String, ) -> Result { let activate_job = { - let volumes = self.volumes.lock().await; - volumes.get(&volume_id).unwrap().activate_job.clone().unwrap() + let inner = self.inner.lock().await; + inner.volumes.get(&volume_id).unwrap().activate_job.clone().unwrap() }; let mut status = self.volume_status(volume_id.clone()).await?; @@ -1481,9 +1495,9 @@ impl Pantry { &self, volume_id: String, ) -> Result { - let volumes = self.volumes.lock().await; + let inner = self.inner.lock().await; - match volumes.get(&volume_id) { + match inner.volumes.get(&volume_id) { Some(pantry_volume) => Ok(pantry_volume.status.clone()), None => Err(HttpError::for_not_found(None, volume_id)), @@ -1495,9 +1509,9 @@ impl Pantry { volume_id: String, status: VolumeStatus, ) -> Result<(), HttpError> { - let mut volumes = self.volumes.lock().await; + let mut inner = self.inner.lock().await; - match volumes.get_mut(&volume_id) { + match inner.volumes.get_mut(&volume_id) { Some(pantry_volume) => { pantry_volume.status = status; Ok(()) @@ -1511,8 +1525,8 @@ impl Pantry { &self, job_id: String, ) -> Result { - let jobs = self.jobs.lock().await; - if !jobs.contains(&job_id) { + let inner = self.inner.lock().await; + if !inner.jobs.contains(&job_id) { return Err(HttpError::for_not_found(None, job_id)); } Ok(true) @@ -1522,11 +1536,11 @@ impl Pantry { &self, job_id: String, ) -> Result, HttpError> { - let mut jobs = self.jobs.lock().await; - if !jobs.contains(&job_id) { + let mut inner = self.inner.lock().await; + if !inner.jobs.contains(&job_id) { return Err(HttpError::for_not_found(None, job_id)); } - jobs.remove(&job_id); + inner.jobs.remove(&job_id); Ok(Ok(true)) } @@ -1539,9 +1553,9 @@ impl Pantry { self.entry(volume_id).await?; // Make up job - let mut jobs = self.jobs.lock().await; + let mut inner = self.inner.lock().await; let job_id = Uuid::new_v4().to_string(); - jobs.insert(job_id.clone()); + inner.jobs.insert(job_id.clone()); Ok(job_id) } @@ -1555,8 +1569,9 @@ impl Pantry { // the simulated instance ensure, then call // [`instance_issue_disk_snapshot_request`] as the snapshot logic is the // same. 
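The simulated Pantry refactor above folds three separately locked fields into a single `Mutex<PantryInner>`. A stripped-down sketch of the same pattern, with simplified placeholder field types rather than the real simulator state:

```rust
use std::collections::{HashMap, HashSet};
use tokio::sync::Mutex;

// All mutable simulator state behind a single lock, so every handler sees
// volumes, jobs, and the auto-activate flag as one consistent snapshot.
struct PantryInner {
    volumes: HashMap<String, bool>, // volume id -> "active" flag (placeholder)
    jobs: HashSet<String>,
    auto_activate_volumes: bool,
}

struct Pantry {
    inner: Mutex<PantryInner>,
}

impl Pantry {
    fn new() -> Self {
        Self {
            inner: Mutex::new(PantryInner {
                volumes: HashMap::new(),
                jobs: HashSet::new(),
                auto_activate_volumes: false,
            }),
        }
    }

    async fn attach_activate_background(&self, volume_id: String, job_id: String) {
        // One lock guards the whole update; a concurrent `status()` can never
        // observe the new volume without its activation job, which would be
        // possible if `volumes` and `jobs` lived behind separate mutexes.
        let mut inner = self.inner.lock().await;
        let active = inner.auto_activate_volumes;
        inner.volumes.insert(volume_id, active);
        inner.jobs.insert(job_id);
    }

    async fn status(&self) -> (usize, usize) {
        let inner = self.inner.lock().await;
        (inner.volumes.len(), inner.jobs.len())
    }
}
```

The single guard also removes any question of lock-acquisition order between the old `volumes` and `jobs` mutexes.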
- let volumes = self.volumes.lock().await; - let volume_construction_request = &volumes.get(&volume_id).unwrap().vcr; + let inner = self.inner.lock().await; + let volume_construction_request = + &inner.volumes.get(&volume_id).unwrap().vcr; self.sled_agent .map_disk_ids_to_region_ids(volume_construction_request) @@ -1636,16 +1651,16 @@ impl Pantry { self.entry(volume_id).await?; // Make up job - let mut jobs = self.jobs.lock().await; + let mut inner = self.inner.lock().await; let job_id = Uuid::new_v4().to_string(); - jobs.insert(job_id.clone()); + inner.jobs.insert(job_id.clone()); Ok(job_id) } pub async fn detach(&self, volume_id: String) -> Result<()> { - let mut volumes = self.volumes.lock().await; - volumes.remove(&volume_id); + let mut inner = self.inner.lock().await; + inner.volumes.remove(&volume_id); Ok(()) } } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index b9bf703933..37526690cb 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -962,12 +962,21 @@ impl SledAgent { continue; }; - // First, ensure the dataset exists - let dataset_id = zone.id.into_untyped_uuid(); - self.inner - .storage - .upsert_filesystem(dataset_id, dataset_name) - .await?; + // NOTE: This code will be deprecated by https://github.com/oxidecomputer/omicron/pull/7160 + // + // However, we need to ensure that all blueprints have datasets + // within them before we can remove this back-fill. + // + // Therefore, we do something hairy here: We ensure the filesystem + // exists, but don't specify any dataset UUID value. + // + // This means that: + // - If the dataset exists and has a UUID, this will be a no-op + // - If the dataset doesn't exist, it'll be created without its + // oxide:uuid zfs property set + // - If a subsequent call to "datasets_ensure" tries to set a UUID, + // it should be able to get set (once). + self.inner.storage.upsert_filesystem(None, dataset_name).await?; } self.inner diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs index e51f35e146..97d345a8d2 100644 --- a/sled-agent/src/support_bundle/storage.rs +++ b/sled-agent/src/support_bundle/storage.rs @@ -100,6 +100,17 @@ impl From for HttpError { } } +/// Describes the type of access to the support bundle +#[derive(Clone, Debug)] +pub(crate) enum SupportBundleQueryType { + /// Access the whole support bundle + Whole, + /// Access the names of all files within the support bundle + Index, + /// Access a specific file within the support bundle + Path { file_path: String }, +} + // Implements "seeking" and "putting a capacity on a file" manually. // // TODO: When https://github.com/zip-rs/zip2/issues/231 is resolved, diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index f0653c7905..d6ffd42d0a 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -35,7 +35,6 @@ use std::collections::HashSet; use std::future::Future; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{interval, Duration, MissedTickBehavior}; -use uuid::Uuid; // The size of the mpsc bounded channel used to communicate // between the `StorageHandle` and `StorageManager`. @@ -100,7 +99,7 @@ enum StorageManagerState { #[derive(Debug)] pub(crate) struct NewFilesystemRequest { - dataset_id: Uuid, + dataset_id: Option, dataset_name: DatasetName, responder: DebugIgnore>>, } @@ -526,7 +525,7 @@ impl StorageHandle { // and ask for the set of all datasets from Nexus. 
pub async fn upsert_filesystem( &self, - dataset_id: Uuid, + dataset_id: Option, dataset_name: DatasetName, ) -> Result<(), Error> { let (tx, rx) = oneshot::channel(); @@ -1499,27 +1498,9 @@ impl StorageManager { zoned, encryption_details, size_details, - id: Some(DatasetUuid::from_untyped_uuid(request.dataset_id)), + id: request.dataset_id, additional_options: None, })?; - // Ensure the dataset has a usable UUID. - if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { - if let Ok(id) = id_str.parse::() { - if id != request.dataset_id { - return Err(Error::UuidMismatch { - name: request.dataset_name.full_name(), - old: id, - new: request.dataset_id, - }); - } - return Ok(()); - } - } - Zfs::set_oxide_value( - &fs_name, - "uuid", - &request.dataset_id.to_string(), - )?; Ok(()) } @@ -1544,7 +1525,6 @@ mod tests { use std::collections::BTreeMap; use std::str::FromStr; use std::sync::atomic::Ordering; - use uuid::Uuid; // A helper struct to advance time. struct TimeTravel {} @@ -2005,16 +1985,92 @@ mod tests { .expect("Ensuring disks should work after key manager is ready"); assert!(!result.has_error(), "{:?}", result); - // Create a filesystem on the newly formatted U.2 - let dataset_id = Uuid::new_v4(); + // Create a filesystem on the newly formatted U.2. + // + // We can call "upsert_filesystem" both with and without a UUID. + let dataset_id = DatasetUuid::new_v4(); + let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); + let dataset_name = + DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); + harness + .handle() + .upsert_filesystem(Some(dataset_id), dataset_name.clone()) + .await + .unwrap(); + // Observe the dataset exists, and the UUID is set. + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); + + harness + .handle() + .upsert_filesystem(None, dataset_name.clone()) + .await + .unwrap(); + // Observe the dataset still exists, and the UUID is still set, + // even though we did not ask for a new value explicitly. + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); + + harness.cleanup().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn upsert_filesystem_no_uuid() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("upsert_filesystem"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Test setup: Add a U.2 and M.2, adopt them into the "control plane" + // for usage. 
+ harness.handle().key_manager_ready().await; + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + // Create a filesystem on the newly formatted U.2, without a UUID let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); let dataset_name = DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); harness .handle() - .upsert_filesystem(dataset_id, dataset_name) + .upsert_filesystem(None, dataset_name.clone()) .await .unwrap(); + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, None); + + // Later, we can set the UUID to a specific value + let dataset_id = DatasetUuid::new_v4(); + harness + .handle() + .upsert_filesystem(Some(dataset_id), dataset_name.clone()) + .await + .unwrap(); + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); harness.cleanup().await; logctx.cleanup_successful(); diff --git a/tools/console_version b/tools/console_version index 85ca41f755..ef52d38564 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="c1ebd8d9acae4ff7a09b2517265fba52ebdfe82e" -SHA2="840dbfda1c0def66212e7602d7be6e8acf1b26ba218f10ce3e627df49f5ce9e2" +COMMIT="d583ae70786db46ace23f0e45bc9fdcffc21e6ae" +SHA2="590fda51f0599879effea4f4b2754ec572f6c1f4d5a586016ff6a2e2d8b6f5f9"
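The sled-agent and sled-storage hunks above make the dataset UUID optional when upserting a filesystem: passing None only ensures the dataset exists and leaves any existing oxide:uuid property alone, while passing Some(id) lets the UUID be recorded once a caller knows it. The sketch below is a minimal, standard-library-only model of that set-at-most-once behavior under those assumptions; DatasetId, FakeDataset, and upsert are hypothetical names for illustration, not the real StorageManager or Zfs APIs.

    // Illustrative model only: a stand-in for the "optional dataset UUID,
    // set at most once" behavior described in the NOTE comment above.
    // DatasetId, FakeDataset, and upsert are hypothetical, not omicron APIs.

    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    struct DatasetId(u128);

    #[derive(Debug, Default)]
    struct FakeDataset {
        // Models the `oxide:uuid` ZFS property: absent until set once.
        id: Option<DatasetId>,
    }

    impl FakeDataset {
        // Ensure the dataset "exists", optionally supplying an ID.
        // - None: no-op with respect to the stored ID.
        // - Some(id) and no ID stored yet: the ID is recorded.
        // - Some(id) and an ID already stored: left unchanged in this sketch
        //   (the real system may handle mismatches differently).
        fn upsert(&mut self, id: Option<DatasetId>) {
            if self.id.is_none() {
                if let Some(new) = id {
                    self.id = Some(new);
                }
            }
        }
    }

    fn main() {
        let mut dataset = FakeDataset::default();

        // Back-fill path: ensure existence without claiming an ID.
        dataset.upsert(None);
        assert_eq!(dataset.id, None);

        // A later caller supplies the ID, which is recorded exactly once.
        let id = DatasetId(42);
        dataset.upsert(Some(id));
        assert_eq!(dataset.id, Some(id));

        // Subsequent calls, with or without an ID, leave it in place.
        dataset.upsert(None);
        dataset.upsert(Some(DatasetId(7)));
        assert_eq!(dataset.id, Some(id));
    }

Under this model, the blueprint back-fill in sled_agent.rs corresponds to the upsert(None) call, and a later datasets_ensure-style caller corresponds to upsert(Some(id)), matching what the new upsert_filesystem tests exercise.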