From db6914c58cdecef82f4893b55e49c6a68f625726 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 17 Dec 2024 16:21:38 -0800 Subject: [PATCH 1/5] [sled-agent] Avoid causing UUID conflicts --- sled-agent/src/sled_agent.rs | 20 ++++++++++++++------ sled-storage/src/manager.rs | 25 +++---------------------- 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index b9bf703933..4fb1ba3ad3 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -962,12 +962,20 @@ impl SledAgent { continue; }; - // First, ensure the dataset exists - let dataset_id = zone.id.into_untyped_uuid(); - self.inner - .storage - .upsert_filesystem(dataset_id, dataset_name) - .await?; + // NOTE: This code will be deprecated by https://github.com/oxidecomputer/omicron/pull/7160 + // + // However, we need to ensure that all blueprints have datasets + // within them before we can remove this back-fill. + // + // Therefore, we do something hairy here: We ensure the filesystem + // exists, but don't specify any dataset UUID value. + // + // This means that: + // - If the dataset exists and has a UUID, this will be a no-op + // - If the dataset doesn't exist, it'll be created + // - If a subsequent call to "datasets_ensure" tries to set a UUID, + // it should be able to get set (once). + self.inner.storage.upsert_filesystem(None, dataset_name).await?; } self.inner diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index f0653c7905..4083b66b6a 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -35,7 +35,6 @@ use std::collections::HashSet; use std::future::Future; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{interval, Duration, MissedTickBehavior}; -use uuid::Uuid; // The size of the mpsc bounded channel used to communicate // between the `StorageHandle` and `StorageManager`. @@ -100,7 +99,7 @@ enum StorageManagerState { #[derive(Debug)] pub(crate) struct NewFilesystemRequest { - dataset_id: Uuid, + dataset_id: Option, dataset_name: DatasetName, responder: DebugIgnore>>, } @@ -526,7 +525,7 @@ impl StorageHandle { // and ask for the set of all datasets from Nexus. pub async fn upsert_filesystem( &self, - dataset_id: Uuid, + dataset_id: Option, dataset_name: DatasetName, ) -> Result<(), Error> { let (tx, rx) = oneshot::channel(); @@ -1499,27 +1498,9 @@ impl StorageManager { zoned, encryption_details, size_details, - id: Some(DatasetUuid::from_untyped_uuid(request.dataset_id)), + id: request.dataset_id, additional_options: None, })?; - // Ensure the dataset has a usable UUID. - if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { - if let Ok(id) = id_str.parse::() { - if id != request.dataset_id { - return Err(Error::UuidMismatch { - name: request.dataset_name.full_name(), - old: id, - new: request.dataset_id, - }); - } - return Ok(()); - } - } - Zfs::set_oxide_value( - &fs_name, - "uuid", - &request.dataset_id.to_string(), - )?; Ok(()) } From b4e752af7ec1bdfb87b03ac9a4cd1ba1aca6e5fc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 17 Dec 2024 16:32:12 -0800 Subject: [PATCH 2/5] Allow dataset_ensure to override UUIDs --- sled-storage/src/manager.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 4083b66b6a..81ba30f0ce 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -28,7 +28,6 @@ use omicron_common::disk::{ }; use omicron_common::ledger::Ledger; use omicron_uuid_kinds::DatasetUuid; -use omicron_uuid_kinds::GenericUuid; use slog::{error, info, o, warn, Logger}; use std::collections::BTreeMap; use std::collections::HashSet; @@ -1000,11 +999,17 @@ impl StorageManager { }; if old_id != config.id { - return Err(Error::UuidMismatch { - name: config.name.full_name(), - old: old_id.into_untyped_uuid(), - new: config.id.into_untyped_uuid(), - }); + // NOTE(https://github.com/oxidecomputer/omicron/issues/7265): + // + // This should potentially return a "UuidMismatch" error in the + // future, rather than overwriting the existing dataset UUID. + warn!( + log, + "Dataset UUID mismatch. Choosing to take new value."; + "old" => ?old_id, + "new" => ?config.id + ); + return Ok(false); } let old_props = match SharedDatasetConfig::try_from(old_dataset) { From 5117c18b64ab2c354fb6295fd1a2ac1a60f7e3a4 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 17 Dec 2024 16:51:43 -0800 Subject: [PATCH 3/5] Fix tests --- sled-storage/src/manager.rs | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 81ba30f0ce..ca001e2178 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -1530,7 +1530,6 @@ mod tests { use std::collections::BTreeMap; use std::str::FromStr; use std::sync::atomic::Ordering; - use uuid::Uuid; // A helper struct to advance time. struct TimeTravel {} @@ -1991,16 +1990,39 @@ mod tests { .expect("Ensuring disks should work after key manager is ready"); assert!(!result.has_error(), "{:?}", result); - // Create a filesystem on the newly formatted U.2 - let dataset_id = Uuid::new_v4(); + // Create a filesystem on the newly formatted U.2. + // + // We can call "upsert_filesystem" both with and without a UUID. + let dataset_id = DatasetUuid::new_v4(); let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); let dataset_name = DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); harness .handle() - .upsert_filesystem(dataset_id, dataset_name) + .upsert_filesystem(Some(dataset_id), dataset_name.clone()) + .await + .unwrap(); + // Observe the dataset exists, and the UUID is set. + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); + + harness + .handle() + .upsert_filesystem(None, dataset_name.clone()) .await .unwrap(); + // Observe the dataset still exists, and the UUID is still set, + // even though we did not ask for a new value explicitly. + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); harness.cleanup().await; logctx.cleanup_successful(); From cc1758257d04efce8805d985c76cb259d50ed510 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 18 Dec 2024 10:10:27 -0800 Subject: [PATCH 4/5] Do not overwrite UUIDs, throw conflict on mismatch --- sled-storage/src/manager.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index ca001e2178..b4a07f606c 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -28,6 +28,7 @@ use omicron_common::disk::{ }; use omicron_common::ledger::Ledger; use omicron_uuid_kinds::DatasetUuid; +use omicron_uuid_kinds::GenericUuid; use slog::{error, info, o, warn, Logger}; use std::collections::BTreeMap; use std::collections::HashSet; @@ -999,17 +1000,11 @@ impl StorageManager { }; if old_id != config.id { - // NOTE(https://github.com/oxidecomputer/omicron/issues/7265): - // - // This should potentially return a "UuidMismatch" error in the - // future, rather than overwriting the existing dataset UUID. - warn!( - log, - "Dataset UUID mismatch. Choosing to take new value."; - "old" => ?old_id, - "new" => ?config.id - ); - return Ok(false); + return Err(Error::UuidMismatch { + name: config.name.full_name(), + old: old_id.into_untyped_uuid(), + new: config.id.into_untyped_uuid(), + }); } let old_props = match SharedDatasetConfig::try_from(old_dataset) { From f1bb19aaaacb71432be7cfaca331b33e0df20123 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 18 Dec 2024 10:58:35 -0800 Subject: [PATCH 5/5] UUID test, comment --- sled-agent/src/sled_agent.rs | 3 +- sled-storage/src/manager.rs | 53 ++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 4fb1ba3ad3..37526690cb 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -972,7 +972,8 @@ impl SledAgent { // // This means that: // - If the dataset exists and has a UUID, this will be a no-op - // - If the dataset doesn't exist, it'll be created + // - If the dataset doesn't exist, it'll be created without its + // oxide:uuid zfs property set // - If a subsequent call to "datasets_ensure" tries to set a UUID, // it should be able to get set (once). self.inner.storage.upsert_filesystem(None, dataset_name).await?; diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index b4a07f606c..d6ffd42d0a 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -2023,6 +2023,59 @@ mod tests { logctx.cleanup_successful(); } + #[tokio::test] + async fn upsert_filesystem_no_uuid() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("upsert_filesystem"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Test setup: Add a U.2 and M.2, adopt them into the "control plane" + // for usage. + harness.handle().key_manager_ready().await; + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + // Create a filesystem on the newly formatted U.2, without a UUID + let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); + let dataset_name = + DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); + harness + .handle() + .upsert_filesystem(None, dataset_name.clone()) + .await + .unwrap(); + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, None); + + // Later, we can set the UUID to a specific value + let dataset_id = DatasetUuid::new_v4(); + harness + .handle() + .upsert_filesystem(Some(dataset_id), dataset_name.clone()) + .await + .unwrap(); + let observed_dataset = &Zfs::get_dataset_properties( + &[dataset_name.full_name()], + WhichDatasets::SelfOnly, + ) + .unwrap()[0]; + assert_eq!(observed_dataset.id, Some(dataset_id)); + + harness.cleanup().await; + logctx.cleanup_successful(); + } + #[tokio::test] async fn ensure_datasets() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst);