From 5fe27097b20d258ed9478690cf4f7077334727f9 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Thu, 7 Sep 2023 16:23:10 -0400 Subject: [PATCH 1/9] Simulated Crucible agent should be stricter There are a few more info messages here that will aid debugging, plus a few ways the simulated Crucible agent is now stricter: - it should be impossible to create a snapshot of a region in a bad state. - it should be an error to try to delete a snapshot for a non-existent region (the real sled agent would return a 404). - it should be an error to attempt creating a running snapshot for a snapshot that does not exist. The actual Crucible agent reuses freed ports, so the simulated one should too. --- sled-agent/src/sim/sled_agent.rs | 29 ++- sled-agent/src/sim/storage.rs | 419 +++++++++++++++++++++++++++---- 2 files changed, 380 insertions(+), 68 deletions(-) diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 4ab4160f78..25efdae305 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -61,6 +61,7 @@ pub struct SledAgent { pub v2p_mappings: Mutex<HashMap<Uuid, Vec<SetVirtualNetworkInterfaceHost>>>, mock_propolis: Mutex>, PropolisClient)>>, + log: Logger, } fn extract_targets_from_volume_construction_request( @@ -150,6 +151,7 @@ impl SledAgent { disk_id_to_region_ids: Mutex::new(HashMap::new()), v2p_mappings: Mutex::new(HashMap::new()), mock_propolis: Mutex::new(None), + log, }) } @@ -157,9 +159,9 @@ impl SledAgent { /// /// Crucible regions are returned with a port number, and volume /// construction requests contain a single Nexus region (which points to - /// three crucible regions). Extract the region addresses, lookup the - /// region from the port (which should be unique), and pair disk id with - /// region ids. This map is referred to later when making snapshots. + /// three crucible regions). Extract the region addresses, lookup the region + /// from the port and pair disk id with region ids. This map is referred to + /// later when making snapshots. 
pub async fn map_disk_ids_to_region_ids( &self, volume_construction_request: &VolumeConstructionRequest, @@ -187,22 +189,18 @@ impl SledAgent { let storage = self.storage.lock().await; for target in targets { - let crucible_data = storage - .get_dataset_for_port(target.port()) + let region = storage + .get_region_for_port(target.port()) .await .ok_or_else(|| { Error::internal_error(&format!( - "no dataset for port {}", + "no region for port {}", target.port() )) })?; - for region in crucible_data.list().await { - if region.port_number == target.port() { - let region_id = Uuid::from_str(&region.id.0).unwrap(); - region_ids.push(region_id); - } - } + let region_id = Uuid::from_str(&region.id.0).unwrap(); + region_ids.push(region_id); } let mut disk_id_to_region_ids = self.disk_id_to_region_ids.lock().await; @@ -541,6 +539,8 @@ impl SledAgent { Error::not_found_by_id(ResourceType::Disk, &disk_id) })?; + info!(self.log, "disk id {} region ids are {:?}", disk_id, region_ids); + let storage = self.storage.lock().await; for region_id in region_ids { @@ -548,7 +548,10 @@ impl SledAgent { storage.get_dataset_for_region(*region_id).await; if let Some(crucible_data) = crucible_data { - crucible_data.create_snapshot(*region_id, snapshot_id).await; + crucible_data + .create_snapshot(*region_id, snapshot_id) + .await + .map_err(|e| Error::internal_error(&e.to_string()))?; } else { return Err(Error::not_found_by_id( ResourceType::Disk, diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 2dd249d4f5..0c60b9eb96 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -23,6 +23,7 @@ use futures::lock::Mutex; use nexus_client::types::{ ByteCount, PhysicalDiskKind, PhysicalDiskPutRequest, ZpoolPutRequest, }; +use serde::Serialize; use slog::Logger; use std::collections::HashMap; use std::collections::HashSet; @@ -33,18 +34,23 @@ use uuid::Uuid; type CreateCallback = Box<dyn Fn(&CreateRegion) -> State + Send + 'static>; +#[derive(Serialize)] struct CrucibleDataInner { + #[serde(skip)] log: Logger, regions: HashMap<Uuid, Region>, snapshots: HashMap<Uuid, HashMap<String, Snapshot>>, running_snapshots: HashMap<Uuid, HashMap<String, RunningSnapshot>>, + #[serde(skip)] on_create: Option<CreateCallback>, creating_a_running_snapshot_should_fail: bool, - next_port: u16, + start_port: u16, + end_port: u16, + used_ports: HashSet<u16>, } impl CrucibleDataInner { - fn new(log: Logger, crucible_port: u16) -> Self { + fn new(log: Logger, start_port: u16, end_port: u16) -> Self { Self { log, regions: HashMap::new(), @@ -52,7 +58,9 @@ impl CrucibleDataInner { running_snapshots: HashMap::new(), on_create: None, creating_a_running_snapshot_should_fail: false, - next_port: crucible_port, + start_port, + end_port, + used_ports: HashSet::new(), } } @@ -64,6 +72,18 @@ impl CrucibleDataInner { self.regions.values().cloned().collect() } + fn get_free_port(&mut self) -> u16 { + for port in self.start_port..self.end_port { + if self.used_ports.contains(&port) { + continue; + } + self.used_ports.insert(port); + return port; + } + + panic!("no free ports for simulated crucible agent!"); + } + fn create(&mut self, params: CreateRegion) -> Region { let id = Uuid::from_str(&params.id.0).unwrap(); @@ -79,7 +99,7 @@ impl CrucibleDataInner { extent_size: params.extent_size, extent_count: params.extent_count, // NOTE: This is a lie - no server is running. 
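A quick standalone sketch of the allocation scheme `get_free_port` implements above — the `PortPool` name and the `main` harness are illustrative, not part of the patch: a bounded range plus a set of in-use ports, where freeing a port makes it immediately reusable.

    use std::collections::HashSet;

    struct PortPool {
        start: u16,
        end: u16,
        used: HashSet<u16>,
    }

    impl PortPool {
        // Hand out the lowest unused port in [start, end), mirroring get_free_port.
        fn alloc(&mut self) -> Option<u16> {
            let port = (self.start..self.end).find(|p| !self.used.contains(p))?;
            self.used.insert(port);
            Some(port)
        }

        // Return a port to the pool, as region delete does via used_ports.remove.
        fn free(&mut self, port: u16) {
            self.used.remove(&port);
        }
    }

    fn main() {
        let mut pool = PortPool { start: 1000, end: 2000, used: HashSet::new() };
        let a = pool.alloc().unwrap();
        pool.free(a);
        assert_eq!(pool.alloc(), Some(a)); // a freed port is handed out again
    }

The old `next_port += 1` counter never reclaimed ports, so a long-running simulated agent would eventually run off the end of its range; tracking a used-set instead matches what the real agent does.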
- port_number: self.next_port, + port_number: self.get_free_port(), state, encrypted: false, cert_pem: None, @@ -93,7 +113,6 @@ impl CrucibleDataInner { "Region already exists, but with a different ID" ); } - self.next_port += 1; region } @@ -102,11 +121,6 @@ impl CrucibleDataInner { self.regions.get(&id).cloned() } - fn get_mut(&mut self, id: &RegionId) -> Option<&mut Region> { - let id = Uuid::from_str(&id.0).unwrap(); - self.regions.get_mut(&id) - } - fn delete(&mut self, id: RegionId) -> Result<Option<Region>> { // Can't delete a ZFS dataset if there are snapshots if !self.snapshots_for_region(&id).is_empty() { @@ -122,15 +136,39 @@ impl CrucibleDataInner { let id = Uuid::from_str(&id.0).unwrap(); if let Some(region) = self.regions.get_mut(&id) { region.state = State::Destroyed; + self.used_ports.remove(&region.port_number); Ok(Some(region.clone())) } else { Ok(None) } } - fn create_snapshot(&mut self, id: Uuid, snapshot_id: Uuid) -> Snapshot { + fn create_snapshot( + &mut self, + id: Uuid, + snapshot_id: Uuid, + ) -> Result<Snapshot> { info!(self.log, "Creating region {} snapshot {}", id, snapshot_id); + + if let Some(region) = self.get(RegionId(id.to_string())) { + match region.state { + State::Failed | State::Destroyed | State::Tombstoned => { + bail!( + "cannot create snapshot of region in state {:?}", + region.state + ); + } + + State::Requested | State::Created => { + // ok + } + } + } else { + bail!("cannot create snapshot of non-existent region!"); + } + + Ok(self + .snapshots .entry(id) .or_insert_with(|| HashMap::new()) .entry(snapshot_id.to_string()) @@ -138,7 +176,7 @@ impl CrucibleDataInner { name: snapshot_id.to_string(), created: Utc::now(), }) - .clone() + .clone()) } fn snapshots_for_region(&self, id: &RegionId) -> Vec<Snapshot> { @@ -198,10 +236,14 @@ impl CrucibleDataInner { } info!(self.log, "Deleting region {} snapshot {}", id.0, name); + let region_id = Uuid::from_str(&id.0).unwrap(); if let Some(map) = self.snapshots.get_mut(&region_id) { map.remove(name); + } else { + bail!("trying to delete snapshot for non-existent region!"); } + Ok(()) } @@ -218,7 +260,12 @@ impl CrucibleDataInner { bail!("failure creating running snapshot"); } + if self.get_snapshot_for_region(id, name).is_none() { + bail!("cannot create running snapshot, snapshot does not exist!"); + } + let id = Uuid::from_str(&id.0).unwrap(); + let port_number = self.get_free_port(); let map = self.running_snapshots.entry(id).or_insert_with(|| HashMap::new()); @@ -226,20 +273,21 @@ impl CrucibleDataInner { // If a running snapshot exists already, return it - this endpoint must // be idempotent. 
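The idempotency pattern that comment describes, reduced to a minimal standalone sketch with hypothetical names (the patch's real code keys running snapshots by region id and snapshot name): look up first, and only allocate a side-effecting resource such as a port when no entry exists. This is also why patch 8 below later moves `get_free_port` down past the existence check.

    use std::collections::HashMap;

    #[derive(Clone, Debug, PartialEq)]
    struct Running {
        port: u16,
    }

    // Look up before allocating: repeated calls return the existing entry
    // and never burn a second port.
    fn create_running(
        map: &mut HashMap<String, Running>,
        name: &str,
        alloc_port: &mut impl FnMut() -> u16,
    ) -> Running {
        if let Some(existing) = map.get(name) {
            return existing.clone();
        }
        let snap = Running { port: alloc_port() };
        map.insert(name.to_string(), snap.clone());
        snap
    }

    fn main() {
        let mut map = HashMap::new();
        let mut next = 1000u16;
        let mut alloc = || {
            let p = next;
            next += 1;
            p
        };
        let first = create_running(&mut map, "snap", &mut alloc);
        let second = create_running(&mut map, "snap", &mut alloc);
        assert_eq!(first, second); // idempotent: same port both times
    }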
if let Some(running_snapshot) = map.get(&name.to_string()) { + self.used_ports.remove(&port_number); return Ok(running_snapshot.clone()); } let running_snapshot = RunningSnapshot { id: RegionId(Uuid::new_v4().to_string()), name: name.to_string(), - port_number: self.next_port, + port_number, state: State::Created, }; + let map = + self.running_snapshots.entry(id).or_insert_with(|| HashMap::new()); map.insert(name.to_string(), running_snapshot.clone()); - self.next_port += 1; - Ok(running_snapshot) } @@ -255,6 +303,7 @@ impl CrucibleDataInner { if let Some(running_snapshot) = map.get_mut(&name.to_string()) { running_snapshot.state = State::Destroyed; + self.used_ports.remove(&running_snapshot.port_number); } Ok(()) @@ -277,15 +326,264 @@ impl CrucibleDataInner { .filter(|rs| rs.state != State::Destroyed) .count(); - info!( - self.log, - "is_empty non_destroyed_regions {} snapshots {} running_snapshots {}", - non_destroyed_regions, - snapshots, - running_snapshots, - ); + let empty = non_destroyed_regions == 0 + && snapshots == 0 + && running_snapshots == 0; + + if !empty { + info!( + self.log, + "is_empty state: {:?}", + serde_json::to_string(&self).unwrap(), + ); + + info!( + self.log, + "is_empty non_destroyed_regions {} snapshots {} running_snapshots {}", + non_destroyed_regions, + snapshots, + running_snapshots, + ); + } + + empty + } +} + +#[cfg(test)] +mod test { + use super::*; + use omicron_test_utils::dev::test_setup_log; + + /// Validate that the Crucible agent reuses ports + #[test] + fn crucible_ports_get_reused() { + let logctx = test_setup_log("crucible_ports_get_reused"); + let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); + + // Create a region, then delete it. + + let region_id = Uuid::new_v4(); + let region = agent.create(CreateRegion { + block_size: 512, + extent_count: 10, + extent_size: 10, + id: RegionId(region_id.to_string()), + encrypted: true, + cert_pem: None, + key_pem: None, + root_pem: None, + }); + + let first_region_port = region.port_number; + + assert!(agent + .delete(RegionId(region_id.to_string())) + .unwrap() + .is_some()); + + // Create another region, make sure it gets the same port number, but + // don't delete it. + + let second_region_id = Uuid::new_v4(); + let second_region = agent.create(CreateRegion { + block_size: 512, + extent_count: 10, + extent_size: 10, + id: RegionId(second_region_id.to_string()), + encrypted: true, + cert_pem: None, + key_pem: None, + root_pem: None, + }); + + assert_eq!(second_region.port_number, first_region_port,); + + // Create another region, delete it. After this, we still have the + // second region. + + let third_region = agent.create(CreateRegion { + block_size: 512, + extent_count: 10, + extent_size: 10, + id: RegionId(Uuid::new_v4().to_string()), + encrypted: true, + cert_pem: None, + key_pem: None, + root_pem: None, + }); + + let third_region_port = third_region.port_number; + + assert!(agent + .delete(RegionId(third_region.id.to_string())) + .unwrap() + .is_some()); + + // Create a running snapshot, make sure it gets the same port number + // as the third region did. This ensures that the Crucible agent shares + // ports between regions and running snapshots. 
+ + let snapshot_id = Uuid::new_v4(); + agent.create_snapshot(second_region_id, snapshot_id).unwrap(); + + let running_snapshot = agent + .create_running_snapshot( + &RegionId(second_region_id.to_string()), + &snapshot_id.to_string(), + ) + .unwrap(); + + assert_eq!(running_snapshot.port_number, third_region_port,); + } + + /// Validate that users must delete snapshots before deleting the region + #[test] + fn must_delete_snapshots_first() { + let logctx = test_setup_log("must_delete_snapshots_first"); + let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); + + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + + let _region = agent.create(CreateRegion { + block_size: 512, + extent_count: 10, + extent_size: 10, + id: RegionId(region_id.to_string()), + encrypted: true, + cert_pem: None, + key_pem: None, + root_pem: None, + }); + + agent.create_snapshot(region_id, snapshot_id).unwrap(); + + agent.delete(RegionId(region_id.to_string())).unwrap_err(); + } + + /// Validate that users cannot delete snapshots before deleting the "running + /// snapshots" (the read-only downstairs for that snapshot) + #[test] + fn must_delete_read_only_downstairs_first() { + let logctx = test_setup_log("must_delete_read_only_downstairs_first"); + let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); + + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + + let _region = agent.create(CreateRegion { + block_size: 512, + extent_count: 10, + extent_size: 10, + id: RegionId(region_id.to_string()), + encrypted: true, + cert_pem: None, + key_pem: None, + root_pem: None, + }); + + agent.create_snapshot(region_id, snapshot_id).unwrap(); + + agent + .create_running_snapshot( + &RegionId(region_id.to_string()), + &snapshot_id.to_string(), + ) + .unwrap(); + + agent + .delete_snapshot( + &RegionId(region_id.to_string()), + &snapshot_id.to_string(), + ) + .unwrap_err(); + } + + /// Validate that users cannot boot a read-only downstairs for a snapshot + /// that does not exist. 
+ #[test] fn cannot_boot_read_only_downstairs_with_no_snapshot() { let logctx = test_setup_log("cannot_boot_read_only_downstairs_with_no_snapshot"); let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); let region_id = Uuid::new_v4(); let snapshot_id = Uuid::new_v4(); let _region = agent.create(CreateRegion { block_size: 512, extent_count: 10, extent_size: 10, id: RegionId(region_id.to_string()), encrypted: true, cert_pem: None, key_pem: None, root_pem: None, }); agent .create_running_snapshot( &RegionId(region_id.to_string()), &snapshot_id.to_string(), ) .unwrap_err(); } /// Validate that users cannot create a snapshot from a non-existent region #[test] fn snapshot_needs_region() { let logctx = test_setup_log("snapshot_needs_region"); let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); let region_id = Uuid::new_v4(); let snapshot_id = Uuid::new_v4(); agent.create_snapshot(region_id, snapshot_id).unwrap_err(); } /// Validate that users cannot create a "running" snapshot from a /// non-existent region #[test] fn running_snapshot_needs_region() { let logctx = test_setup_log("snapshot_needs_region"); let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); - non_destroyed_regions == 0 && snapshots == 0 && running_snapshots == 0 + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + + agent + .create_running_snapshot( + &RegionId(region_id.to_string()), + &snapshot_id.to_string(), + ) + .unwrap_err(); + } + + /// Validate that users cannot create snapshots for destroyed regions + #[test] + fn cannot_create_snapshot_for_destroyed_region() { + let logctx = + test_setup_log("cannot_create_snapshot_for_destroyed_region"); + let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); + + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + + let _region = agent.create(CreateRegion { + block_size: 512, + extent_count: 10, + extent_size: 10, + id: RegionId(region_id.to_string()), + encrypted: true, + cert_pem: None, + key_pem: None, + root_pem: None, + }); + + agent.delete(RegionId(region_id.to_string())).unwrap(); + + agent.create_snapshot(region_id, snapshot_id).unwrap_err(); } } @@ -295,8 +593,12 @@ pub struct CrucibleData { } impl CrucibleData { - fn new(log: Logger, crucible_port: u16) -> Self { - Self { inner: Mutex::new(CrucibleDataInner::new(log, crucible_port)) } + fn new(log: Logger, start_port: u16, end_port: u16) -> Self { + Self { + inner: Mutex::new(CrucibleDataInner::new( + log, start_port, end_port, + )), + } } pub async fn set_create_callback(&self, callback: CreateCallback) { @@ -319,20 +621,11 @@ impl CrucibleData { self.inner.lock().await.delete(id) } - pub async fn set_state(&self, id: &RegionId, state: State) { - self.inner - .lock() - .await - .get_mut(id) - .expect("region does not exist") - .state = state; - } - pub async fn create_snapshot( &self, id: Uuid, snapshot_id: Uuid, - ) -> Snapshot { + ) -> Result<Snapshot> { self.inner.lock().await.create_snapshot(id, snapshot_id) } @@ -397,13 +690,18 @@ pub struct CrucibleServer { } impl CrucibleServer { - fn new(log: &Logger, crucible_ip: IpAddr, crucible_port: u16) -> Self { + fn new( + log: &Logger, + crucible_ip: IpAddr, + start_port: u16, + end_port: u16, + ) -> Self { // SocketAddr::new with port set to 0 will grab any open port to host // the emulated crucible agent, but set the fake downstairs listen ports // to start at `crucible_port`. 
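A minimal illustration of the bind-to-port-0 trick that comment describes, using only the standard library (the real code hands the zero port to dropshot's ConfigDropshot instead): the OS picks any free port, which the listener can then report back.

    use std::net::TcpListener;

    fn main() -> std::io::Result<()> {
        // Port 0 asks the OS for any available port.
        let listener = TcpListener::bind("127.0.0.1:0")?;
        // local_addr() reveals the port that was actually chosen.
        println!("bound to {}", listener.local_addr()?);
        Ok(())
    }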
+ let data = Arc::new(CrucibleData::new( - log.new(slog::o!("port" => format!("{crucible_port}"))), - crucible_port, + log.new(slog::o!("start_port" => format!("{start_port}"), "end_port" => format!("{end_port}"))), + start_port, end_port, )); let config = dropshot::ConfigDropshot { bind_address: SocketAddr::new(crucible_ip, 0), @@ -451,10 +749,13 @@ impl Zpool { log: &Logger, id: Uuid, crucible_ip: IpAddr, - crucible_port: u16, + start_port: u16, + end_port: u16, ) -> &CrucibleServer { - self.datasets - .insert(id, CrucibleServer::new(log, crucible_ip, crucible_port)); + self.datasets.insert( + id, + CrucibleServer::new(log, crucible_ip, start_port, end_port), + ); self.datasets .get(&id) .expect("Failed to get the dataset we just inserted") @@ -476,19 +777,25 @@ impl Zpool { None } - pub async fn get_dataset_for_port( - &self, - port: u16, - ) -> Option<Arc<CrucibleData>> { + pub async fn get_region_for_port(&self, port: u16) -> Option<Region> { + let mut regions = vec![]; + for dataset in self.datasets.values() { for region in &dataset.data().list().await { + if region.state == State::Destroyed { + continue; + } + if port == region.port_number { - return Some(dataset.data()); + regions.push(region.clone()); } } } - None + // At most, 1 active region with a port should be returned. + assert!(regions.len() < 2); + + regions.pop() } } @@ -598,6 +905,7 @@ impl Storage { dataset_id, self.crucible_ip, self.next_crucible_port, + self.next_crucible_port + 100, ); self.next_crucible_port += 100; @@ -648,17 +956,18 @@ impl Storage { None } - pub async fn get_dataset_for_port( - &self, - port: u16, - ) -> Option<Arc<CrucibleData>> { + pub async fn get_region_for_port(&self, port: u16) -> Option<Region> { + let mut regions = vec![]; for zpool in self.zpools.values() { - if let Some(dataset) = zpool.get_dataset_for_port(port).await { - return Some(dataset); + if let Some(region) = zpool.get_region_for_port(port).await { + regions.push(region); } } - None + // At most, 1 active region with a port should be returned. + assert!(regions.len() < 2); + + regions.pop() } } From bbf5a6a33d656f8e334f7e93ed73ddc3d21fcfe4 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Wed, 4 Oct 2023 13:16:04 -0400 Subject: [PATCH 2/9] fmt and clippy --- sled-agent/src/sim/storage.rs | 66 +++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 5487d57769..5056b18e3c 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -383,16 +383,18 @@ mod test { // Create a region, then delete it. let region_id = Uuid::new_v4(); - let region = agent.create(CreateRegion { - block_size: 512, - extent_count: 10, - extent_size: 10, - id: RegionId(region_id.to_string()), - encrypted: true, - cert_pem: None, - key_pem: None, - root_pem: None, - }); + let region = agent + .create(CreateRegion { + block_size: 512, + extent_count: 10, + extent_size: 10, + id: RegionId(region_id.to_string()), + encrypted: true, + cert_pem: None, + key_pem: None, + root_pem: None, + }) + .unwrap(); let first_region_port = region.port_number; assert!(agent .delete(RegionId(region_id.to_string())) .unwrap() .is_some()); // Create another region, make sure it gets the same port number, but // don't delete it. 
let second_region_id = Uuid::new_v4(); - let second_region = agent.create(CreateRegion { - block_size: 512, - extent_count: 10, - extent_size: 10, - id: RegionId(second_region_id.to_string()), - encrypted: true, - cert_pem: None, - key_pem: None, - root_pem: None, - }); + let second_region = agent + .create(CreateRegion { + block_size: 512, + extent_count: 10, + extent_size: 10, + id: RegionId(second_region_id.to_string()), + encrypted: true, + cert_pem: None, + key_pem: None, + root_pem: None, + }) + .unwrap(); assert_eq!(second_region.port_number, first_region_port,); // Create another region, delete it. After this, we still have the // second region. - let third_region = agent.create(CreateRegion { - block_size: 512, - extent_count: 10, - extent_size: 10, - id: RegionId(Uuid::new_v4().to_string()), - encrypted: true, - cert_pem: None, - key_pem: None, - root_pem: None, - }); + let third_region = agent + .create(CreateRegion { + block_size: 512, + extent_count: 10, + extent_size: 10, + id: RegionId(Uuid::new_v4().to_string()), + encrypted: true, + cert_pem: None, + key_pem: None, + root_pem: None, + }) + .unwrap(); let third_region_port = third_region.port_number; From bdea3c7483e26143e221ca8dad26cd888369bd27 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 17 Oct 2023 12:19:42 -0400 Subject: [PATCH 3/9] revert Cargo.toml change --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5b12b2c7a7..fb610128ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -380,7 +380,7 @@ panic = "abort" # See https://github.com/oxidecomputer/omicron/issues/4009 for some background context here. # By reducing the debug level (though keeping enough to have meaningful # backtraces), we reduce incremental build time and binary size significantly. 
-#debug = "line-tables-only" +debug = "line-tables-only" # `bindgen` is used by `samael`'s build script; building it with optimizations # makes that build script run ~5x faster, more than offsetting the additional From 9d91d3817884a5d020e44947f21495b3676f386d Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Wed, 1 Nov 2023 16:43:28 -0400 Subject: [PATCH 4/9] crucible#1004: don't check for a snapshot that's gone --- sled-agent/src/sim/http_entrypoints_storage.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sled-agent/src/sim/http_entrypoints_storage.rs b/sled-agent/src/sim/http_entrypoints_storage.rs index dcc449b61a..4398763c08 100644 --- a/sled-agent/src/sim/http_entrypoints_storage.rs +++ b/sled-agent/src/sim/http_entrypoints_storage.rs @@ -281,13 +281,6 @@ async fn region_delete_running_snapshot( )); } - if crucible.get_snapshot_for_region(&p.id, &p.name).await.is_none() { - return Err(HttpError::for_not_found( - None, - format!("snapshot {:?} not found", p.name), - )); - } - crucible.delete_running_snapshot(&p.id, &p.name).await.map_err(|e| { HttpError::for_internal_error(format!( "running snapshot create failure: {:?}", From d7d0515c52502979e2d114ba095b7c558ed3732b Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Sat, 7 Sep 2024 05:46:34 +0000 Subject: [PATCH 5/9] cargo fmt --- sled-agent/src/sim/storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 3c69fbb9c5..15b243129a 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -18,7 +18,6 @@ use crucible_agent_client::types::{ use dropshot::HandlerTaskMode; use dropshot::HttpError; use futures::lock::Mutex; -use serde::Serialize; use omicron_common::disk::DatasetManagementStatus; use omicron_common::disk::DatasetsConfig; use omicron_common::disk::DatasetsManagementResult; @@ -32,6 +31,7 @@ use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PropolisUuid; use omicron_uuid_kinds::ZpoolUuid; use propolis_client::types::VolumeConstructionRequest; +use serde::Serialize; use slog::Logger; use std::collections::HashMap; use std::collections::HashSet; From ec4c9e27515a83c7f7623e68cf0ebea2a773caa8 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Sat, 7 Sep 2024 09:30:55 +0000 Subject: [PATCH 6/9] logctx.cleanup_successful --- sled-agent/src/sim/storage.rs | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 15b243129a..640253a192 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -398,7 +398,7 @@ mod test { #[test] fn crucible_ports_get_reused() { let logctx = test_setup_log("crucible_ports_get_reused"); - let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); + let mut agent = CrucibleDataInner::new(logctx.log.clone(), 1000, 2000); // Create a region, then delete it. 
@@ -483,13 +483,15 @@ mod test { .unwrap(); assert_eq!(running_snapshot.port_number, third_region_port,); + + logctx.cleanup_successful(); } /// Validate that users must delete snapshots before deleting the region #[test] fn must_delete_snapshots_first() { let logctx = test_setup_log("must_delete_snapshots_first"); - let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); + let mut agent = CrucibleDataInner::new(logctx.log.clone(), 1000, 2000); let region_id = Uuid::new_v4(); let snapshot_id = Uuid::new_v4(); @@ -509,6 +511,8 @@ mod test { agent.create_snapshot(region_id, snapshot_id).unwrap(); agent.delete(RegionId(region_id.to_string())).unwrap_err(); + + logctx.cleanup_successful(); } /// Validate that users cannot delete snapshots before deleting the "running @@ -516,7 +520,7 @@ mod test { #[test] fn must_delete_read_only_downstairs_first() { let logctx = test_setup_log("must_delete_read_only_downstairs_first"); - let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); + let mut agent = CrucibleDataInner::new(logctx.log.clone(), 1000, 2000); let region_id = Uuid::new_v4(); let snapshot_id = Uuid::new_v4(); @@ -548,6 +552,8 @@ mod test { &snapshot_id.to_string(), ) .unwrap_err(); + + logctx.cleanup_successful(); } /// Validate that users cannot boot a read-only downstairs for a snapshot @@ -556,7 +562,7 @@ mod test { fn cannot_boot_read_only_downstairs_with_no_snapshot() { let logctx = test_setup_log("cannot_boot_read_only_downstairs_with_no_snapshot"); - let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); + let mut agent = CrucibleDataInner::new(logctx.log.clone(), 1000, 2000); let region_id = Uuid::new_v4(); let snapshot_id = Uuid::new_v4(); @@ -579,18 +585,22 @@ mod test { &snapshot_id.to_string(), ) .unwrap_err(); + + logctx.cleanup_successful(); } /// Validate that users cannot create a snapshot from a non-existent region #[test] fn snapshot_needs_region() { let logctx = test_setup_log("snapshot_needs_region"); - let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); + let mut agent = CrucibleDataInner::new(logctx.log.clone(), 1000, 2000); let region_id = Uuid::new_v4(); let snapshot_id = Uuid::new_v4(); agent.create_snapshot(region_id, snapshot_id).unwrap_err(); + + logctx.cleanup_successful(); } /// Validate that users cannot create a "running" snapshot from a @@ -598,7 +608,7 @@ mod test { #[test] fn running_snapshot_needs_region() { let logctx = test_setup_log("snapshot_needs_region"); - let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); + let mut agent = CrucibleDataInner::new(logctx.log.clone(), 1000, 2000); let region_id = Uuid::new_v4(); let snapshot_id = Uuid::new_v4(); @@ -609,6 +619,8 @@ mod test { &snapshot_id.to_string(), ) .unwrap_err(); + + logctx.cleanup_successful(); } /// Validate that users cannot create snapshots for destroyed regions @@ -616,7 +628,7 @@ mod test { fn cannot_create_snapshot_for_destroyed_region() { let logctx = test_setup_log("cannot_create_snapshot_for_destroyed_region"); - let mut agent = CrucibleDataInner::new(logctx.log, 1000, 2000); + let mut agent = CrucibleDataInner::new(logctx.log.clone(), 1000, 2000); let region_id = Uuid::new_v4(); let snapshot_id = Uuid::new_v4(); @@ -636,6 +648,8 @@ mod test { agent.delete(RegionId(region_id.to_string())).unwrap(); agent.create_snapshot(region_id, snapshot_id).unwrap_err(); + + logctx.cleanup_successful(); } } From 2a3cbd5455ecd2563a88ac790f63f840e966a6d9 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 10 Sep 2024 08:07:21 +0000 Subject: 
[PATCH 7/9] add region ids to bail messages --- sled-agent/src/sim/storage.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 640253a192..5a861e4f96 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -185,7 +185,7 @@ impl CrucibleDataInner { match region.state { State::Failed | State::Destroyed | State::Tombstoned => { bail!( - "cannot create snapshot of region in state {:?}", + "cannot create snapshot of region {id:?} in state {:?}", region.state ); } @@ -195,7 +195,7 @@ impl CrucibleDataInner { } } } else { - bail!("cannot create snapshot of non-existent region!"); + bail!("cannot create snapshot of non-existent region {id:?}!"); } Ok(self From 4b705aff6aef4b7d792612de3e0fff3b2f619506 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 10 Sep 2024 08:08:04 +0000 Subject: [PATCH 8/9] get_free_port could move down below check --- sled-agent/src/sim/storage.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 5a861e4f96..45f1cb1fc2 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -304,7 +304,6 @@ impl CrucibleDataInner { } let id = Uuid::from_str(&id.0).unwrap(); - let port_number = self.get_free_port(); let map = self.running_snapshots.entry(id).or_insert_with(|| HashMap::new()); @@ -312,10 +311,11 @@ impl CrucibleDataInner { // If a running snapshot exists already, return it - this endpoint must // be idempotent. if let Some(running_snapshot) = map.get(&name.to_string()) { - self.used_ports.remove(&port_number); return Ok(running_snapshot.clone()); } + let port_number = self.get_free_port(); + let running_snapshot = RunningSnapshot { id: RegionId(Uuid::new_v4().to_string()), name: name.to_string(), From bbca204feb0a0fe4bfac163e50105e57198a559e Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 10 Sep 2024 09:39:11 +0000 Subject: [PATCH 9/9] be explicit: we are testing the simulated crucible agent! --- sled-agent/src/sim/storage.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 45f1cb1fc2..5bbafa2ac3 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -394,7 +394,8 @@ mod test { use super::*; use omicron_test_utils::dev::test_setup_log; - /// Validate that the Crucible agent reuses ports + /// Validate that the simulated Crucible agent reuses ports when regions are + /// deleted. #[test] fn crucible_ports_get_reused() { let logctx = test_setup_log("crucible_ports_get_reused");