diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs
index 667a666375..5058739da0 100644
--- a/dev-tools/omdb/src/bin/omdb/db.rs
+++ b/dev-tools/omdb/src/bin/omdb/db.rs
@@ -797,12 +797,23 @@ enum ValidateCommands {
     /// Validate each `volume_references` column in the region snapshots table
     ValidateVolumeReferences,

+    /// Find either regions Nexus knows about that the corresponding Crucible
+    /// agent says were deleted, or regions that Nexus doesn't know about.
+    ValidateRegions(ValidateRegionsArgs),
+
     /// Find either region snapshots Nexus knows about that the corresponding
     /// Crucible agent says were deleted, or region snapshots that Nexus doesn't
     /// know about.
     ValidateRegionSnapshots,
 }

+#[derive(Debug, Args)]
+struct ValidateRegionsArgs {
+    /// Delete Regions Nexus is unaware of
+    #[clap(long, default_value_t = false)]
+    clean_up_orphaned_regions: bool,
+}
+
 #[derive(Debug, Args)]
 struct VolumeArgs {
     #[command(subcommand)]
@@ -1093,6 +1104,20 @@ impl DbArgs {
             DbCommands::Validate(ValidateArgs {
                 command: ValidateCommands::ValidateVolumeReferences,
             }) => cmd_db_validate_volume_references(&datastore).await,
+            DbCommands::Validate(ValidateArgs {
+                command: ValidateCommands::ValidateRegions(args),
+            }) => {
+                let clean_up_orphaned_regions =
+                    if args.clean_up_orphaned_regions {
+                        let token = omdb.check_allow_destructive()?;
+                        CleanUpOrphanedRegions::Yes { _token: token }
+                    } else {
+                        CleanUpOrphanedRegions::No
+                    };
+
+                cmd_db_validate_regions(&datastore, clean_up_orphaned_regions)
+                    .await
+            }
             DbCommands::Validate(ValidateArgs {
                 command: ValidateCommands::ValidateRegionSnapshots,
             }) => cmd_db_validate_region_snapshots(&datastore).await,
@@ -4526,6 +4551,283 @@ async fn cmd_db_validate_volume_references(
     Ok(())
 }

+enum CleanUpOrphanedRegions {
+    Yes { _token: DestructiveOperationToken },
+    No,
+}
+
+async fn cmd_db_validate_regions(
+    datastore: &DataStore,
+    clean_up_orphaned_regions: CleanUpOrphanedRegions,
+) -> Result<(), anyhow::Error> {
+    // *Lifetime note*:
+    //
+    // The lifetime of the region record in cockroachdb is longer than the time
+    // the Crucible agent's region is in a non-destroyed state: Nexus will
+    // perform the query to allocate regions (inserting them into the database)
+    // before it ensures those regions are created (i.e. making the POST request
+    // to the appropriate Crucible agent to create them), and it will request
+    // that the regions be deleted (then wait for that region to transition to
+    // the destroyed state) before hard-deleting the records in the database.
+
+    // First, get all region records (with their corresponding dataset)
+    let datasets_and_regions: Vec<(Dataset, Region)> = datastore
+        .pool_connection_for_tests()
+        .await?
+        .transaction_async(|conn| async move {
+            // Selecting all datasets and regions requires a full table scan
+            conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?;
+
+            use db::schema::dataset::dsl as dataset_dsl;
+            use db::schema::region::dsl;
+
+            dsl::region
+                .inner_join(
+                    dataset_dsl::dataset
+                        .on(dsl::dataset_id.eq(dataset_dsl::id)),
+                )
+                .select((Dataset::as_select(), Region::as_select()))
+                .get_results_async(&conn)
+                .await
+        })
+        .await?;
+
+    #[derive(Tabled)]
+    struct Row {
+        dataset_id: DatasetUuid,
+        region_id: Uuid,
+        dataset_addr: std::net::SocketAddrV6,
+        error: String,
+    }
+
+    let mut rows = Vec::new();
+
+    // Reconcile with the corresponding Crucible Agent: are they aware of each
+    // region in the database?
+    for (dataset, region) in &datasets_and_regions {
+        // If the dataset was expunged, do not attempt to contact the Crucible
+        // agent!
+        let in_service =
+            datastore.dataset_physical_disk_in_service(dataset.id()).await?;
+
+        if !in_service {
+            eprintln!(
+                "dataset {} {:?} is not in service, skipping",
+                dataset.id(),
+                dataset.address(),
+            );
+            continue;
+        }
+
+        use crucible_agent_client::types::RegionId;
+        use crucible_agent_client::types::State;
+        use crucible_agent_client::Client as CrucibleAgentClient;
+
+        let Some(dataset_addr) = dataset.address() else {
+            eprintln!("Dataset {} missing an IP address", dataset.id());
+            continue;
+        };
+
+        let url = format!("http://{}", dataset_addr);
+        let client = CrucibleAgentClient::new(&url);
+
+        let actual_region =
+            match client.region_get(&RegionId(region.id().to_string())).await {
+                Ok(region) => region.into_inner(),
+
+                Err(e) => {
+                    // Either there was a communication error, or the agent is
+                    // unaware of the Region (this would be a 404).
+                    match e {
+                        crucible_agent_client::Error::ErrorResponse(rv)
+                            if rv.status() == http::StatusCode::NOT_FOUND =>
+                        {
+                            rows.push(Row {
+                                dataset_id: dataset.id(),
+                                region_id: region.id(),
+                                dataset_addr,
+                                error: String::from(
+                                    "Agent does not know about this region!",
+                                ),
+                            });
+                        }
+
+                        _ => {
+                            eprintln!(
+                                "{} region_get {:?}: {e}",
+                                dataset_addr,
+                                region.id(),
+                            );
+                        }
+                    }
+
+                    continue;
+                }
+            };
+
+        // The Agent is aware of this region, but is it in the appropriate
+        // state?
+
+        match actual_region.state {
+            State::Destroyed => {
+                // If it is destroyed, then this is invalid as the record should
+                // be hard-deleted as well (see the lifetime note above). Note
+                // that omdb could be racing a Nexus that is performing region
+                // deletion: if the region transitioned to Destroyed but Nexus
+                // is waiting to re-poll, it will not have hard-deleted the
+                // region record yet.
+
+                rows.push(Row {
+                    dataset_id: dataset.id(),
+                    region_id: region.id(),
+                    dataset_addr,
+                    error: String::from(
+                        "region may need to be manually hard-deleted",
+                    ),
+                });
+            }
+
+            _ => {
+                // ok
+            }
+        }
+    }
+
+    // Reconcile with the Crucible agents: are there regions that Nexus does not
+    // know about? Ask each Crucible agent for its list of regions, then check
+    // in the database: if that region is _not_ in the database, then either it
+    // was never created by Nexus, or it was hard-deleted by Nexus. Either way,
+    // omdb should (if the command line argument is supplied) request that the
+    // orphaned region be deleted.
+    //
+    // Note: This should not delete what is actually a valid region, see the
+    // lifetime note above.
+
+    let mut orphaned_bytes: u64 = 0;
+
+    let db_region_ids: BTreeSet<Uuid> =
+        datasets_and_regions.iter().map(|(_, r)| r.id()).collect();
+
+    // Find all the Crucible datasets
+    let datasets: Vec<Dataset> = datastore
+        .pool_connection_for_tests()
+        .await?
+        .transaction_async(|conn| async move {
+            // Selecting all datasets and regions requires a full table scan
+            conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?;
+
+            use db::schema::dataset::dsl;
+
+            dsl::dataset
+                .filter(dsl::kind.eq(nexus_db_model::DatasetKind::Crucible))
+                .select(Dataset::as_select())
+                .get_results_async(&conn)
+                .await
+        })
+        .await?;
+
+    for dataset in &datasets {
+        // If the dataset was expunged, do not attempt to contact the Crucible
+        // agent!
+        let in_service =
+            datastore.dataset_physical_disk_in_service(dataset.id()).await?;
+
+        if !in_service {
+            eprintln!(
+                "dataset {} {:?} is not in service, skipping",
+                dataset.id(),
+                dataset.address(),
+            );
+            continue;
+        }
+
+        use crucible_agent_client::types::State;
+        use crucible_agent_client::Client as CrucibleAgentClient;
+
+        let Some(dataset_addr) = dataset.address() else {
+            eprintln!("Dataset {} missing an IP address", dataset.id());
+            continue;
+        };
+
+        let url = format!("http://{}", dataset_addr);
+        let client = CrucibleAgentClient::new(&url);
+
+        let actual_regions = match client.region_list().await {
+            Ok(v) => v.into_inner(),
+            Err(e) => {
+                eprintln!("{} region_list: {e}", dataset_addr);
+                continue;
+            }
+        };
+
+        for actual_region in actual_regions {
+            // Skip doing anything if the region is already tombstoned or
+            // destroyed
+            match actual_region.state {
+                State::Destroyed | State::Tombstoned => {
+                    // the Crucible agent will eventually clean this up, or
+                    // already has.
+                    continue;
+                }
+
+                State::Failed | State::Requested | State::Created => {
+                    // this region needs cleaning up if there isn't an
+                    // associated db record
+                }
+            }
+
+            let actual_region_id: Uuid = actual_region.id.0.parse().unwrap();
+            if !db_region_ids.contains(&actual_region_id) {
+                orphaned_bytes += actual_region.block_size
+                    * actual_region.extent_size
+                    * u64::from(actual_region.extent_count);
+
+                match clean_up_orphaned_regions {
+                    CleanUpOrphanedRegions::Yes { .. } => {
+                        match client.region_delete(&actual_region.id).await {
+                            Ok(_) => {
+                                eprintln!(
+                                    "{} region {} deleted ok",
+                                    dataset_addr, actual_region.id,
+                                );
+                            }
+
+                            Err(e) => {
+                                eprintln!(
+                                    "{} region_delete {:?}: {e}",
+                                    dataset_addr, actual_region.id,
+                                );
+                            }
+                        }
+                    }
+
+                    CleanUpOrphanedRegions::No => {
+                        // Do not delete this region, just print a row
+                        rows.push(Row {
+                            dataset_id: dataset.id(),
+                            region_id: actual_region_id,
+                            dataset_addr,
+                            error: String::from(
+                                "Nexus does not know about this region!",
+                            ),
+                        });
+                    }
+                }
+            }
+        }
+    }
+
+    let table = tabled::Table::new(rows)
+        .with(tabled::settings::Style::empty())
+        .to_string();
+
+    println!("{}", table);
+
+    eprintln!("found {} orphaned bytes", orphaned_bytes);
+
+    Ok(())
+}
+
 async fn cmd_db_validate_region_snapshots(
     datastore: &DataStore,
 ) -> Result<(), anyhow::Error> {
@@ -4581,6 +4883,15 @@ async fn cmd_db_validate_region_snapshots(
             .or_default()
             .insert(region_snapshot.snapshot_id);

+        // If the dataset was expunged, do not attempt to contact the Crucible
+        // agent!
+        let in_service =
+            datastore.dataset_physical_disk_in_service(dataset.id()).await?;
+
+        if !in_service {
+            continue;
+        }
+
         use crucible_agent_client::types::RegionId;
         use crucible_agent_client::types::State;
         use crucible_agent_client::Client as CrucibleAgentClient;
@@ -4593,11 +4904,21 @@ async fn cmd_db_validate_region_snapshots(
         let url = format!("http://{}", dataset_addr);
         let client = CrucibleAgentClient::new(&url);

-        let actual_region_snapshots = client
+        let actual_region_snapshots = match client
             .region_get_snapshots(&RegionId(
                 region_snapshot.region_id.to_string(),
             ))
-            .await?;
+            .await
+        {
+            Ok(v) => v,
+            Err(e) => {
+                eprintln!(
+                    "{} region_get_snapshots {:?}: {e}",
+                    dataset_addr, region_snapshot.region_id,
+                );
+                continue;
+            }
+        };

         let snapshot_id = region_snapshot.snapshot_id.to_string();

@@ -4741,6 +5062,15 @@ async fn cmd_db_validate_region_snapshots(
     // Reconcile with the Crucible agents: are there snapshots that Nexus does
     // not know about?
     for (dataset, region) in datasets_and_regions {
+        // If the dataset was expunged, do not attempt to contact the Crucible
+        // agent!
+        let in_service =
+            datastore.dataset_physical_disk_in_service(dataset.id()).await?;
+
+        if !in_service {
+            continue;
+        }
+
         use crucible_agent_client::types::RegionId;
         use crucible_agent_client::types::State;
         use crucible_agent_client::Client as CrucibleAgentClient;
@@ -4753,9 +5083,20 @@ async fn cmd_db_validate_region_snapshots(
         let url = format!("http://{}", dataset_addr);
         let client = CrucibleAgentClient::new(&url);

-        let actual_region_snapshots = client
+        let actual_region_snapshots = match client
             .region_get_snapshots(&RegionId(region.id().to_string()))
-            .await?;
+            .await
+        {
+            Ok(v) => v,
+            Err(e) => {
+                eprintln!(
+                    "{} region_get_snapshots {:?}: {e}",
+                    dataset_addr,
+                    region.id(),
+                );
+                continue;
+            }
+        };

         let default = HashSet::default();
         let nexus_region_snapshots: &HashSet<Uuid> =
diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs
index 4855f64ac2..2092c3065a 100644
--- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs
+++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs
@@ -101,9 +101,43 @@ declare_saga_actions! {
     FIND_NEW_REGION -> "new_dataset_and_region" {
         + rsrss_find_new_region
     }
+    // One of the common sharp edges of sagas is that the compensating action of
+    // a node does _not_ run if the forward action fails. Said another way, for
+    // this node:
+    //
+    //   EXAMPLE -> "output" {
+    //     + forward_action
+    //     - forward_action_undo
+    //   }
+    //
+    // If `forward_action` fails, `forward_action_undo` is never executed.
+    // Forward actions are therefore required to be atomic, in that they either
+    // fully apply or don't apply at all.
+    //
+    // Sagas with nodes that ensure multiple regions exist cannot be atomic
+    // because they can partially fail (for example: what if only 2 out of 3
+    // ensures succeed?). In order for the compensating action to be run, it
+    // must exist as a separate node that has a no-op forward action:
+    //
+    //   EXAMPLE_UNDO -> "not_used" {
+    //     + noop
+    //     - forward_action_undo
+    //   }
+    //   EXAMPLE -> "output" {
+    //     + forward_action
+    //   }
+    //
+    // This saga will only ever ensure that a single region exists, so you might
+    // think you could get away with a single node that combines the forward and
+    // compensating action - you'd be mistaken! The Crucible agent's region
+    // ensure is not atomic in all cases: if the region fails to create, it
+    // enters the `failed` state, but is not deleted. Nexus must clean these up.
+    NEW_REGION_ENSURE_UNDO -> "not_used" {
+        + rsrss_noop
+        - rsrss_new_region_ensure_undo
+    }
     NEW_REGION_ENSURE -> "ensured_dataset_and_region" {
         + rsrss_new_region_ensure
-        - rsrss_new_region_ensure_undo
     }
     GET_OLD_SNAPSHOT_VOLUME_ID -> "old_snapshot_volume_id" {
         + rsrss_get_old_snapshot_volume_id
@@ -153,6 +187,7 @@ impl NexusSaga for SagaRegionSnapshotReplacementStart {
         builder.append(get_alloc_region_params_action());
         builder.append(alloc_new_region_action());
         builder.append(find_new_region_action());
+        builder.append(new_region_ensure_undo_action());
         builder.append(new_region_ensure_action());
         builder.append(get_old_snapshot_volume_id_action());
         builder.append(create_fake_volume_action());
@@ -380,6 +415,10 @@ async fn rsrss_find_new_region(
     Ok(dataset_and_region)
 }

+async fn rsrss_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> {
+    Ok(())
+}
+
 async fn rsrss_new_region_ensure(
     sagactx: NexusActionContext,
 ) -> Result<
@@ -390,10 +429,6 @@
     let osagactx = sagactx.user_data();
     let log = osagactx.log();

-    // With a list of datasets and regions to ensure, other sagas need to have a
-    // separate no-op forward step for the undo action to ensure that the undo
-    // step occurs in the case that the ensure partially fails. Here this is not
-    // required, there's only one dataset and region.
     let new_dataset_and_region = sagactx
         .lookup::<(db::model::Dataset, db::model::Region)>(
             "new_dataset_and_region",
@@ -830,7 +865,7 @@ pub(crate) mod test {
     /// Create four zpools, a disk, and a snapshot of that disk
     async fn prepare_for_test(
         cptestctx: &ControlPlaneTestContext,
-    ) -> PrepareResult {
+    ) -> PrepareResult<'_> {
         let client = &cptestctx.external_client;
         let nexus = &cptestctx.server.server_context().nexus;
         let datastore = nexus.datastore();
@@ -876,20 +911,21 @@ pub(crate) mod test {
             panic!("test snapshot {:?} should exist", snapshot_id)
         });

-        PrepareResult { db_disk, snapshot, db_snapshot }
+        PrepareResult { db_disk, snapshot, db_snapshot, disk_test }
     }

-    struct PrepareResult {
+    struct PrepareResult<'a> {
         db_disk: nexus_db_model::Disk,
         snapshot: views::Snapshot,
         db_snapshot: nexus_db_model::Snapshot,
+        disk_test: DiskTest<'a, crate::Server>,
     }

     #[nexus_test(server = crate::Server)]
     async fn test_region_snapshot_replacement_start_saga(
         cptestctx: &ControlPlaneTestContext,
     ) {
-        let PrepareResult { db_disk, snapshot, db_snapshot } =
+        let PrepareResult { db_disk, snapshot, db_snapshot, .. } =
             prepare_for_test(cptestctx).await;

         let nexus = &cptestctx.server.server_context().nexus;
@@ -1009,9 +1045,11 @@ pub(crate) mod test {

     pub(crate) async fn verify_clean_slate(
         cptestctx: &ControlPlaneTestContext,
+        test: &DiskTest<'_, crate::Server>,
         request: &RegionSnapshotReplacement,
         affected_volume_original: &Volume,
     ) {
+        let sled_agent = &cptestctx.sled_agent.sled_agent;
         let datastore = cptestctx.server.server_context().nexus.datastore();

         crate::app::sagas::test_helpers::assert_no_failed_undo_steps(
@@ -1024,6 +1062,10 @@ pub(crate) mod test {
         // original disk, and three for the (currently unused) snapshot
         // destination volume
         assert_eq!(region_allocations(&datastore).await, 6);
+
+        // Assert that only those six provisioned regions are non-destroyed
+        assert_no_other_ensured_regions(sled_agent, test, &datastore).await;
+
         assert_region_snapshot_replacement_request_untouched(
             cptestctx, &datastore, &request,
         )
@@ -1031,11 +1073,12 @@ pub(crate) mod test {
         assert_volume_untouched(&datastore, &affected_volume_original).await;
     }

-    async fn region_allocations(datastore: &DataStore) -> usize {
+    async fn regions(datastore: &DataStore) -> Vec<db::model::Region> {
         use async_bb8_diesel::AsyncConnection;
         use async_bb8_diesel::AsyncRunQueryDsl;
         use async_bb8_diesel::AsyncSimpleConnection;
         use diesel::QueryDsl;
+        use diesel::SelectableHelper;
         use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL;
         use nexus_db_queries::db::schema::region::dsl;

@@ -1047,15 +1090,56 @@ pub(crate) mod test {
             conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await.unwrap();

             dsl::region
-                .count()
-                .get_result_async(&conn)
+                .select(db::model::Region::as_select())
+                .get_results_async(&conn)
                 .await
-                .map(|x: i64| x as usize)
         })
         .await
         .unwrap()
     }

+    async fn region_allocations(datastore: &DataStore) -> usize {
+        regions(datastore).await.len()
+    }
+
+    async fn assert_no_other_ensured_regions(
+        sled_agent: &omicron_sled_agent::sim::SledAgent,
+        test: &DiskTest<'_, crate::Server>,
+        datastore: &DataStore,
+    ) {
+        let mut non_destroyed_regions_from_agent = vec![];
+
+        for zpool in test.zpools() {
+            for dataset in &zpool.datasets {
+                let crucible_dataset =
+                    sled_agent.get_crucible_dataset(zpool.id, dataset.id).await;
+                for region in crucible_dataset.list().await {
+                    match region.state {
+                        crucible_agent_client::types::State::Tombstoned
+                        | crucible_agent_client::types::State::Destroyed => {
+                            // ok
+                        }
+
+                        _ => {
+                            non_destroyed_regions_from_agent
+                                .push(region.clone());
+                        }
+                    }
+                }
+            }
+        }
+
+        let db_regions = regions(datastore).await;
+        let db_region_ids: Vec<Uuid> =
+            db_regions.iter().map(|x| x.id()).collect();
+
+        for region in non_destroyed_regions_from_agent {
+            let region_id = region.id.0.parse().unwrap();
+            let contains = db_region_ids.contains(&region_id);
+            assert!(contains, "db does not have {:?}", region_id);
+        }
+    }
+
     async fn assert_region_snapshot_replacement_request_untouched(
         cptestctx: &ControlPlaneTestContext,
         datastore: &DataStore,
@@ -1094,11 +1178,78 @@ pub(crate) mod test {
         assert_eq!(actual, expected);
     }

+    #[nexus_test(server = crate::Server)]
+    async fn test_action_failure_can_unwind(
+        cptestctx: &ControlPlaneTestContext,
+    ) {
+        let PrepareResult { db_disk, snapshot, db_snapshot, disk_test } =
+            prepare_for_test(cptestctx).await;
+
+        let log = &cptestctx.logctx.log;
+        let nexus = &cptestctx.server.server_context().nexus;
+        let datastore = nexus.datastore();
+        let opctx = test_opctx(cptestctx);
+
+        let disk_allocated_regions =
+            datastore.get_allocated_regions(db_disk.volume_id).await.unwrap();
+        assert_eq!(disk_allocated_regions.len(), 3);
+
+        let region: &nexus_db_model::Region = &disk_allocated_regions[0].1;
+        let snapshot_id = snapshot.identity.id;
+
+        let region_snapshot = datastore
+            .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id)
+            .await
+            .unwrap()
+            .unwrap();
+
+        let request =
+            RegionSnapshotReplacement::for_region_snapshot(&region_snapshot);
+
+        datastore
+            .insert_region_snapshot_replacement_request(&opctx, request.clone())
+            .await
+            .unwrap();
+
+        let affected_volume_original =
+            datastore.volume_get(db_snapshot.volume_id).await.unwrap().unwrap();
+
+        verify_clean_slate(
+            &cptestctx,
+            &disk_test,
+            &request,
+            &affected_volume_original,
+        )
+        .await;
+
+        crate::app::sagas::test_helpers::action_failure_can_unwind::<
+            SagaRegionSnapshotReplacementStart,
+            _,
+            _,
+        >(
+            nexus,
+            || Box::pin(async { new_test_params(&opctx, &request) }),
+            || {
+                Box::pin(async {
+                    verify_clean_slate(
+                        &cptestctx,
+                        &disk_test,
+                        &request,
+                        &affected_volume_original,
+                    )
+                    .await;
+                })
+            },
+            log,
+        )
+        .await;
+    }
+
     #[nexus_test(server = crate::Server)]
     async fn test_action_failure_can_unwind_idempotently(
         cptestctx: &ControlPlaneTestContext,
     ) {
-        let PrepareResult { db_disk, snapshot, db_snapshot } =
+        let PrepareResult { db_disk, snapshot, db_snapshot, disk_test } =
             prepare_for_test(cptestctx).await;

         let log = &cptestctx.logctx.log;
@@ -1130,8 +1281,13 @@ pub(crate) mod test {
         let affected_volume_original =
             datastore.volume_get(db_snapshot.volume_id).await.unwrap().unwrap();

-        verify_clean_slate(&cptestctx, &request, &affected_volume_original)
-            .await;
+        verify_clean_slate(
+            &cptestctx,
+            &disk_test,
+            &request,
+            &affected_volume_original,
+        )
+        .await;

         crate::app::sagas::test_helpers::action_failure_can_unwind_idempotently::<
             SagaRegionSnapshotReplacementStart,
@@ -1143,6 +1299,7 @@ pub(crate) mod test {
             || Box::pin(async {
                 verify_clean_slate(
                     &cptestctx,
+                    &disk_test,
                     &request,
                     &affected_volume_original,
                 ).await;
@@ -1155,7 +1312,7 @@ pub(crate) mod test {
     async fn test_actions_succeed_idempotently(
         cptestctx: &ControlPlaneTestContext,
     ) {
-        let PrepareResult { db_disk, snapshot, db_snapshot: _ } =
+        let PrepareResult { db_disk, snapshot, .. } =
             prepare_for_test(cptestctx).await;

         let nexus = &cptestctx.server.server_context().nexus;
@@ -1192,4 +1349,66 @@ pub(crate) mod test {
         )
         .await;
     }
+
+    /// Assert this saga does not leak regions if the replacement read-only
+    /// region cannot be created.
+    #[nexus_test(server = crate::Server)]
+    async fn test_no_leak_region(cptestctx: &ControlPlaneTestContext) {
+        let PrepareResult { db_disk, snapshot, db_snapshot, disk_test } =
+            prepare_for_test(cptestctx).await;
+
+        let nexus = &cptestctx.server.server_context().nexus;
+        let datastore = nexus.datastore();
+        let opctx = test_opctx(cptestctx);
+
+        let disk_allocated_regions =
+            datastore.get_allocated_regions(db_disk.volume_id).await.unwrap();
+        assert_eq!(disk_allocated_regions.len(), 3);
+
+        let region: &nexus_db_model::Region = &disk_allocated_regions[0].1;
+        let snapshot_id = snapshot.identity.id;
+
+        let region_snapshot = datastore
+            .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id)
+            .await
+            .unwrap()
+            .unwrap();
+
+        let request =
+            RegionSnapshotReplacement::for_region_snapshot(&region_snapshot);
+
+        datastore
+            .insert_region_snapshot_replacement_request(&opctx, request.clone())
+            .await
+            .unwrap();
+
+        let affected_volume_original =
+            datastore.volume_get(db_snapshot.volume_id).await.unwrap().unwrap();
+
+        disk_test.set_always_fail_callback().await;
+
+        // Run the region snapshot replacement start saga
+        let dag =
+            create_saga_dag::<SagaRegionSnapshotReplacementStart>(Params {
+                serialized_authn: Serialized::for_opctx(&opctx),
+                request: request.clone(),
+                allocation_strategy: RegionAllocationStrategy::Random {
+                    seed: None,
+                },
+            })
+            .unwrap();
+
+        let runnable_saga = nexus.sagas.saga_prepare(dag).await.unwrap();
+
+        // Actually run the saga
+        runnable_saga.run_to_completion().await.unwrap();
+
+        verify_clean_slate(
+            &cptestctx,
+            &disk_test,
+            &request,
+            &affected_volume_original,
+        )
+        .await;
+    }
 }
diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs
index d9888f9ccd..e381146fc0 100644
--- a/nexus/tests/integration_tests/disks.rs
+++ b/nexus/tests/integration_tests/disks.rs
@@ -769,10 +769,9 @@ async fn test_disk_region_creation_failure(
         .await
         .unwrap();

-    // After the failed allocation, the disk should be Faulted
+    // After the failed allocation, the disk creation should have unwound
     let disks = disks_list(&client, &disks_url).await;
-    assert_eq!(disks.len(), 1);
-    assert_eq!(disks[0].state, DiskState::Faulted);
+    assert_eq!(disks.len(), 0);
 }

 // Tests that invalid block sizes are rejected
diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs
index dc8cf63fe4..8fd648096a 100644
--- a/sled-agent/src/sim/storage.rs
+++ b/sled-agent/src/sim/storage.rs
@@ -168,12 +168,6 @@ impl CrucibleDataInner {
         let id = Uuid::from_str(&id.0).unwrap();

         if let Some(region) = self.regions.get_mut(&id) {
-            if region.state == State::Failed {
-                // The real Crucible agent would not let a Failed region be
-                // deleted
-                bail!("cannot delete in state Failed");
-            }
-
             region.state = State::Destroyed;
             self.used_ports.remove(&region.port_number);
             Ok(Some(region.clone()))
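Usage sketch for the new omdb subcommand added above. This is an illustration only: the spellings assume clap's default kebab-case naming for the `ValidateRegions` variant and the `clean_up_orphaned_regions` flag, and are not confirmed by this diff.

    # Report regions where Nexus and the Crucible agents disagree (read-only).
    omdb db validate validate-regions

    # Additionally ask each Crucible agent to delete regions Nexus does not
    # know about. This path calls check_allow_destructive(), so omdb's
    # destructive-operation opt-in must also be supplied.
    omdb db validate validate-regions --clean-up-orphaned-regions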