diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index a8032bb319..b32cccadc0 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -979,6 +979,8 @@ pub(crate) mod test { ); assert!(no_region_allocations_exist(datastore, &test).await); assert!(no_regions_ensured(&sled_agent, &test).await); + + assert!(test.crucible_resources_deleted().await); } #[nexus_test(server = crate::Server)] diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index d9ccffaa9c..ddd32fad1b 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -181,9 +181,12 @@ declare_saga_actions! { + ssc_detach_disk_from_pantry } + START_RUNNING_SNAPSHOT_UNDO -> "ssc_not_used" { + + ssc_noop + - ssc_start_running_snapshot_undo + } START_RUNNING_SNAPSHOT -> "replace_sockets_map" { + ssc_start_running_snapshot - - ssc_start_running_snapshot_undo } CREATE_VOLUME_RECORD -> "created_volume" { + ssc_create_volume_record @@ -266,8 +269,7 @@ impl NexusSaga for SagaSnapshotCreate { // (Sleds + DB) Start snapshot downstairs, add an entry in the DB for // the dataset's snapshot. - // - // TODO: Should this be two separate saga steps? + builder.append(start_running_snapshot_undo_action()); builder.append(start_running_snapshot_action()); // (DB) Copy and modify the disk volume construction request to point // to the new running snapshot @@ -289,6 +291,10 @@ impl NexusSaga for SagaSnapshotCreate { // snapshot create saga: action implementations +async fn ssc_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { + Ok(()) +} + async fn ssc_alloc_regions( sagactx: NexusActionContext, ) -> Result, ActionError> { diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 4ebf8eab57..a72538e349 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -45,6 +45,10 @@ fn get_disks_url() -> String { format!("/v1/disks?project={}", PROJECT_NAME) } +fn get_disk_url(name: &str) -> String { + format!("/v1/disks/{}?project={}", name, PROJECT_NAME) +} + async fn create_org_and_project(client: &ClientTestContext) -> Uuid { let project = create_project(client, PROJECT_NAME).await; project.identity.id @@ -747,6 +751,115 @@ async fn test_cannot_snapshot_if_no_space(cptestctx: &ControlPlaneTestContext) { .expect("unexpected success creating snapshot"); } +#[nexus_test] +async fn test_snapshot_unwind(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + populate_ip_pool(&client, "default", None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Define a global image + let server = ServerBuilder::new().run().unwrap(); + server.expect( + Expectation::matching(request::method_path("HEAD", "/image.raw")) + .times(1..) + .respond_with( + status_code(200).append_header( + "Content-Length", + format!("{}", 4096 * 1000), + ), + ), + ); + + let image_create_params = params::ImageCreate { + identity: IdentityMetadataCreateParams { + name: "alpine-edge".parse().unwrap(), + description: String::from( + "you can boot any image, as long as it's alpine", + ), + }, + source: params::ImageSource::Url { + url: server.url("/image.raw").to_string(), + }, + os: "alpine".to_string(), + version: "edge".to_string(), + block_size: params::BlockSize::try_from(512).unwrap(), + }; + + let images_url = format!("/v1/images?project={}", PROJECT_NAME); + let image = + NexusRequest::objects_post(client, &images_url, &image_create_params) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap::() + .await; + + // Create a disk from this image + let disk_size = ByteCount::from_gibibytes_u32(2); + let base_disk_name: Name = "base-disk".parse().unwrap(); + let base_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: base_disk_name.clone(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Image { image_id: image.identity.id }, + size: disk_size, + }; + + let _base_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&base_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Set the third region's running snapshot callback so it fails + let zpool = &disk_test.zpools[2]; + let dataset = &zpool.datasets[0]; + disk_test + .sled_agent + .get_crucible_dataset(zpool.id, dataset.id) + .await + .set_creating_a_running_snapshot_should_fail() + .await; + + // Issue snapshot request, expecting it to fail + let snapshots_url = format!("/v1/snapshots?project={}", PROJECT_NAME); + + NexusRequest::expect_failure_with_body( + client, + StatusCode::INTERNAL_SERVER_ERROR, + Method::POST, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "snapshot".parse().unwrap(), + description: String::from("a snapshot"), + }, + disk: base_disk_name.clone(), + }, + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("snapshot request succeeded"); + + // Delete the disk + NexusRequest::object_delete(client, &get_disk_url(base_disk_name.as_str())) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + // Test that the code that Saga nodes call is idempotent #[nexus_test] diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 79da8cae22..69d476576b 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -37,6 +37,7 @@ struct CrucibleDataInner { snapshots: HashMap>, running_snapshots: HashMap>, on_create: Option, + creating_a_running_snapshot_should_fail: bool, next_port: u16, } @@ -47,6 +48,7 @@ impl CrucibleDataInner { snapshots: HashMap::new(), running_snapshots: HashMap::new(), on_create: None, + creating_a_running_snapshot_should_fail: false, next_port: crucible_port, } } @@ -175,11 +177,19 @@ impl CrucibleDataInner { Ok(()) } + fn set_creating_a_running_snapshot_should_fail(&mut self) { + self.creating_a_running_snapshot_should_fail = true; + } + fn create_running_snapshot( &mut self, id: &RegionId, name: &str, ) -> Result { + if self.creating_a_running_snapshot_should_fail { + bail!("failure creating running snapshot"); + } + let id = Uuid::from_str(&id.0).unwrap(); let map = @@ -308,6 +318,10 @@ impl CrucibleData { self.inner.lock().await.delete_snapshot(id, name) } + pub async fn set_creating_a_running_snapshot_should_fail(&self) { + self.inner.lock().await.set_creating_a_running_snapshot_should_fail(); + } + pub async fn create_running_snapshot( &self, id: &RegionId,