Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split starting running snapshots into two steps #3097

Merged
merged 1 commit into from
May 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions nexus/src/app/sagas/disk_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,8 @@ pub(crate) mod test {
);
assert!(no_region_allocations_exist(datastore, &test).await);
assert!(no_regions_ensured(&sled_agent, &test).await);

assert!(test.crucible_resources_deleted().await);
}

#[nexus_test(server = crate::Server)]
Expand Down
12 changes: 9 additions & 3 deletions nexus/src/app/sagas/snapshot_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,12 @@ declare_saga_actions! {
+ ssc_detach_disk_from_pantry
}

START_RUNNING_SNAPSHOT_UNDO -> "ssc_not_used" {
+ ssc_noop
- ssc_start_running_snapshot_undo
}
START_RUNNING_SNAPSHOT -> "replace_sockets_map" {
+ ssc_start_running_snapshot
- ssc_start_running_snapshot_undo
}
CREATE_VOLUME_RECORD -> "created_volume" {
+ ssc_create_volume_record
Expand Down Expand Up @@ -266,8 +269,7 @@ impl NexusSaga for SagaSnapshotCreate {

// (Sleds + DB) Start snapshot downstairs, add an entry in the DB for
// the dataset's snapshot.
//
// TODO: Should this be two separate saga steps?
builder.append(start_running_snapshot_undo_action());
builder.append(start_running_snapshot_action());
// (DB) Copy and modify the disk volume construction request to point
// to the new running snapshot
Expand All @@ -289,6 +291,10 @@ impl NexusSaga for SagaSnapshotCreate {

// snapshot create saga: action implementations

async fn ssc_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> {
Ok(())
}

async fn ssc_alloc_regions(
sagactx: NexusActionContext,
) -> Result<Vec<(db::model::Dataset, db::model::Region)>, ActionError> {
Expand Down
113 changes: 113 additions & 0 deletions nexus/tests/integration_tests/snapshots.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ fn get_disks_url() -> String {
format!("/v1/disks?project={}", PROJECT_NAME)
}

fn get_disk_url(name: &str) -> String {
format!("/v1/disks/{}?project={}", name, PROJECT_NAME)
}

async fn create_org_and_project(client: &ClientTestContext) -> Uuid {
let project = create_project(client, PROJECT_NAME).await;
project.identity.id
Expand Down Expand Up @@ -747,6 +751,115 @@ async fn test_cannot_snapshot_if_no_space(cptestctx: &ControlPlaneTestContext) {
.expect("unexpected success creating snapshot");
}

#[nexus_test]
async fn test_snapshot_unwind(cptestctx: &ControlPlaneTestContext) {
let client = &cptestctx.external_client;
let disk_test = DiskTest::new(&cptestctx).await;
populate_ip_pool(&client, "default", None).await;
create_org_and_project(client).await;
let disks_url = get_disks_url();

// Define a global image
let server = ServerBuilder::new().run().unwrap();
server.expect(
Expectation::matching(request::method_path("HEAD", "/image.raw"))
.times(1..)
.respond_with(
status_code(200).append_header(
"Content-Length",
format!("{}", 4096 * 1000),
),
),
);

let image_create_params = params::ImageCreate {
identity: IdentityMetadataCreateParams {
name: "alpine-edge".parse().unwrap(),
description: String::from(
"you can boot any image, as long as it's alpine",
),
},
source: params::ImageSource::Url {
url: server.url("/image.raw").to_string(),
},
os: "alpine".to_string(),
version: "edge".to_string(),
block_size: params::BlockSize::try_from(512).unwrap(),
};

let images_url = format!("/v1/images?project={}", PROJECT_NAME);
let image =
NexusRequest::objects_post(client, &images_url, &image_create_params)
.authn_as(AuthnMode::PrivilegedUser)
.execute_and_parse_unwrap::<views::Image>()
.await;

// Create a disk from this image
let disk_size = ByteCount::from_gibibytes_u32(2);
let base_disk_name: Name = "base-disk".parse().unwrap();
let base_disk = params::DiskCreate {
identity: IdentityMetadataCreateParams {
name: base_disk_name.clone(),
description: String::from("sells rainsticks"),
},
disk_source: params::DiskSource::Image { image_id: image.identity.id },
size: disk_size,
};

let _base_disk: Disk = NexusRequest::new(
RequestBuilder::new(client, Method::POST, &disks_url)
.body(Some(&base_disk))
.expect_status(Some(StatusCode::CREATED)),
)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.unwrap()
.parsed_body()
.unwrap();

// Set the third region's running snapshot callback so it fails
let zpool = &disk_test.zpools[2];
let dataset = &zpool.datasets[0];
disk_test
.sled_agent
.get_crucible_dataset(zpool.id, dataset.id)
.await
.set_creating_a_running_snapshot_should_fail()
.await;

// Issue snapshot request, expecting it to fail
let snapshots_url = format!("/v1/snapshots?project={}", PROJECT_NAME);

NexusRequest::expect_failure_with_body(
client,
StatusCode::INTERNAL_SERVER_ERROR,
Method::POST,
&snapshots_url,
&params::SnapshotCreate {
identity: IdentityMetadataCreateParams {
name: "snapshot".parse().unwrap(),
description: String::from("a snapshot"),
},
disk: base_disk_name.clone(),
},
)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.expect("snapshot request succeeded");

// Delete the disk
NexusRequest::object_delete(client, &get_disk_url(base_disk_name.as_str()))
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.expect("failed to delete disk");

// Assert everything was cleaned up
assert!(disk_test.crucible_resources_deleted().await);
}

// Test that the code that Saga nodes call is idempotent

#[nexus_test]
Expand Down
14 changes: 14 additions & 0 deletions sled-agent/src/sim/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct CrucibleDataInner {
snapshots: HashMap<Uuid, Vec<Snapshot>>,
running_snapshots: HashMap<Uuid, HashMap<String, RunningSnapshot>>,
on_create: Option<CreateCallback>,
creating_a_running_snapshot_should_fail: bool,
next_port: u16,
}

Expand All @@ -47,6 +48,7 @@ impl CrucibleDataInner {
snapshots: HashMap::new(),
running_snapshots: HashMap::new(),
on_create: None,
creating_a_running_snapshot_should_fail: false,
next_port: crucible_port,
}
}
Expand Down Expand Up @@ -175,11 +177,19 @@ impl CrucibleDataInner {
Ok(())
}

fn set_creating_a_running_snapshot_should_fail(&mut self) {
self.creating_a_running_snapshot_should_fail = true;
}

fn create_running_snapshot(
&mut self,
id: &RegionId,
name: &str,
) -> Result<RunningSnapshot> {
if self.creating_a_running_snapshot_should_fail {
bail!("failure creating running snapshot");
}

let id = Uuid::from_str(&id.0).unwrap();

let map =
Expand Down Expand Up @@ -308,6 +318,10 @@ impl CrucibleData {
self.inner.lock().await.delete_snapshot(id, name)
}

pub async fn set_creating_a_running_snapshot_should_fail(&self) {
self.inner.lock().await.set_creating_a_running_snapshot_should_fail();
}

pub async fn create_running_snapshot(
&self,
id: &RegionId,
Expand Down