Skip to content

Commit

Permalink
Simulated Crucible agent should be stricter (#4049)
Browse files Browse the repository at this point in the history
There's a few more info messages here that will aid debugging, plus a
few ways the simulated Crucible agent is now stricter:

- it should be impossible to create a snapshot of a region in a bad
state.

- it should be an error to try to delete a snapshot for a non-existent
region (the real sled agent would return a 404)

- it should be an error to attempt creating a running snapshot for a
snapshot that does not exist

The actual Crucible agent reuses freed ports, so the simulated one
should too.
  • Loading branch information
jmpesp authored Sep 9, 2024
1 parent ad8548a commit 69af861
Show file tree
Hide file tree
Showing 3 changed files with 406 additions and 76 deletions.
7 changes: 0 additions & 7 deletions sled-agent/src/sim/http_entrypoints_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,13 +282,6 @@ async fn region_delete_running_snapshot(
));
}

if crucible.get_snapshot_for_region(&p.id, &p.name).await.is_none() {
return Err(HttpError::for_not_found(
None,
format!("snapshot {:?} not found", p.name),
));
}

crucible.delete_running_snapshot(&p.id, &p.name).await.map_err(|e| {
HttpError::for_internal_error(format!(
"running snapshot create failure: {:?}",
Expand Down
27 changes: 14 additions & 13 deletions sled-agent/src/sim/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,9 @@ impl SledAgent {
///
/// Crucible regions are returned with a port number, and volume
/// construction requests contain a single Nexus region (which points to
/// three crucible regions). Extract the region addresses, lookup the
/// region from the port (which should be unique), and pair disk id with
/// region ids. This map is referred to later when making snapshots.
/// three crucible regions). Extract the region addresses, lookup the region
/// from the port and pair disk id with region ids. This map is referred to
/// later when making snapshots.
pub async fn map_disk_ids_to_region_ids(
&self,
volume_construction_request: &VolumeConstructionRequest,
Expand All @@ -235,22 +235,18 @@ impl SledAgent {

let storage = self.storage.lock().await;
for target in targets {
let crucible_data = storage
.get_dataset_for_port(target.port())
let region = storage
.get_region_for_port(target.port())
.await
.ok_or_else(|| {
Error::internal_error(&format!(
"no dataset for port {}",
"no region for port {}",
target.port()
))
})?;

for region in crucible_data.list().await {
if region.port_number == target.port() {
let region_id = Uuid::from_str(&region.id.0).unwrap();
region_ids.push(region_id);
}
}
let region_id = Uuid::from_str(&region.id.0).unwrap();
region_ids.push(region_id);
}

let mut disk_id_to_region_ids = self.disk_id_to_region_ids.lock().await;
Expand Down Expand Up @@ -670,14 +666,19 @@ impl SledAgent {
Error::not_found_by_id(ResourceType::Disk, &disk_id)
})?;

info!(self.log, "disk id {} region ids are {:?}", disk_id, region_ids);

let storage = self.storage.lock().await;

for region_id in region_ids {
let crucible_data =
storage.get_dataset_for_region(*region_id).await;

if let Some(crucible_data) = crucible_data {
crucible_data.create_snapshot(*region_id, snapshot_id).await;
crucible_data
.create_snapshot(*region_id, snapshot_id)
.await
.map_err(|e| Error::internal_error(&e.to_string()))?;
} else {
return Err(Error::not_found_by_id(
ResourceType::Disk,
Expand Down
Loading

0 comments on commit 69af861

Please sign in to comment.