diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index 64ddc7c451..853db4195a 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema. Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(49, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(50, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
         // |  leaving the first copy as an example for the next person.
         // v
         // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
+        KnownVersion::new(50, "add-lookup-disk-by-volume-id-index"),
         KnownVersion::new(49, "physical-disk-state-and-policy"),
         KnownVersion::new(48, "add-metrics-producers-time-modified-index"),
         KnownVersion::new(47, "add-view-for-bgp-peer-configs"),
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index b40b641202..c753ac5436 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -114,6 +114,7 @@ pub use virtual_provisioning_collection::StorageType;
 pub use volume::read_only_resources_associated_with_volume;
 pub use volume::CrucibleResources;
 pub use volume::CrucibleTargets;
+pub use volume::VolumeCheckoutReason;
 
 // Number of unique datasets required to back a region.
 // TODO: This should likely turn into a configuration option.
diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs
index a9646b9ef6..0e80ee3e3c 100644
--- a/nexus/db-queries/src/db/datastore/volume.rs
+++ b/nexus/db-queries/src/db/datastore/volume.rs
@@ -11,8 +11,10 @@ use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
 use crate::db::identity::Asset;
 use crate::db::model::Dataset;
+use crate::db::model::Disk;
 use crate::db::model::DownstairsClientStopRequestNotification;
 use crate::db::model::DownstairsClientStoppedNotification;
+use crate::db::model::Instance;
 use crate::db::model::Region;
 use crate::db::model::RegionSnapshot;
 use crate::db::model::UpstairsRepairNotification;
@@ -25,6 +27,7 @@ use anyhow::bail;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use diesel::prelude::*;
 use diesel::OptionalExtension;
+use nexus_types::identity::Resource;
 use omicron_common::api::external::CreateResult;
 use omicron_common::api::external::DeleteResult;
 use omicron_common::api::external::Error;
@@ -44,6 +47,40 @@ use serde::Serialize;
 use sled_agent_client::types::VolumeConstructionRequest;
 use uuid::Uuid;
 
+#[derive(Debug, Clone, Copy)]
+pub enum VolumeCheckoutReason {
+    /// Check out a read-only Volume.
+    ReadOnlyCopy,
+
+    /// Check out a Volume to modify and store back to the database.
+    CopyAndModify,
+
+    /// Check out a Volume to send to Propolis to start an instance.
+    InstanceStart { vmm_id: Uuid },
+
+    /// Check out a Volume to send to a migration destination Propolis.
+    InstanceMigrate { vmm_id: Uuid, target_vmm_id: Uuid },
+
+    /// Check out a Volume to send to a Pantry (for background maintenance
+    /// operations).
+ Pantry, +} + +#[derive(Debug, thiserror::Error)] +enum VolumeGetError { + #[error("Serde error during volume_checkout: {0}")] + SerdeError(#[from] serde_json::Error), + + #[error("Updated {0} database rows, expected {1}")] + UnexpectedDatabaseUpdate(usize, usize), + + #[error("Checkout condition failed: {0}")] + CheckoutConditionFailed(String), + + #[error("Invalid Volume: {0}")] + InvalidVolume(String), +} + impl DataStore { pub async fn volume_create(&self, volume: Volume) -> CreateResult { use db::schema::volume::dsl; @@ -194,6 +231,244 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + async fn volume_checkout_allowed( + reason: &VolumeCheckoutReason, + vcr: &VolumeConstructionRequest, + maybe_disk: Option, + maybe_instance: Option, + ) -> Result<(), VolumeGetError> { + match reason { + VolumeCheckoutReason::ReadOnlyCopy => { + // When checking out to make a copy (usually for use as a + // read-only parent), the volume must be read only. Even if a + // call-site that uses Copy sends this copied Volume to a + // Propolis or Pantry, the Upstairs that will be created will be + // read-only, and will not take over from other read-only + // Upstairs. + + match volume_is_read_only(&vcr) { + Ok(read_only) => { + if !read_only { + return Err(VolumeGetError::CheckoutConditionFailed( + String::from("Non-read-only Volume Checkout for use Copy!") + )); + } + + Ok(()) + } + + Err(e) => Err(VolumeGetError::InvalidVolume(e.to_string())), + } + } + + VolumeCheckoutReason::CopyAndModify => { + // `CopyAndModify` is used when taking a read/write Volume, + // modifying it (for example, when taking a snapshot, to point + // to read-only resources), and committing it back to the DB. + // This is a checkout of a read/write Volume, so creating an + // Upstairs from it *may* take over from something else. The + // call-site must ensure this doesn't happen, but we can't do + // that here. + + Ok(()) + } + + VolumeCheckoutReason::InstanceStart { vmm_id } => { + // Check out this volume to send to Propolis to start an + // Instance. The VMM id in the enum must match the instance's + // propolis_id. + + let Some(instance) = &maybe_instance else { + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceStart {}: instance does not exist", + vmm_id + ), + )); + }; + + let runtime = instance.runtime(); + match (runtime.propolis_id, runtime.dst_propolis_id) { + (Some(_), Some(_)) => { + Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceStart {}: instance {} is undergoing migration", + vmm_id, + instance.id(), + ) + )) + } + + (None, None) => { + Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceStart {}: instance {} has no propolis ids", + vmm_id, + instance.id(), + ) + )) + } + + (Some(propolis_id), None) => { + if propolis_id != *vmm_id { + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceStart {}: instance {} propolis id {} mismatch", + vmm_id, + instance.id(), + propolis_id, + ) + )); + } + + Ok(()) + } + + (None, Some(dst_propolis_id)) => { + Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceStart {}: instance {} has no propolis id but dst propolis id {}", + vmm_id, + instance.id(), + dst_propolis_id, + ) + )) + } + } + } + + VolumeCheckoutReason::InstanceMigrate { vmm_id, target_vmm_id } => { + // Check out this volume to send to destination Propolis to + // migrate an Instance. Only take over from the specified source + // VMM. 
+ + let Some(instance) = &maybe_instance else { + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceMigrate {} {}: instance does not exist", + vmm_id, target_vmm_id + ), + )); + }; + + let runtime = instance.runtime(); + match (runtime.propolis_id, runtime.dst_propolis_id) { + (Some(propolis_id), Some(dst_propolis_id)) => { + if propolis_id != *vmm_id || dst_propolis_id != *target_vmm_id { + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceMigrate {} {}: instance {} propolis id mismatches {} {}", + vmm_id, + target_vmm_id, + instance.id(), + propolis_id, + dst_propolis_id, + ) + )); + } + + Ok(()) + } + + (None, None) => { + Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceMigrate {} {}: instance {} has no propolis ids", + vmm_id, + target_vmm_id, + instance.id(), + ) + )) + } + + (Some(propolis_id), None) => { + // XXX is this right? + if propolis_id != *vmm_id { + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceMigrate {} {}: instance {} propolis id {} mismatch", + vmm_id, + target_vmm_id, + instance.id(), + propolis_id, + ) + )); + } + + Ok(()) + } + + (None, Some(dst_propolis_id)) => { + Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceMigrate {} {}: instance {} has no propolis id but dst propolis id {}", + vmm_id, + target_vmm_id, + instance.id(), + dst_propolis_id, + ) + )) + } + } + } + + VolumeCheckoutReason::Pantry => { + // Check out this Volume to send to a Pantry, which will create + // a read/write Upstairs, for background maintenance operations. + // There must not be any Propolis, otherwise this will take over + // from that and cause errors for guest OSes. + + let Some(disk) = maybe_disk else { + // This volume isn't backing a disk, it won't take over from + // a Propolis' Upstairs. + return Ok(()); + }; + + let Some(attach_instance_id) = + disk.runtime().attach_instance_id + else { + // The volume is backing a disk that is not attached to an + // instance. At this moment it won't take over from a + // Propolis' Upstairs, so send it to a Pantry to create an + // Upstairs there. A future checkout that happens after + // this transaction that is sent to a Propolis _will_ take + // over from this checkout (sent to a Pantry), which is ok. + return Ok(()); + }; + + let Some(instance) = maybe_instance else { + // The instance, which the disk that this volume backs is + // attached to, doesn't exist? + // + // XXX this is a Nexus bug! + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "Pantry: instance {} backing disk {} does not exist?", + attach_instance_id, + disk.id(), + ) + )); + }; + + if let Some(propolis_id) = instance.runtime().propolis_id { + // The instance, which the disk that this volume backs is + // attached to, exists and has an active propolis ID. A + // propolis _may_ exist, so bail here - an activation from + // the Pantry is not allowed to take over from a Propolis. + Err(VolumeGetError::CheckoutConditionFailed(format!( + "Pantry: possible Propolis {}", + propolis_id + ))) + } else { + // The instance, which the disk that this volume backs is + // attached to, exists, but there is no active propolis ID. + // This is ok. + Ok(()) + } + } + } + } + /// Checkout a copy of the Volume from the database. 
/// This action (getting a copy) will increase the generation number /// of Volumes of the VolumeConstructionRequest::Volume type that have @@ -203,18 +478,10 @@ impl DataStore { pub async fn volume_checkout( &self, volume_id: Uuid, + reason: VolumeCheckoutReason, ) -> LookupResult { use db::schema::volume::dsl; - #[derive(Debug, thiserror::Error)] - enum VolumeGetError { - #[error("Serde error during volume_checkout: {0}")] - SerdeError(#[from] serde_json::Error), - - #[error("Updated {0} database rows, expected {1}")] - UnexpectedDatabaseUpdate(usize, usize), - } - // We perform a transaction here, to be sure that on completion // of this, the database contains an updated version of the // volume with the generation number incremented (for the volume @@ -241,6 +508,56 @@ impl DataStore { err.bail(VolumeGetError::SerdeError(e)) })?; + // The VolumeConstructionRequest resulting from this checkout will have its + // generation numbers bumped, and as result will (if it has non-read-only + // sub-volumes) take over from previous read/write activations when sent to a + // place that will `construct` a new Volume. Depending on the checkout reason, + // prevent creating multiple read/write Upstairs acting on the same Volume, + // except where the take over is intended. + + let (maybe_disk, maybe_instance) = { + use db::schema::instance::dsl as instance_dsl; + use db::schema::disk::dsl as disk_dsl; + + let maybe_disk: Option = disk_dsl::disk + .filter(disk_dsl::time_deleted.is_null()) + .filter(disk_dsl::volume_id.eq(volume_id)) + .select(Disk::as_select()) + .get_result_async(&conn) + .await + .optional()?; + + let maybe_instance: Option = if let Some(disk) = &maybe_disk { + if let Some(attach_instance_id) = disk.runtime().attach_instance_id { + instance_dsl::instance + .filter(instance_dsl::time_deleted.is_null()) + .filter(instance_dsl::id.eq(attach_instance_id)) + .select(Instance::as_select()) + .get_result_async(&conn) + .await + .optional()? + } else { + // Disk not attached to an instance + None + } + } else { + // Volume not associated with disk + None + }; + + (maybe_disk, maybe_instance) + }; + + if let Err(e) = Self::volume_checkout_allowed( + &reason, + &vcr, + maybe_disk, + maybe_instance, + ) + .await { + return Err(err.bail(e)); + } + // Look to see if the VCR is a Volume type, and if so, look at // its sub_volumes. 
If they are of type Region, then we need // to update their generation numbers and record that update @@ -353,8 +670,17 @@ impl DataStore { .await .map_err(|e| { if let Some(err) = err.take() { - return Error::internal_error(&format!("Transaction error: {}", err)); + match err { + VolumeGetError::CheckoutConditionFailed(message) => { + return Error::conflict(message); + } + + _ => { + return Error::internal_error(&format!("Transaction error: {}", err)); + } + } } + public_error_from_diesel(e, ErrorHandler::Server) }) } @@ -447,8 +773,9 @@ impl DataStore { pub async fn volume_checkout_randomize_ids( &self, volume_id: Uuid, + reason: VolumeCheckoutReason, ) -> CreateResult { - let volume = self.volume_checkout(volume_id).await?; + let volume = self.volume_checkout(volume_id, reason).await?; let vcr: sled_agent_client::types::VolumeConstructionRequest = serde_json::from_str(volume.data())?; @@ -1309,6 +1636,51 @@ pub fn read_only_resources_associated_with_volume( } } +/// Returns true if the sub-volumes of a Volume are all read-only +pub fn volume_is_read_only( + vcr: &VolumeConstructionRequest, +) -> anyhow::Result { + match vcr { + VolumeConstructionRequest::Volume { sub_volumes, .. } => { + for sv in sub_volumes { + match sv { + VolumeConstructionRequest::Region { opts, .. } => { + if !opts.read_only { + return Ok(false); + } + } + + _ => { + bail!("Saw non-Region in sub-volume {:?}", sv); + } + } + } + + Ok(true) + } + + VolumeConstructionRequest::Region { .. } => { + // We don't support a pure Region VCR at the volume + // level in the database, so this choice should + // never be encountered, but I want to know if it is. + panic!("Region not supported as a top level volume"); + } + + VolumeConstructionRequest::File { .. } => { + // Effectively, this is read-only, as this BlockIO implementation + // does not have a `write` implementation. This will be hit if + // trying to make a snapshot or image out of a + // `YouCanBootAnythingAsLongAsItsAlpine` image source. + Ok(true) + } + + VolumeConstructionRequest::Url { .. } => { + // ImageSource::Url was deprecated + bail!("Saw VolumeConstructionRequest::Url"); + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/nexus/src/app/image.rs b/nexus/src/app/image.rs index a7fe75a464..96a3e6b06f 100644 --- a/nexus/src/app/image.rs +++ b/nexus/src/app/image.rs @@ -121,7 +121,10 @@ impl super::Nexus { let image_volume = self .db_datastore - .volume_checkout_randomize_ids(db_snapshot.volume_id) + .volume_checkout_randomize_ids( + db_snapshot.volume_id, + db::datastore::VolumeCheckoutReason::ReadOnlyCopy, + ) .await?; db::model::Image { diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index e29ed21192..a82a53331e 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -173,6 +173,13 @@ enum InstanceStateChangeRequestAction { SendToSled(Uuid), } +/// What is the higher level operation that is calling +/// `instance_ensure_registered`? 
+pub(crate) enum InstanceRegisterReason { + Start { vmm_id: Uuid }, + Migrate { vmm_id: Uuid, target_vmm_id: Uuid }, +} + impl super::Nexus { pub fn instance_lookup<'a>( &'a self, @@ -1010,6 +1017,7 @@ impl super::Nexus { db_instance: &db::model::Instance, propolis_id: &Uuid, initial_vmm: &db::model::Vmm, + operation: InstanceRegisterReason, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, authz_instance).await?; @@ -1065,8 +1073,19 @@ impl super::Nexus { } }; - let volume = - self.db_datastore.volume_checkout(disk.volume_id).await?; + let volume = self + .db_datastore + .volume_checkout( + disk.volume_id, + match operation { + InstanceRegisterReason::Start { vmm_id } => + db::datastore::VolumeCheckoutReason::InstanceStart { vmm_id }, + InstanceRegisterReason::Migrate { vmm_id, target_vmm_id } => + db::datastore::VolumeCheckoutReason::InstanceMigrate { vmm_id, target_vmm_id }, + } + ) + .await?; + disk_reqs.push(sled_agent_client::types::DiskRequest { name: disk.name().to_string(), slot: sled_agent_client::types::Slot(slot.0), diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index 3b590f6205..bf530ef858 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -769,7 +769,10 @@ pub(crate) async fn call_pantry_attach_for_disk( let disk_volume = nexus .datastore() - .volume_checkout(disk.volume_id) + .volume_checkout( + disk.volume_id, + db::datastore::VolumeCheckoutReason::Pantry, + ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 830a4dd96c..165bf7573c 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -390,7 +390,10 @@ async fn sdc_regions_ensure( let volume = osagactx .datastore() - .volume_checkout(db_snapshot.volume_id) + .volume_checkout( + db_snapshot.volume_id, + db::datastore::VolumeCheckoutReason::ReadOnlyCopy, + ) .await .map_err(ActionError::action_failed)?; @@ -433,7 +436,10 @@ async fn sdc_regions_ensure( let volume = osagactx .datastore() - .volume_checkout(image.volume_id) + .volume_checkout( + image.volume_id, + db::datastore::VolumeCheckoutReason::ReadOnlyCopy, + ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/finalize_disk.rs b/nexus/src/app/sagas/finalize_disk.rs index d4f6fc39aa..89893fb703 100644 --- a/nexus/src/app/sagas/finalize_disk.rs +++ b/nexus/src/app/sagas/finalize_disk.rs @@ -79,7 +79,8 @@ impl NexusSaga for SagaFinalizeDisk { silo_id: params.silo_id, project_id: params.project_id, disk_id: params.disk_id, - attached_instance_and_sled: None, + attach_instance_id: None, + use_the_pantry: true, create_params: params::SnapshotCreate { identity: external::IdentityMetadataCreateParams { name: snapshot_name.clone(), diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index da3b3e93ea..e4bdd989cc 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -4,7 +4,8 @@ use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; use crate::app::instance::{ - InstanceStateChangeError, InstanceStateChangeRequest, + InstanceRegisterReason, InstanceStateChangeError, + InstanceStateChangeRequest, }; use crate::app::sagas::{ declare_saga_actions, instance_common::allocate_vmm_ipv6, @@ -356,6 +357,10 @@ async fn sim_ensure_destination_propolis( &db_instance, &vmm.id, &vmm, + InstanceRegisterReason::Migrate { + vmm_id: 
params.src_vmm.id, + target_vmm_id: vmm.id, + }, ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index b1d9506c31..98fcec13a7 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -10,6 +10,7 @@ use super::{ instance_common::allocate_vmm_ipv6, NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID, }; +use crate::app::instance::InstanceRegisterReason; use crate::app::instance::InstanceStateChangeError; use crate::app::sagas::declare_saga_actions; use chrono::Utc; @@ -527,6 +528,7 @@ async fn sis_ensure_registered( &db_instance, &propolis_id, &vmm_record, + InstanceRegisterReason::Start { vmm_id: propolis_id }, ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 8b6febf71a..ff57470a5f 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -130,7 +130,8 @@ pub(crate) struct Params { pub silo_id: Uuid, pub project_id: Uuid, pub disk_id: Uuid, - pub attached_instance_and_sled: Option<(Uuid, Uuid)>, + pub attach_instance_id: Option, + pub use_the_pantry: bool, pub create_params: params::SnapshotCreate, } @@ -251,8 +252,7 @@ impl NexusSaga for SagaSnapshotCreate { // (DB) Tracks virtual resource provisioning. builder.append(space_account_action()); - let use_the_pantry = params.attached_instance_and_sled.is_none(); - if !use_the_pantry { + if !params.use_the_pantry { // (Sleds) If the disk is attached to an instance, send a // snapshot request to sled-agent to create a ZFS snapshot. builder.append(send_snapshot_request_to_sled_agent_action()); @@ -284,7 +284,7 @@ impl NexusSaga for SagaSnapshotCreate { // (DB) Mark snapshot as "ready" builder.append(finalize_snapshot_record_action()); - if use_the_pantry { + if params.use_the_pantry { // (Pantry) Set the state back to Detached // // This has to be the last saga node! Otherwise, concurrent @@ -675,22 +675,47 @@ async fn ssc_send_snapshot_request_to_sled_agent( let snapshot_id = sagactx.lookup::("snapshot_id")?; // If this node was reached, the saga initiator thought the disk was - // attached to an instance that was running on a specific sled. Contact that - // sled and ask it to initiate a snapshot. Note that this is best-effort: - // the instance may have stopped (or may be have stopped, had the disk - // detached, and resumed running on the same sled) while the saga was - // executing. - let (instance_id, sled_id) = - params.attached_instance_and_sled.ok_or_else(|| { - ActionError::action_failed(Error::internal_error( - "snapshot saga in send_snapshot_request_to_sled_agent but no \ - instance/sled pair was provided", - )) - })?; + // attached to an instance that _may_ have a running Propolis. Contact that + // Propolis and ask it to initiate a snapshot. Note that this is + // best-effort: the instance may have stopped (or may be have stopped, had + // the disk detached, and resumed running on the same sled) while the saga + // was executing. 
+ let Some(attach_instance_id) = params.attach_instance_id else { + return Err(ActionError::action_failed(Error::internal_error( + "attach instance id is None!", + ))); + }; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(attach_instance_id) + .lookup_for(authz::Action::Read) + .await + .map_err(ActionError::action_failed)?; + + let sled_id = osagactx + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .map_err(ActionError::action_failed)? + .sled_id(); + + // If this instance does not currently have a sled, we can't continue this + // saga - the user will have to reissue the snapshot request and it will get + // run on a Pantry. + let Some(sled_id) = sled_id else { + return Err(ActionError::action_failed(Error::unavail( + "sled id is None!", + ))); + }; info!(log, "asking for disk snapshot from Propolis via sled agent"; "disk_id" => %params.disk_id, - "instance_id" => %instance_id, + "instance_id" => %attach_instance_id, "sled_id" => %sled_id); let sled_agent_client = osagactx @@ -702,7 +727,7 @@ async fn ssc_send_snapshot_request_to_sled_agent( retry_until_known_result(log, || async { sled_agent_client .instance_issue_disk_snapshot_request( - &instance_id, + &attach_instance_id, ¶ms.disk_id, &InstanceIssueDiskSnapshotRequestBody { snapshot_id }, ) @@ -838,6 +863,16 @@ async fn ssc_attach_disk_to_pantry( info!(log, "disk {} in state finalizing", params.disk_id); } + external::DiskState::Attached(attach_instance_id) => { + // No state change required + info!( + log, + "disk {} in state attached to instance id {}", + params.disk_id, + attach_instance_id + ); + } + _ => { // Return a 503 indicating that the user should retry return Err(ActionError::action_failed( @@ -1358,7 +1393,10 @@ async fn ssc_create_volume_record( let disk_volume = osagactx .datastore() - .volume_checkout(disk.volume_id) + .volume_checkout( + disk.volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) .await .map_err(ActionError::action_failed)?; @@ -1815,14 +1853,16 @@ mod test { project_id: Uuid, disk_id: Uuid, disk: NameOrId, - instance_and_sled: Option<(Uuid, Uuid)>, + attach_instance_id: Option, + use_the_pantry: bool, ) -> Params { Params { serialized_authn: authn::saga::Serialized::for_opctx(opctx), silo_id, project_id, disk_id, - attached_instance_and_sled: instance_and_sled, + attach_instance_id, + use_the_pantry, create_params: params::SnapshotCreate { identity: IdentityMetadataCreateParams { name: "my-snapshot".parse().expect("Invalid disk name"), @@ -1871,7 +1911,8 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - None, + None, // not attached to an instance + true, // use the pantry ); let dag = create_saga_dag::(params).unwrap(); let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); @@ -2079,7 +2120,7 @@ mod test { // since this is just a test, bypass the normal // attachment machinery and just update the disk's // database record directly. 
- let instance_and_sled = if !use_the_pantry { + let attach_instance_id = if !use_the_pantry { let state = setup_test_instance( cptestctx, client, @@ -2092,11 +2133,7 @@ mod test { ) .await; - let sled_id = state - .sled_id() - .expect("running instance should have a vmm"); - - Some((state.instance().id(), sled_id)) + Some(state.instance().id()) } else { None }; @@ -2107,7 +2144,8 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - instance_and_sled, + attach_instance_id, + use_the_pantry, ) } }) @@ -2205,36 +2243,31 @@ mod test { Name::from_str(DISK_NAME).unwrap().into(), // The disk isn't attached at this time, so don't supply a sled. None, + true, // use the pantry ); let dag = create_saga_dag::(params).unwrap(); let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); // Before running the saga, attach the disk to an instance! - let (.., authz_disk, db_disk) = - LookupPath::new(&opctx, nexus.datastore()) - .disk_id(disk_id) - .fetch_for(authz::Action::Read) - .await - .expect("Failed to look up created disk"); - - assert!(nexus - .datastore() - .disk_update_runtime( - &opctx, - &authz_disk, - &db_disk.runtime().attach(Uuid::new_v4()), - ) - .await - .expect("failed to attach disk")); + let _instance_and_vmm = setup_test_instance( + &cptestctx, + &client, + vec![params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { + name: Name::from_str(DISK_NAME).unwrap(), + }, + )], + ) + .await; // Actually run the saga let output = nexus.run_saga(runnable_saga).await; - // Expect to see 503 + // Expect to see 409 match output { Err(e) => { - assert!(matches!(e, Error::ServiceUnavailable { .. })); + assert!(matches!(e, Error::Conflict { .. })); } Ok(_) => { @@ -2269,6 +2302,7 @@ mod test { Name::from_str(DISK_NAME).unwrap().into(), // The disk isn't attached at this time, so don't supply a sled. None, + true, // use the pantry ); let dag = create_saga_dag::(params).unwrap(); @@ -2313,8 +2347,6 @@ mod test { // the saga, stopping the instance, detaching the disk, and then letting // the saga run. 
let fake_instance_id = Uuid::new_v4(); - let fake_sled_id = - Uuid::parse_str(nexus_test_utils::SLED_AGENT_UUID).unwrap(); let params = new_test_params( &opctx, @@ -2322,7 +2354,8 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - Some((fake_instance_id, fake_sled_id)), + Some(fake_instance_id), + false, // use the pantry ); let dag = create_saga_dag::(params).unwrap(); @@ -2363,10 +2396,6 @@ mod test { ) .await; - let sled_id = instance_state - .sled_id() - .expect("running instance should have a vmm"); - // Rerun the saga let params = new_test_params( &opctx, @@ -2374,7 +2403,8 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - Some((instance_state.instance().id(), sled_id)), + Some(instance_state.instance().id()), + false, // use the pantry ); let dag = create_saga_dag::(params).unwrap(); diff --git a/nexus/src/app/snapshot.rs b/nexus/src/app/snapshot.rs index 0c90ac31fb..c28d180d3c 100644 --- a/nexus/src/app/snapshot.rs +++ b/nexus/src/app/snapshot.rs @@ -12,7 +12,6 @@ use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; -use omicron_common::api::external::InstanceState; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; @@ -93,7 +92,7 @@ impl super::Nexus { // If there isn't a running propolis, Nexus needs to use the Crucible // Pantry to make this snapshot - let instance_and_sled = if let Some(attach_instance_id) = + let use_the_pantry = if let Some(attach_instance_id) = &db_disk.runtime_state.attach_instance_id { let (.., authz_instance) = @@ -107,29 +106,12 @@ impl super::Nexus { .instance_fetch_with_vmm(&opctx, &authz_instance) .await?; - match instance_state.vmm().as_ref() { - None => None, - Some(vmm) => match vmm.runtime.state.0 { - // If the VM might be running, or it's rebooting (which - // doesn't deactivate the volume), send the snapshot request - // to the relevant VMM. Otherwise, there's no way to know if - // the instance has attached the volume or is in the process - // of detaching it, so bail. - InstanceState::Running | InstanceState::Rebooting => { - Some((*attach_instance_id, vmm.sled_id)) - } - _ => { - return Err(Error::invalid_request(&format!( - "cannot snapshot attached disk for instance in \ - state {}", - vmm.runtime.state.0 - ))); - } - }, - } + // If a Propolis _may_ exist, send the snapshot request there, + // otherwise use the pantry. + !instance_state.vmm().is_some() } else { // This disk is not attached to an instance, use the pantry. 
- None + true }; let saga_params = sagas::snapshot_create::Params { @@ -137,7 +119,8 @@ impl super::Nexus { silo_id: authz_silo.id(), project_id: authz_project.id(), disk_id: authz_disk.id(), - attached_instance_and_sled: instance_and_sled, + attach_instance_id: db_disk.runtime_state.attach_instance_id, + use_the_pantry, create_params: params.clone(), }; diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 63ea81f13f..251b729f98 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -256,6 +256,109 @@ async fn test_snapshot_without_instance(cptestctx: &ControlPlaneTestContext) { assert_eq!(disk.state, DiskState::Detached); } +#[nexus_test] +async fn test_snapshot_stopped_instance(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + DiskTest::new(&cptestctx).await; + create_project_and_pool(client).await; + let disks_url = get_disks_url(); + + // Define a global image + let image_create_params = params::ImageCreate { + identity: IdentityMetadataCreateParams { + name: "alpine-edge".parse().unwrap(), + description: String::from( + "you can boot any image, as long as it's alpine", + ), + }, + source: params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, + os: "alpine".to_string(), + version: "edge".to_string(), + }; + + let images_url = format!("/v1/images?project={}", PROJECT_NAME); + let image = + NexusRequest::objects_post(client, &images_url, &image_create_params) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap::() + .await; + + // Create a disk from this image + let disk_size = ByteCount::from_gibibytes_u32(2); + let base_disk_name: Name = "base-disk".parse().unwrap(); + let base_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: base_disk_name.clone(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Image { image_id: image.identity.id }, + size: disk_size, + }; + + let base_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&base_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Create a stopped instance with attached disk + let instances_url = format!("/v1/instances?project={}", PROJECT_NAME,); + let instance_name = "base-instance"; + + let instance: Instance = object_create( + client, + &instances_url, + ¶ms::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: format!("instance {:?}", instance_name), + }, + ncpus: InstanceCpuCount(2), + memory: ByteCount::from_gibibytes_u32(1), + hostname: "base-instance".parse().unwrap(), + user_data: + b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" + .to_vec(), + ssh_public_keys: Some(Vec::new()), + network_interfaces: + params::InstanceNetworkInterfaceAttachment::None, + disks: vec![params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { name: base_disk_name.clone() }, + )], + external_ips: vec![], + start: false, + }, + ) + .await; + + assert_eq!(instance.runtime.run_state, external::InstanceState::Stopped); + + // Issue snapshot request + let snapshots_url = format!("/v1/snapshots?project={}", PROJECT_NAME); + + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: 
instance_name.parse().unwrap(), + description: format!("instance {:?}", instance_name), + }, + disk: base_disk_name.into(), + }, + ) + .await; + + assert_eq!(snapshot.disk_id, base_disk.identity.id); + assert_eq!(snapshot.size, base_disk.size); +} + #[nexus_test] async fn test_delete_snapshot(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index daf78823ed..ecfa7cf0f1 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -9,6 +9,7 @@ use chrono::Utc; use dropshot::test_util::ClientTestContext; use http::method::Method; use http::StatusCode; +use nexus_db_queries::db; use nexus_db_queries::db::DataStore; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; @@ -1375,7 +1376,13 @@ async fn test_volume_remove_read_only_parent_base( // Go and get the volume from the database, verify it no longer // has a read only parent. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1394,7 +1401,13 @@ async fn test_volume_remove_read_only_parent_base( } // Verify the t_vid now has a ROP. - let new_vol = datastore.volume_checkout(t_vid).await.unwrap(); + let new_vol = datastore + .volume_checkout( + t_vid, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1421,7 +1434,13 @@ async fn test_volume_remove_read_only_parent_base( // We want to verify we can call volume_remove_rop twice and the second // time through it won't change what it did the first time. This is // critical to supporting replay of the saga, should it be needed. 
- let new_vol = datastore.volume_checkout(t_vid).await.unwrap(); + let new_vol = datastore + .volume_checkout( + t_vid, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1570,7 +1589,13 @@ async fn test_volume_remove_rop_saga(cptestctx: &ControlPlaneTestContext) { .await .unwrap(); - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1628,7 +1653,13 @@ async fn test_volume_remove_rop_saga_twice( .unwrap(); println!("first returns {:?}", res); - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1762,7 +1793,13 @@ async fn test_volume_remove_rop_saga_deleted_volume( .await .unwrap(); - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1811,11 +1848,23 @@ async fn test_volume_checkout(cptestctx: &ControlPlaneTestContext) { // The first time back, we get 1 but internally the generation number goes // to 2. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(1)]); // Request again, we should get 2 now. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(2)]); } @@ -1853,9 +1902,21 @@ async fn test_volume_checkout_updates_nothing( .unwrap(); // Verify nothing happens to our non generation number volume. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![None]); - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![None]); } @@ -1894,15 +1955,33 @@ async fn test_volume_checkout_updates_multiple_gen( // The first time back, we get our original values, but internally the // generation number goes up. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(3), Some(8)]); // Request again, we should see the incremented values now.. 
- let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(4), Some(9)]); // Request one more, because why not. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(5), Some(10)]); } @@ -1947,11 +2026,23 @@ async fn test_volume_checkout_updates_sparse_multiple_gen( // The first time back, we get our original values, but internally the // generation number goes up. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![None, Some(7), Some(9)]); // Request again, we should see the incremented values now.. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![None, Some(8), Some(10)]); } @@ -1996,11 +2087,23 @@ async fn test_volume_checkout_updates_sparse_mid_multiple_gen( // The first time back, we get our original values, but internally the // generation number goes up. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(7), None, Some(9)]); // Request again, we should see the incremented values now.. 
- let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(8), None, Some(10)]); } @@ -2038,7 +2141,12 @@ async fn test_volume_checkout_randomize_ids_only_read_only( .unwrap(); // volume_checkout_randomize_ids should fail - let r = datastore.volume_checkout_randomize_ids(volume_id).await; + let r = datastore + .volume_checkout_randomize_ids( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await; assert!(r.is_err()); } diff --git a/schema/crdb/add-lookup-disk-by-volume-id-index/up.sql b/schema/crdb/add-lookup-disk-by-volume-id-index/up.sql new file mode 100644 index 0000000000..2f129f334c --- /dev/null +++ b/schema/crdb/add-lookup-disk-by-volume-id-index/up.sql @@ -0,0 +1,4 @@ +CREATE UNIQUE INDEX IF NOT EXISTS lookup_disk_by_volume_id ON omicron.public.disk ( + volume_id +) WHERE + time_deleted IS NULL; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index da3dbb3f4c..9f28efbd16 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1152,6 +1152,11 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_deleted_disk ON omicron.public.disk ( ) WHERE time_deleted IS NOT NULL; +CREATE UNIQUE INDEX IF NOT EXISTS lookup_disk_by_volume_id ON omicron.public.disk ( + volume_id +) WHERE + time_deleted IS NULL; + CREATE TABLE IF NOT EXISTS omicron.public.image ( /* Identity metadata (resource) */ id UUID PRIMARY KEY, @@ -3770,7 +3775,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '49.0.0', NULL) + ( TRUE, NOW(), NOW(), '50.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;
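
The following is an illustrative sketch, not part of the patch above: a standalone model of the `InstanceStart` arm of `volume_checkout_allowed`, restating which combinations of the instance's `propolis_id` and `dst_propolis_id` allow a checkout for a given VMM id. The function name, error strings, and the `main` driver are hypothetical; it assumes only the `uuid` crate with the `v4` feature. The real check lives in nexus/db-queries/src/db/datastore/volume.rs and also handles the "instance does not exist" case before reaching this match.

use uuid::Uuid;

fn start_checkout_allowed(
    vmm_id: Uuid,
    propolis_id: Option<Uuid>,
    dst_propolis_id: Option<Uuid>,
) -> Result<(), String> {
    match (propolis_id, dst_propolis_id) {
        // Both ids set: the instance is mid-migration, so a new Upstairs
        // created for a start could take over from the migration target.
        (Some(_), Some(_)) => Err("instance is undergoing migration".to_string()),

        // No Propolis at all: there is nothing to hand the volume to.
        (None, None) => Err("instance has no propolis ids".to_string()),

        // Normal start: the active Propolis must be the VMM being started.
        (Some(active), None) if active == vmm_id => Ok(()),
        (Some(active), None) => Err(format!(
            "active propolis id {} does not match {}",
            active, vmm_id
        )),

        // A destination id with no source is unexpected for a start.
        (None, Some(dst)) => Err(format!(
            "no active propolis id, but dst propolis id {} is set",
            dst
        )),
    }
}

fn main() {
    let vmm_id = Uuid::new_v4();
    assert!(start_checkout_allowed(vmm_id, Some(vmm_id), None).is_ok());
    assert!(start_checkout_allowed(vmm_id, Some(Uuid::new_v4()), None).is_err());
    assert!(start_checkout_allowed(vmm_id, None, None).is_err());
    assert!(start_checkout_allowed(vmm_id, Some(vmm_id), Some(Uuid::new_v4())).is_err());
    println!("InstanceStart checkout rules hold");
}

The point of threading a VolumeCheckoutReason through volume_checkout is that a checkout bumps the generation numbers in the returned VolumeConstructionRequest, which lets a newly constructed Upstairs take over from existing read/write activations; the per-reason checks above refuse the checkout whenever such a takeover is not intended.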