From 6598065908038209de253165404eba46bce62fdc Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 28 Nov 2023 19:55:19 -0800 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5 --- common/src/api/external/error.rs | 163 +++++++++++++++--- nexus/db-queries/src/db/datastore/mod.rs | 8 +- nexus/db-queries/src/db/datastore/sled.rs | 2 +- nexus/db-queries/src/db/datastore/vpc.rs | 2 +- .../src/db/queries/region_allocation.rs | 6 +- nexus/src/app/external_endpoints.rs | 13 +- nexus/src/app/instance.rs | 36 ++-- nexus/src/app/oximeter.rs | 7 +- nexus/src/app/sagas/snapshot_create.rs | 11 +- sled-agent/src/sim/sled_agent.rs | 5 +- 10 files changed, 193 insertions(+), 60 deletions(-) diff --git a/common/src/api/external/error.rs b/common/src/api/external/error.rs index e508b7ecba..2eb88dbca9 100644 --- a/common/src/api/external/error.rs +++ b/common/src/api/external/error.rs @@ -53,8 +53,11 @@ pub enum Error { #[error("Internal Error: {internal_message}")] InternalError { internal_message: String }, /// The system (or part of it) is unavailable. - #[error("Service Unavailable: {internal_message}")] - ServiceUnavailable { internal_message: String }, + #[error( + "Service Unavailable {}", + message.display_internal() + )] + ServiceUnavailable { message: MessageVariant }, /// Method Not Allowed #[error("Method Not Allowed: {internal_message}")] MethodNotAllowed { internal_message: String }, @@ -66,6 +69,88 @@ pub enum Error { Conflict { internal_message: String }, } +/// Represents either a message that is internal to the system or a message that +/// is intended to be returned to the client. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub enum MessageVariant { + Internal(String), + External { + message: String, + /// Internal context, or an empty string if there's no context yet. + internal_context: String, + }, +} + +impl MessageVariant { + fn internal(message: String) -> Self { + MessageVariant::Internal(message) + } + + fn external(message: String) -> Self { + MessageVariant::External { + message: message, + internal_context: String::new(), + } + } + + fn internal_context(self, context: C) -> Self + where + C: Display + Send + Sync + 'static, + { + match self { + MessageVariant::Internal(msg) => { + MessageVariant::Internal(format!("{}: {}", context, msg)) + } + MessageVariant::External { message, internal_context } => { + let internal_context = if internal_context.is_empty() { + context.to_string() + } else { + format!("{}: {}", context, internal_context) + }; + MessageVariant::External { message, internal_context } + } + } + } + + fn into_internal_external(self) -> (String, Option) { + match self { + MessageVariant::Internal(msg) => (msg, None), + MessageVariant::External { message, internal_context } => { + let internal = if internal_context.is_empty() { + message.clone() + } else { + format!("{}: {}", message, internal_context) + }; + (internal, Some(message)) + } + } + } + + // Do not implement `fmt::Display` for this enum because we don't want users to + // accidentally display the internal message to the client. Instead, use a + // private formatter. + fn display_internal(&self) -> MessageVariantDisplay<'_> { + MessageVariantDisplay(self) + } +} + +struct MessageVariantDisplay<'a>(&'a MessageVariant); + +impl<'a> Display for MessageVariantDisplay<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.0 { + MessageVariant::Internal(msg) => write!(f, "(internal: {})", msg), + MessageVariant::External { message, internal_context } => { + write!(f, ": {message}")?; + if !internal_context.is_empty() { + write!(f, " (with internal context: {internal_context})")?; + } + Ok(()) + } + } + } +} + /// Indicates how an object was looked up (for an `ObjectNotFound` error) #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub enum LookupType { @@ -155,15 +240,28 @@ impl Error { Error::InvalidRequest { message: message.to_owned() } } - /// Generates an [`Error::ServiceUnavailable`] error with the specific - /// message + /// Generates an [`Error::ServiceUnavailable`] error with a specific + /// message that is sent back to the client. /// /// This should be used for transient failures where the caller might be /// expected to retry. Logic errors or other problems indicating that a /// retry would not work should probably be an InternalError (if it's a /// server problem) or InvalidRequest (if it's a client problem) instead. - pub fn unavail(message: &str) -> Error { - Error::ServiceUnavailable { internal_message: message.to_owned() } + pub fn unavail_external(external_message: impl Into) -> Error { + Error::ServiceUnavailable { + message: MessageVariant::external(external_message.into()), + } + } + + /// Generates an [`Error::ServiceUnavailable`] error with a specific + /// internal-only message. + /// + /// This should be used for transient failures where the caller might be + /// expected to retry. + pub fn unavail_internal(internal_message: impl Into) -> Error { + Error::ServiceUnavailable { + message: MessageVariant::internal(internal_message.into()), + } } /// Generates an [`Error::TypeVersionMismatch`] with a specific message. @@ -215,12 +313,9 @@ impl Error { Error::InternalError { internal_message } => Error::InternalError { internal_message: format!("{}: {}", context, internal_message), }, - Error::ServiceUnavailable { internal_message } => { + Error::ServiceUnavailable { message } => { Error::ServiceUnavailable { - internal_message: format!( - "{}: {}", - context, internal_message - ), + message: message.internal_context(context), } } Error::MethodNotAllowed { internal_message } => { @@ -326,11 +421,16 @@ impl From for HttpError { HttpError::for_internal_error(internal_message) } - Error::ServiceUnavailable { internal_message } => { - HttpError::for_unavail( - Some(String::from("ServiceNotAvailable")), + Error::ServiceUnavailable { message } => { + let (internal_message, external) = + message.into_internal_external(); + HttpError { + status_code: http::StatusCode::SERVICE_UNAVAILABLE, + error_code: Some(String::from("ServiceNotAvailable")), + external_message: external + .unwrap_or_else(|| "Service Unavailable".to_owned()), internal_message, - ) + } } Error::TypeVersionMismatch { internal_message } => { @@ -381,7 +481,10 @@ impl From> for Error { match rv.status() { http::StatusCode::SERVICE_UNAVAILABLE => { - Error::unavail(&message) + // TODO: Out of an abundance of caution we treat this + // message as internal, though it's possible some such + // messages should be displayed. + Error::unavail_internal(&message) } status if status.is_client_error() => { Error::invalid_request(&message) @@ -493,6 +596,8 @@ impl InternalContext for Result { #[cfg(test)] mod test { + use crate::api::external::error::MessageVariant; + use super::Error; use super::InternalContext; @@ -553,11 +658,29 @@ mod test { }; // test `with_internal_context()` and (separately) `ServiceUnavailable` - // variant - let error: Result<(), Error> = Err(Error::unavail("boom")); + // internal variant + let error: Result<(), Error> = Err(Error::unavail_internal("boom")); + match error.with_internal_context(|| format!("uh-oh (#{:2})", 2)) { + Err(Error::ServiceUnavailable { message }) => { + assert_eq!( + message, + MessageVariant::internal("uh-oh (# 2): boom".to_owned()) + ); + } + _ => panic!("returned wrong type"), + }; + + // test `ServiceUnavailable` external variant + let error: Result<(), Error> = Err(Error::unavail_external("boom")); match error.with_internal_context(|| format!("uh-oh (#{:2})", 2)) { - Err(Error::ServiceUnavailable { internal_message }) => { - assert_eq!(internal_message, "uh-oh (# 2): boom"); + Err(Error::ServiceUnavailable { message }) => { + assert_eq!( + message, + MessageVariant::External { + message: "boom".to_owned(), + internal_context: "uh-oh (# 2)".to_owned(), + } + ); } _ => panic!("returned wrong type"), }; diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 0612b960c9..3af16918ca 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -220,7 +220,9 @@ impl DataStore { opctx.authorize(authz::Action::Query, &authz::DATABASE).await?; let pool = self.pool.pool(); let connection = pool.get().await.map_err(|err| { - Error::unavail(&format!("Failed to access DB connection: {err}")) + Error::unavail_internal(format!( + "Failed to access DB connection: {err}" + )) })?; Ok(connection) } @@ -234,7 +236,9 @@ impl DataStore { &self, ) -> Result { let connection = self.pool.pool().get().await.map_err(|err| { - Error::unavail(&format!("Failed to access DB connection: {err}")) + Error::unavail_internal(format!( + "Failed to access DB connection: {err}" + )) })?; Ok(connection) } diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 130c36b496..1b4caa9d2b 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -194,7 +194,7 @@ impl DataStore { .await .map_err(|e| match e { TxnError::CustomError(SledReservationError::NotFound) => { - external::Error::unavail( + external::Error::unavail_external( "No sleds can fit the requested instance", ) } diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 6db99465a3..37d7077777 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -340,7 +340,7 @@ impl DataStore { opctx.log, "failed to find a VNI after searching entire range"; ); - Err(Error::unavail("Failed to find a free VNI for this VPC")) + Err(Error::unavail_external("Failed to find a free VNI for this VPC")) } // Internal implementation for creating a VPC. diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index a080af4c37..061aaffe33 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -48,17 +48,17 @@ pub fn from_diesel(e: DieselError) -> external::Error { if let Some(sentinel) = matches_sentinel(&e, &sentinels) { match sentinel { NOT_ENOUGH_DATASETS_SENTINEL => { - return external::Error::unavail( + return external::Error::unavail_external( "Not enough datasets to allocate disks", ); } NOT_ENOUGH_ZPOOL_SPACE_SENTINEL => { - return external::Error::unavail( + return external::Error::unavail_external( "Not enough zpool space to allocate disks. There may not be enough disks with space for the requested region. You may also see this if your rack is in a degraded state, or you're running the default multi-rack topology configuration in a 1-sled development environment.", ); } NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL => { - return external::Error::unavail( + return external::Error::unavail_external( "Not enough unique zpools selected while allocating disks", ); } diff --git a/nexus/src/app/external_endpoints.rs b/nexus/src/app/external_endpoints.rs index f95c64d3eb..a413ad15fa 100644 --- a/nexus/src/app/external_endpoints.rs +++ b/nexus/src/app/external_endpoints.rs @@ -737,7 +737,7 @@ fn endpoint_for_authority( let endpoint_config = config_rx.borrow(); let endpoints = endpoint_config.as_ref().ok_or_else(|| { error!(&log, "received request with no endpoints loaded"); - Error::unavail("endpoints not loaded") + Error::unavail_external("endpoints not loaded") })?; // See if there's an endpoint for the requested name. If so, use it. @@ -810,6 +810,7 @@ mod test { use nexus_types::identity::Resource; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_common::api::external::MessageVariant; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -1532,9 +1533,15 @@ mod test { let result = endpoint_for_authority(&log, &authority, rx_channel); match result { - Err(Error::ServiceUnavailable { internal_message }) => { + Err(Error::ServiceUnavailable { message }) => { assert_eq!(rx_label, "none"); - assert_eq!(internal_message, "endpoints not loaded"); + assert_eq!( + message, + MessageVariant::External { + message: "endpoints not loaded".to_owned(), + internal_context: String::new(), + } + ); } Err(Error::InvalidRequest { message }) => { assert_eq!(rx_label, "empty"); diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 923bb1777e..17d34863df 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -362,7 +362,9 @@ impl super::Nexus { } if instance.runtime().migration_id.is_some() { - return Err(Error::unavail("instance is already migrating")); + return Err(Error::unavail_external( + "instance is already migrating", + )); } // Kick off the migration saga @@ -1583,28 +1585,26 @@ impl super::Nexus { | InstanceState::Starting | InstanceState::Stopping | InstanceState::Stopped - | InstanceState::Failed => Err(Error::ServiceUnavailable { - internal_message: format!( - "cannot connect to serial console of instance in state \ - {:?}", - vmm.runtime.state.0 - ), - }), - InstanceState::Destroyed => Err(Error::ServiceUnavailable { - internal_message: format!( + | InstanceState::Failed => { + Err(Error::unavail_external(format!( + "cannot connect to serial console of instance in state \ + {:?}", + vmm.runtime.state.0 + ))) + } + InstanceState::Destroyed => { + Err(Error::unavail_external(format!( "cannot connect to serial console of instance in state \ {:?}", - InstanceState::Stopped), - }), + InstanceState::Stopped))) + } } } else { - Err(Error::ServiceUnavailable { - internal_message: format!( - "instance is in state {:?} and has no active serial console \ + Err(Error::unavail_external(format!( + "instance is in state {:?} and has no active serial console \ server", - instance.runtime().nexus_state - ) - }) + instance.runtime().nexus_state + ))) } } diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index 66f39a32b6..21aec7a6b3 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -390,8 +390,8 @@ impl super::Nexus { limit: std::num::NonZeroU32::new(1).unwrap(), }; let oxs = self.db_datastore.oximeter_list(&page_params).await?; - let info = oxs.first().ok_or_else(|| Error::ServiceUnavailable { - internal_message: String::from("no oximeter collectors available"), + let info = oxs.first().ok_or_else(|| { + Error::unavail_external("no oximeter collectors available") })?; let address = SocketAddr::from((info.ip.ip(), info.port.try_into().unwrap())); @@ -403,7 +403,8 @@ impl super::Nexus { fn map_oximeter_err(error: oximeter_db::Error) -> Error { match error { oximeter_db::Error::DatabaseUnavailable(_) => { - Error::ServiceUnavailable { internal_message: error.to_string() } + // XXX: should this be unavail_external? + Error::unavail_internal(error.to_string()) } _ => Error::InternalError { internal_message: error.to_string() }, } diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 3b4dfc0043..77038c4098 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -840,14 +840,9 @@ async fn ssc_attach_disk_to_pantry( _ => { // Return a 503 indicating that the user should retry - return Err(ActionError::action_failed( - Error::ServiceUnavailable { - internal_message: format!( - "disk is in state {:?}", - db_disk.state(), - ), - }, - )); + return Err(ActionError::action_failed(Error::unavail_external( + format!("disk is in state {:?}", db_disk.state()), + ))); } } diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index c06ae96f2e..a7dce7730c 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -651,7 +651,10 @@ impl SledAgent { &dropshot_log, ) .map_err(|error| { - Error::unavail(&format!("initializing propolis-server: {}", error)) + Error::unavail_external(format!( + "initializing propolis-server: {}", + error + )) })? .start(); let client = propolis_client::Client::new(&format!(