From fe94f96985583a327600b8feb8c8c61843fdde69 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Wed, 18 Oct 2023 20:59:37 +0000 Subject: [PATCH] Limit size of the search range for VNIs when creating VPCs - Fixes #4283. - Adds a relatively small limit to the `NextItem` query used for finding a free VNI during VPC creation. This limits the memory consumption to something very reasonable, but is big enough that we should be extremely unlikely to find _no_ available VNIs in the range. - Add an application-level retry loop when inserting _customer_ VPCs, which catches the unlikely event that there really are no VNIs available, and retries a few times. - Adds tests for the computation of the limited search range. - Adds tests for the actual exhaustion-detection and retry behavior. --- nexus/db-model/src/vpc.rs | 8 + nexus/db-queries/src/db/datastore/vpc.rs | 377 ++++++++++++++++++++--- nexus/db-queries/src/db/queries/vpc.rs | 86 +++++- 3 files changed, 424 insertions(+), 47 deletions(-) diff --git a/nexus/db-model/src/vpc.rs b/nexus/db-model/src/vpc.rs index 8a4dc0e3493..5f2f82119e6 100644 --- a/nexus/db-model/src/vpc.rs +++ b/nexus/db-model/src/vpc.rs @@ -110,6 +110,14 @@ impl IncompleteVpc { subnet_gen: Generation::new(), }) } + + /// Create a copy of self, but with a new random VNI. + /// + /// This is used to retry insertion of a VPC in the case where we can't find + /// an available VNI. + pub fn with_new_vni(self) -> Self { + Self { vni: Vni(external::Vni::random()), ..self } + } } impl DatastoreCollectionConfig for Vpc { diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 14886ba0181..3652ccc87ab 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -40,6 +40,8 @@ use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use diesel::result::DatabaseErrorKind; +use diesel::result::Error as DieselError; use ipnetwork::IpNetwork; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; @@ -85,18 +87,23 @@ impl DataStore { SERVICES_VPC.clone(), Some(Vni(ExternalVni::SERVICES_VNI)), ); - let authz_vpc = self + let authz_vpc = match self .project_create_vpc_raw(opctx, &authz_project, vpc_query) .await - .map(|(authz_vpc, _)| authz_vpc) - .or_else(|e| match e { - Error::ObjectAlreadyExists { .. } => Ok(authz::Vpc::new( - authz_project.clone(), - *SERVICES_VPC_ID, - LookupType::ByName(SERVICES_VPC.identity.name.to_string()), - )), - _ => Err(e), - })?; + { + Ok(None) => { + let msg = "VNI exhaustion detected when creating built-in VPCs"; + error!(opctx.log, "{}", msg); + Err(Error::internal_error(msg)) + } + Ok(Some((authz_vpc, _))) => Ok(authz_vpc), + Err(Error::ObjectAlreadyExists { .. }) => Ok(authz::Vpc::new( + authz_project.clone(), + *SERVICES_VPC_ID, + LookupType::ByName(SERVICES_VPC.identity.name.to_string()), + )), + Err(e) => Err(e), + }?; // Also add the system router and internet gateway route @@ -287,22 +294,60 @@ impl DataStore { &self, opctx: &OpContext, authz_project: &authz::Project, - vpc: IncompleteVpc, + mut vpc: IncompleteVpc, ) -> Result<(authz::Vpc, Vpc), Error> { - self.project_create_vpc_raw( - opctx, - authz_project, - InsertVpcQuery::new(vpc), - ) - .await + // Attempt to insert a VPC, retrying the query if there are no available + // VNIs. + const N_ATTEMPTS: usize = 3; + let mut attempt = 0; + while attempt < N_ATTEMPTS { + let original_vni = vpc.vni; + match self + .project_create_vpc_raw( + opctx, + authz_project, + InsertVpcQuery::new(vpc.clone()), + ) + .await + { + Ok(Some(vpcs)) => return Ok(vpcs), + Err(e) => return Err(e), + Ok(None) => { + vpc = vpc.with_new_vni(); + debug!( + opctx.log, + "VNI exhaustion detected while inserting VPC, retrying"; + "attempt" => attempt, + "vpc_name" => %vpc.identity.name, + "original_vni" => ?original_vni, + "new_vni" => ?vpc.vni, + ); + attempt += 1; + } + } + } + + // We've failed to find a VNI within our retry limit. We'll return a 503 + // in this case. + error!( + opctx.log, + "failed to find a VNI within retry limit"; + "limit" => N_ATTEMPTS, + ); + Err(Error::unavail("Failed to find a free VNI for this VPC")) } + // Internal implementation for creating a VPC. + // + // This returns an optional VPC. If it is None, then we failed to insert a + // VPC specifically because there are no available VNIs. All other errors + // are returned in the `Result::Err` variant. async fn project_create_vpc_raw( &self, opctx: &OpContext, authz_project: &authz::Project, vpc_query: InsertVpcQuery, - ) -> Result<(authz::Vpc, Vpc), Error> { + ) -> Result, Error> { use db::schema::vpc::dsl; assert_eq!(authz_project.id(), vpc_query.vpc.project_id); @@ -312,30 +357,48 @@ impl DataStore { let project_id = vpc_query.vpc.project_id; let conn = self.pool_connection_authorized(opctx).await?; - let vpc: Vpc = Project::insert_resource( + let result: Result = Project::insert_resource( project_id, diesel::insert_into(dsl::vpc).values(vpc_query), ) .insert_and_get_result_async(&conn) - .await - .map_err(|e| match e { - AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { - type_name: ResourceType::Project, - lookup_type: LookupType::ById(project_id), - }, - AsyncInsertError::DatabaseError(e) => public_error_from_diesel( - e, - ErrorHandler::Conflict(ResourceType::Vpc, name.as_str()), - ), - })?; - Ok(( - authz::Vpc::new( - authz_project.clone(), - vpc.id(), - LookupType::ByName(vpc.name().to_string()), - ), - vpc, - )) + .await; + match result { + Ok(vpc) => Ok(Some(( + authz::Vpc::new( + authz_project.clone(), + vpc.id(), + LookupType::ByName(vpc.name().to_string()), + ), + vpc, + ))), + Err(AsyncInsertError::CollectionNotFound) => { + Err(Error::ObjectNotFound { + type_name: ResourceType::Project, + lookup_type: LookupType::ById(project_id), + }) + } + Err(AsyncInsertError::DatabaseError( + DieselError::DatabaseError( + DatabaseErrorKind::NotNullViolation, + info, + ), + )) if info + .message() + .starts_with("null value in column \"vni\"") => + { + // We failed the non-null check on the VNI column, which means + // we could not find a valid VNI in our search range. Return + // None instead to signal the error. + Ok(None) + } + Err(AsyncInsertError::DatabaseError(e)) => { + Err(public_error_from_diesel( + e, + ErrorHandler::Conflict(ResourceType::Vpc, name.as_str()), + )) + } + } } pub async fn project_update_vpc( @@ -1092,3 +1155,241 @@ impl DataStore { }) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::datastore::datastore_test; + use crate::db::model::Project; + use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE; + use nexus_test_utils::db::test_setup_database; + use nexus_types::external_api::params; + use omicron_common::api::external; + use omicron_test_utils::dev; + use slog::info; + + // Test that we detect the right error condition and return None when we + // fail to insert a VPC due to VNI exhaustion. + // + // This is a bit awkward, but we'll test this by inserting a bunch of VPCs, + // and checking that we get the expected error response back from the + // `project_create_vpc_raw` call. + #[tokio::test] + async fn test_project_create_vpc_raw_returns_none_on_vni_exhaustion() { + let logctx = dev::test_setup_log( + "test_project_create_vpc_raw_returns_none_on_vni_exhaustion", + ); + let log = &logctx.log; + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a project. + let project_params = params::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "project".parse().unwrap(), + description: String::from("test project"), + }, + }; + let project = Project::new(Uuid::new_v4(), project_params); + let (authz_project, _) = datastore + .project_create(&opctx, project) + .await + .expect("failed to create project"); + + let starting_vni = 2048; + let description = String::from("test vpc"); + for vni in 0..=MAX_VNI_SEARCH_RANGE_SIZE { + // Create an incomplete VPC and make sure it has the next available + // VNI. + let name: external::Name = format!("vpc{vni}").parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = + Vni(external::Vni::try_from(starting_vni + vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating initial VPC"; + "index" => vni, + "vni" => ?this_vni, + ); + let query = InsertVpcQuery::new(incomplete_vpc); + let (_, db_vpc) = datastore + .project_create_vpc_raw(&opctx, &authz_project, query) + .await + .expect("failed to create initial set of VPCs") + .expect("expected an actual VPC"); + info!( + log, + "created VPC"; + "vpc" => ?db_vpc, + ); + } + + // At this point, we've filled all the VNIs starting from 2048. Let's + // try to allocate one more, also starting from that position. This + // should fail, because we've explicitly filled the entire range we'll + // search above. + let name: external::Name = "dead-vpc".parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = Vni(external::Vni::try_from(starting_vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating VPC when all VNIs are allocated"; + "vni" => ?this_vni, + ); + let query = InsertVpcQuery::new(incomplete_vpc); + let Ok(None) = datastore + .project_create_vpc_raw(&opctx, &authz_project, query) + .await + else { + panic!("Expected Ok(None) when creating a VPC without any available VNIs"); + }; + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + // Test that we appropriately retry when there are no available VNIs. + // + // This is a bit awkward, but we'll test this by inserting a bunch of VPCs, + // and then check that we correctly retry + #[tokio::test] + async fn test_project_create_vpc_retries() { + let logctx = dev::test_setup_log("test_project_create_vpc_retries"); + let log = &logctx.log; + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a project. + let project_params = params::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "project".parse().unwrap(), + description: String::from("test project"), + }, + }; + let project = Project::new(Uuid::new_v4(), project_params); + let (authz_project, _) = datastore + .project_create(&opctx, project) + .await + .expect("failed to create project"); + + let starting_vni = 2048; + let description = String::from("test vpc"); + for vni in 0..=MAX_VNI_SEARCH_RANGE_SIZE { + // Create an incomplete VPC and make sure it has the next available + // VNI. + let name: external::Name = format!("vpc{vni}").parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = + Vni(external::Vni::try_from(starting_vni + vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating initial VPC"; + "index" => vni, + "vni" => ?this_vni, + ); + let query = InsertVpcQuery::new(incomplete_vpc); + let (_, db_vpc) = datastore + .project_create_vpc_raw(&opctx, &authz_project, query) + .await + .expect("failed to create initial set of VPCs") + .expect("expected an actual VPC"); + info!( + log, + "created VPC"; + "vpc" => ?db_vpc, + ); + } + + // Similar to the above test, we've fill all available VPCs starting at + // `starting_vni`. Let's attempt to allocate one beginning there, which + // _should_ fail and be internally retried. Note that we're using + // `project_create_vpc()` here instead of the raw version, to check that + // retry logic. + let name: external::Name = "dead-at-first-vpc".parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = Vni(external::Vni::try_from(starting_vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating VPC when all VNIs are allocated"; + "vni" => ?this_vni, + ); + match datastore + .project_create_vpc(&opctx, &authz_project, incomplete_vpc.clone()) + .await + { + Ok((_, vpc)) => { + assert_eq!(vpc.id(), incomplete_vpc.identity.id); + let min_expected_vni = starting_vni + MAX_VNI_SEARCH_RANGE_SIZE; + assert!(u32::from(vpc.vni.0) > min_expected_vni); + info!(log, "successfully created VPC after retries"; "vpc" => ?vpc); + } + Err(Error::ServiceUnavailable { internal_message }) => { + // This is pretty surprising, but technically possible. Just + // check that we've actually emitted the right message. + assert_eq!( + internal_message, + "Failed to find a free VNI for this VPC", + ); + info!(log, "failed to create VPC within retry limit"); + } + Err(e) => panic!("Unexpected error when inserting VPC: {e}"), + }; + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/queries/vpc.rs b/nexus/db-queries/src/db/queries/vpc.rs index b1ac8fe1e17..b94f92233a7 100644 --- a/nexus/db-queries/src/db/queries/vpc.rs +++ b/nexus/db-queries/src/db/queries/vpc.rs @@ -245,15 +245,7 @@ struct NextVni { impl NextVni { fn new(vni: Vni) -> Self { - let base_u32 = u32::from(vni.0); - // The valid range is [0, 1 << 24], so the maximum shift is whatever - // gets us to 1 << 24, and the minimum is whatever gets us back to the - // minimum guest VNI. - let max_shift = i64::from(external::Vni::MAX_VNI - base_u32); - let min_shift = i64::from( - -i32::try_from(base_u32 - external::Vni::MIN_GUEST_VNI) - .expect("Expected a valid VNI at this point"), - ); + let VniShifts { min_shift, max_shift } = VniShifts::new(vni); let generator = DefaultShiftGenerator { base: vni, max_shift, min_shift }; let inner = NextItem::new_unscoped(generator); @@ -278,3 +270,79 @@ impl NextVni { } delegate_query_fragment_impl!(NextVni); + +// Helper type to compute the shift for a `NextItem` query to find VNIs. +#[derive(Clone, Copy, Debug, PartialEq)] +struct VniShifts { + // The minimum `ShiftGenerator` shift. + min_shift: i64, + // The maximum `ShiftGenerator` shift. + max_shift: i64, +} + +/// Restrict the search for a VNI to a small range. +/// +/// VNIs are pretty sparsely allocated (the number of VPCs), and the range is +/// quite large (24 bits). To avoid memory issues, we'll restrict a search +/// for an available VNI to a small range starting from the random starting +/// VNI. +// +// NOTE: This is very small for tests, to ensure we can accurately test the +// failure mode where there are no available VNIs. +#[cfg(not(test))] +pub const MAX_VNI_SEARCH_RANGE_SIZE: u32 = 2048; +#[cfg(test)] +pub const MAX_VNI_SEARCH_RANGE_SIZE: u32 = 10; + +impl VniShifts { + fn new(vni: Vni) -> Self { + let base_u32 = u32::from(vni.0); + let range_end = base_u32 + MAX_VNI_SEARCH_RANGE_SIZE; + + // Clamp the maximum shift at the distance to the maximum allowed VNI, + // or the maximum of the range. + let max_shift = i64::from( + (external::Vni::MAX_VNI - base_u32).min(MAX_VNI_SEARCH_RANGE_SIZE), + ); + + // And any remaining part of the range wraps around starting at the + // beginning. + let min_shift = -i64::from( + range_end.checked_sub(external::Vni::MAX_VNI).unwrap_or(0), + ); + Self { min_shift, max_shift } + } +} + +#[cfg(test)] +mod tests { + use super::external; + use super::Vni; + use super::VniShifts; + use super::MAX_VNI_SEARCH_RANGE_SIZE; + + // Ensure that when the search range lies entirely within the range of VNIs, + // we search from the start VNI through the maximum allowed range size. + #[test] + fn test_vni_shift_no_wrapping() { + let vni = Vni(external::Vni::try_from(2048).unwrap()); + let VniShifts { min_shift, max_shift } = VniShifts::new(vni); + assert_eq!(min_shift, 0); + assert_eq!(max_shift, i64::from(MAX_VNI_SEARCH_RANGE_SIZE)); + assert_eq!(max_shift - min_shift, i64::from(MAX_VNI_SEARCH_RANGE_SIZE)); + } + + // Ensure that we wrap correctly, when the starting VNI happens to land + // quite close to the end of the allowed range. + #[test] + fn test_vni_shift_with_wrapping() { + let offset = 5; + let vni = + Vni(external::Vni::try_from(external::Vni::MAX_VNI - offset) + .unwrap()); + let VniShifts { min_shift, max_shift } = VniShifts::new(vni); + assert_eq!(min_shift, -i64::from(MAX_VNI_SEARCH_RANGE_SIZE - offset)); + assert_eq!(max_shift, i64::from(offset)); + assert_eq!(max_shift - min_shift, i64::from(MAX_VNI_SEARCH_RANGE_SIZE)); + } +}