diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index a6d729593b..64a2e462ec 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -702,6 +702,7 @@ pub enum ResourceType { Silo, SiloUser, SiloGroup, + SiloQuotas, IdentityProvider, SamlIdentityProvider, SshKey, diff --git a/end-to-end-tests/src/bin/bootstrap.rs b/end-to-end-tests/src/bin/bootstrap.rs index 83a37b8c21..9ddd872bc2 100644 --- a/end-to-end-tests/src/bin/bootstrap.rs +++ b/end-to-end-tests/src/bin/bootstrap.rs @@ -4,11 +4,11 @@ use end_to_end_tests::helpers::{generate_name, get_system_ip_pool}; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{ ByteCount, DeviceAccessTokenRequest, DeviceAuthRequest, DeviceAuthVerify, - DiskCreate, DiskSource, IpRange, Ipv4Range, + DiskCreate, DiskSource, IpRange, Ipv4Range, SiloQuotasUpdate, }; use oxide_client::{ ClientDisksExt, ClientHiddenExt, ClientProjectsExt, - ClientSystemNetworkingExt, + ClientSystemNetworkingExt, ClientSystemSilosExt, }; use serde::{de::DeserializeOwned, Deserialize}; use std::time::Duration; @@ -45,6 +45,19 @@ async fn main() -> Result<()> { .send() .await?; + // ===== SET UP QUOTAS ===== // + eprintln!("setting up quotas..."); + client + .silo_quotas_update() + .silo("recovery") + .body(SiloQuotasUpdate { + cpus: Some(16), + memory: Some(ByteCount(1024 * 1024 * 1024 * 10)), + storage: Some(ByteCount(1024 * 1024 * 1024 * 1024)), + }) + .send() + .await?; + // ===== ENSURE DATASETS ARE READY ===== // eprintln!("ensuring datasets are ready..."); let ctx = Context::from_client(client).await?; diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index 2c66bd4724..0132feafeb 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -78,7 +78,7 @@ fn rss_config() -> Result { let content = std::fs::read_to_string(&path).unwrap_or(RSS_CONFIG_STR.to_string()); 
toml::from_str(&content) - .with_context(|| format!("parsing config-rss as TOML")) + .with_context(|| "parsing config-rss as TOML".to_string()) } fn nexus_external_dns_name(config: &SetupServiceConfig) -> String { diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 43bf83fd34..908f6f2368 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -55,6 +55,7 @@ mod system_update; // for join-based marker trait generation. mod ipv4_nat_entry; pub mod queries; +mod quota; mod rack; mod region; mod region_snapshot; @@ -139,6 +140,7 @@ pub use physical_disk::*; pub use physical_disk_kind::*; pub use producer_endpoint::*; pub use project::*; +pub use quota::*; pub use rack::*; pub use region::*; pub use region_snapshot::*; diff --git a/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs b/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs index 6c684016b4..124ffe4db6 100644 --- a/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs +++ b/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs @@ -8,6 +8,7 @@ //! for the construction of this query. use crate::schema::silo; +use crate::schema::silo_quotas; use crate::schema::virtual_provisioning_collection; table! { @@ -28,11 +29,32 @@ table! { } } +table! { + quotas (silo_id) { + silo_id -> Uuid, + cpus -> Int8, + memory -> Int8, + storage -> Int8, + } +} + +table! 
{ + silo_provisioned { + id -> Uuid, + virtual_disk_bytes_provisioned -> Int8, + cpus_provisioned -> Int8, + ram_provisioned -> Int8, + } +} + diesel::allow_tables_to_appear_in_same_query!(silo, parent_silo,); diesel::allow_tables_to_appear_in_same_query!( virtual_provisioning_collection, + silo_quotas, parent_silo, all_collections, do_update, + quotas, + silo_provisioned ); diff --git a/nexus/db-model/src/quota.rs b/nexus/db-model/src/quota.rs new file mode 100644 index 0000000000..70a8ffa1fd --- /dev/null +++ b/nexus/db-model/src/quota.rs @@ -0,0 +1,109 @@ +use super::ByteCount; +use crate::schema::silo_quotas; +use chrono::{DateTime, Utc}; +use nexus_types::external_api::{params, views}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive( + Queryable, + Insertable, + Debug, + Clone, + Selectable, + Serialize, + Deserialize, + AsChangeset, +)] +#[diesel(table_name = silo_quotas)] +pub struct SiloQuotas { + pub silo_id: Uuid, + pub time_created: DateTime, + pub time_modified: DateTime, + + /// The number of CPUs that this silo is allowed to use + pub cpus: i64, + + /// The amount of memory (in bytes) that this silo is allowed to use + #[diesel(column_name = memory_bytes)] + pub memory: ByteCount, + + /// The amount of storage (in bytes) that this silo is allowed to use + #[diesel(column_name = storage_bytes)] + pub storage: ByteCount, +} + +impl SiloQuotas { + pub fn new( + silo_id: Uuid, + cpus: i64, + memory: ByteCount, + storage: ByteCount, + ) -> Self { + Self { + silo_id, + time_created: Utc::now(), + time_modified: Utc::now(), + cpus, + memory, + storage, + } + } + + pub fn arbitrarily_high_default(silo_id: Uuid) -> Self { + let count = params::SiloQuotasCreate::arbitrarily_high_default(); + Self::new( + silo_id, + count.cpus, + count.memory.into(), + count.storage.into(), + ) + } +} + +impl From for views::SiloQuotas { + fn from(silo_quotas: SiloQuotas) -> Self { + Self { + silo_id: silo_quotas.silo_id, + cpus: silo_quotas.cpus, + memory: 
silo_quotas.memory.into(), + storage: silo_quotas.storage.into(), + } + } +} + +impl From for SiloQuotas { + fn from(silo_quotas: views::SiloQuotas) -> Self { + Self { + silo_id: silo_quotas.silo_id, + time_created: Utc::now(), + time_modified: Utc::now(), + cpus: silo_quotas.cpus, + memory: silo_quotas.memory.into(), + storage: silo_quotas.storage.into(), + } + } +} + +// Describes a set of updates for the [`SiloQuotas`] model. +#[derive(AsChangeset)] +#[diesel(table_name = silo_quotas)] +pub struct SiloQuotasUpdate { + pub cpus: Option, + #[diesel(column_name = memory_bytes)] + pub memory: Option, + #[diesel(column_name = storage_bytes)] + pub storage: Option, + pub time_modified: DateTime, +} + +impl From for SiloQuotasUpdate { + fn from(params: params::SiloQuotasUpdate) -> Self { + Self { + cpus: params.cpus, + memory: params.memory.map(|f| f.into()), + storage: params.storage.map(|f| f.into()), + time_modified: Utc::now(), + } + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 51501b4894..10fa8dcfac 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -409,6 +409,17 @@ table! { } } +table! { + silo_quotas(silo_id) { + silo_id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + cpus -> Int8, + memory_bytes -> Int8, + storage_bytes -> Int8, + } +} + table! { network_interface (id) { id -> Uuid, @@ -1322,7 +1333,7 @@ table! { /// /// This should be updated whenever the schema is changed. 
For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(19, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(20, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 761c3f995f..1609fc7101 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -68,6 +68,7 @@ mod network_interface; mod oximeter; mod physical_disk; mod project; +mod quota; mod rack; mod region; mod region_snapshot; diff --git a/nexus/db-queries/src/db/datastore/quota.rs b/nexus/db-queries/src/db/datastore/quota.rs new file mode 100644 index 0000000000..2066781e6b --- /dev/null +++ b/nexus/db-queries/src/db/datastore/quota.rs @@ -0,0 +1,127 @@ +use super::DataStore; +use crate::authz; +use crate::context::OpContext; +use crate::db; +use crate::db::error::public_error_from_diesel; +use crate::db::error::ErrorHandler; +use crate::db::pagination::paginated; +use crate::db::pool::DbConnection; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::prelude::*; +use nexus_db_model::SiloQuotas; +use nexus_db_model::SiloQuotasUpdate; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::ResourceType; +use omicron_common::api::external::UpdateResult; +use uuid::Uuid; + +impl DataStore { + /// Creates new quotas for a silo. This is grouped with silo creation + /// and shouldn't be called outside of that flow. + /// + /// An authz check _cannot_ be performed here because the authz initialization + /// isn't complete and will lead to a db deadlock. 
+ /// + /// See + pub async fn silo_quotas_create( + &self, + conn: &async_bb8_diesel::Connection, + authz_silo: &authz::Silo, + quotas: SiloQuotas, + ) -> Result<(), Error> { + let silo_id = authz_silo.id(); + use db::schema::silo_quotas::dsl; + + diesel::insert_into(dsl::silo_quotas) + .values(quotas) + .execute_async(conn) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::SiloQuotas, + &silo_id.to_string(), + ), + ) + }) + .map(|_| ()) + } + + pub async fn silo_quotas_delete( + &self, + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + authz_silo: &authz::Silo, + ) -> DeleteResult { + // Given that the quotas right now are somewhat of an extension of the + // Silo we just check for delete permission on the silo itself. + opctx.authorize(authz::Action::Delete, authz_silo).await?; + + use db::schema::silo_quotas::dsl; + diesel::delete(dsl::silo_quotas) + .filter(dsl::silo_id.eq(authz_silo.id())) + .execute_async(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } + + pub async fn silo_update_quota( + &self, + opctx: &OpContext, + authz_silo: &authz::Silo, + updates: SiloQuotasUpdate, + ) -> UpdateResult { + opctx.authorize(authz::Action::Modify, authz_silo).await?; + use db::schema::silo_quotas::dsl; + let silo_id = authz_silo.id(); + diesel::update(dsl::silo_quotas) + .filter(dsl::silo_id.eq(silo_id)) + .set(updates) + .returning(SiloQuotas::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::SiloQuotas, + &silo_id.to_string(), + ), + ) + }) + } + + pub async fn silo_quotas_view( + &self, + opctx: &OpContext, + authz_silo: &authz::Silo, + ) -> Result { + opctx.authorize(authz::Action::Read, authz_silo).await?; + use db::schema::silo_quotas::dsl; + dsl::silo_quotas + .filter(dsl::silo_id.eq(authz_silo.id())) + .first_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + pub async fn fleet_list_quotas( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + use db::schema::silo_quotas::dsl; + paginated(dsl::silo_quotas, dsl::silo_id, pagparams) + .select(SiloQuotas::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } +} diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index a69386cfd0..728da0b0d1 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -912,6 +912,8 @@ mod test { name: "test-silo".parse().unwrap(), description: String::new(), }, + // Set a default quota of a half rack's worth of resources + quotas: external_params::SiloQuotasCreate::arbitrarily_high_default(), discoverable: false, identity_mode: SiloIdentityMode::LocalOnly, admin_group_name: None, diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index 437c171fb0..2c0c5f3c47 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -28,6 +28,7 @@ use chrono::Utc; use diesel::prelude::*; use nexus_db_model::Certificate; use nexus_db_model::ServiceKind; +use nexus_db_model::SiloQuotas; use 
nexus_types::external_api::params; use nexus_types::external_api::shared; use nexus_types::external_api::shared::SiloRole; @@ -61,13 +62,32 @@ impl DataStore { debug!(opctx.log, "attempting to create built-in silos"); use db::schema::silo::dsl; - let count = diesel::insert_into(dsl::silo) - .values([&*DEFAULT_SILO, &*INTERNAL_SILO]) - .on_conflict(dsl::id) - .do_nothing() - .execute_async(&*self.pool_connection_authorized(opctx).await?) + use db::schema::silo_quotas::dsl as quotas_dsl; + let count = self + .pool_connection_authorized(opctx) + .await? + .transaction_async(|conn| async move { + diesel::insert_into(quotas_dsl::silo_quotas) + .values(SiloQuotas::arbitrarily_high_default( + DEFAULT_SILO.id(), + )) + .on_conflict(quotas_dsl::silo_id) + .do_nothing() + .execute_async(&conn) + .await + .map_err(TransactionError::CustomError) + .unwrap(); + diesel::insert_into(dsl::silo) + .values([&*DEFAULT_SILO, &*INTERNAL_SILO]) + .on_conflict(dsl::id) + .do_nothing() + .execute_async(&conn) + .await + .map_err(TransactionError::CustomError) + }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .unwrap(); + info!(opctx.log, "created {} built-in silos", count); self.virtual_provisioning_collection_create( @@ -263,6 +283,18 @@ impl DataStore { self.dns_update(nexus_opctx, &conn, dns_update).await?; + self.silo_quotas_create( + &conn, + &authz_silo, + SiloQuotas::new( + authz_silo.id(), + new_silo_params.quotas.cpus, + new_silo_params.quotas.memory.into(), + new_silo_params.quotas.storage.into(), + ), + ) + .await?; + Ok::>(silo) }) .await?; @@ -380,6 +412,8 @@ impl DataStore { ))); } + self.silo_quotas_delete(opctx, &conn, &authz_silo).await?; + self.virtual_provisioning_collection_delete_on_connection( &opctx.log, &conn, id, ) diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs index 230c3941ff..348d277ddf 100644 --- 
a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs +++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs @@ -195,7 +195,9 @@ impl DataStore { ) .get_results_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| { + crate::db::queries::virtual_provisioning_collection_update::from_diesel(e) + })?; self.virtual_provisioning_collection_producer .append_disk_metrics(&provisions)?; Ok(provisions) @@ -249,7 +251,7 @@ impl DataStore { ) .get_results_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| crate::db::queries::virtual_provisioning_collection_update::from_diesel(e))?; self.virtual_provisioning_collection_producer .append_disk_metrics(&provisions)?; Ok(provisions) @@ -270,7 +272,7 @@ impl DataStore { ) .get_results_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| crate::db::queries::virtual_provisioning_collection_update::from_diesel(e))?; self.virtual_provisioning_collection_producer .append_cpu_metrics(&provisions)?; Ok(provisions) @@ -300,7 +302,7 @@ impl DataStore { ) .get_results_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| crate::db::queries::virtual_provisioning_collection_update::from_diesel(e))?; self.virtual_provisioning_collection_producer .append_cpu_metrics(&provisions)?; Ok(provisions) diff --git a/nexus/db-queries/src/db/fixed_data/silo.rs b/nexus/db-queries/src/db/fixed_data/silo.rs index d32c4211e9..6eba849ee3 100644 --- a/nexus/db-queries/src/db/fixed_data/silo.rs +++ b/nexus/db-queries/src/db/fixed_data/silo.rs @@ -24,6 +24,9 @@ lazy_static! 
{ name: "default-silo".parse().unwrap(), description: "default silo".to_string(), }, + // This quota is actually _unused_ because the default silo + // isn't constructed in the same way a normal silo would be. + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -49,6 +52,8 @@ lazy_static! { name: "oxide-internal".parse().unwrap(), description: "Built-in internal Silo.".to_string(), }, + // The internal silo contains no virtual resources, so it has no allotted capacity. + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs index 0a383eb6f1..7672d5af9a 100644 --- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs +++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs @@ -13,16 +13,69 @@ use crate::db::pool::DbConnection; use crate::db::schema::virtual_provisioning_collection; use crate::db::schema::virtual_provisioning_resource; use crate::db::subquery::{AsQuerySource, Cte, CteBuilder, CteQuery}; +use crate::db::true_or_cast_error::matches_sentinel; +use crate::db::true_or_cast_error::TrueOrCastError; use db_macros::Subquery; use diesel::pg::Pg; use diesel::query_builder::{AstPass, Query, QueryFragment, QueryId}; +use diesel::result::Error as DieselError; use diesel::{ - sql_types, CombineDsl, ExpressionMethods, IntoSql, - NullableExpressionMethods, QueryDsl, RunQueryDsl, SelectableHelper, + sql_types, BoolExpressionMethods, CombineDsl, ExpressionMethods, IntoSql, + JoinOnDsl, NullableExpressionMethods, QueryDsl, RunQueryDsl, + SelectableHelper, }; use nexus_db_model::queries::virtual_provisioning_collection_update::{ - all_collections, do_update, parent_silo, + all_collections, 
do_update, parent_silo, quotas, silo_provisioned, }; +use omicron_common::api::external; +use omicron_common::api::external::MessagePair; + +const NOT_ENOUGH_CPUS_SENTINEL: &'static str = "Not enough cpus"; +const NOT_ENOUGH_MEMORY_SENTINEL: &'static str = "Not enough memory"; +const NOT_ENOUGH_STORAGE_SENTINEL: &'static str = "Not enough storage"; + +/// Translates a generic pool error to an external error based +/// on messages which may be emitted when provisioning virtual resources +/// such as instances and disks. +pub fn from_diesel(e: DieselError) -> external::Error { + use crate::db::error; + + let sentinels = [ + NOT_ENOUGH_CPUS_SENTINEL, + NOT_ENOUGH_MEMORY_SENTINEL, + NOT_ENOUGH_STORAGE_SENTINEL, + ]; + if let Some(sentinel) = matches_sentinel(&e, &sentinels) { + match sentinel { + NOT_ENOUGH_CPUS_SENTINEL => { + return external::Error::InsufficientCapacity { + message: MessagePair::new_full( + "vCPU Limit Exceeded: Not enough vCPUs to complete request. Either stop unused instances to free up resources or contact the rack operator to request a capacity increase.".to_string(), + "User tried to allocate an instance but the virtual provisioning resource table indicated that there were not enough CPUs available to satisfy the request.".to_string(), + ) + } + } + NOT_ENOUGH_MEMORY_SENTINEL => { + return external::Error::InsufficientCapacity { + message: MessagePair::new_full( + "Memory Limit Exceeded: Not enough memory to complete request. Either stop unused instances to free up resources or contact the rack operator to request a capacity increase.".to_string(), + "User tried to allocate an instance but the virtual provisioning resource table indicated that there were not enough RAM available to satisfy the request.".to_string(), + ) + } + } + NOT_ENOUGH_STORAGE_SENTINEL => { + return external::Error::InsufficientCapacity { + message: MessagePair::new_full( + "Storage Limit Exceeded: Not enough storage to complete request. 
Either remove unneeded disks and snapshots to free up resources or contact the rack operator to request a capacity increase.".to_string(), + "User tried to allocate a disk or snapshot but the virtual provisioning resource table indicated that there were not enough storage available to satisfy the request.".to_string(), + ) + } + } + _ => {} + } + } + error::public_error_from_diesel(e, error::ErrorHandler::Server) +} #[derive(Subquery, QueryId)] #[subquery(name = parent_silo)] @@ -82,20 +135,86 @@ struct DoUpdate { } impl DoUpdate { - fn new_for_insert(id: uuid::Uuid) -> Self { + fn new_for_insert( + silo_provisioned: &SiloProvisioned, + quotas: &Quotas, + resource: VirtualProvisioningResource, + ) -> Self { use virtual_provisioning_resource::dsl; + let cpus_provisioned_delta = + resource.cpus_provisioned.into_sql::(); + let memory_provisioned_delta = + i64::from(resource.ram_provisioned).into_sql::(); + let storage_provisioned_delta = + i64::from(resource.virtual_disk_bytes_provisioned) + .into_sql::(); + let not_allocted = dsl::virtual_provisioning_resource - .find(id) + .find(resource.id) .count() .single_value() .assume_not_null() .eq(0); + let has_sufficient_cpus = quotas + .query_source() + .select(quotas::cpus) + .single_value() + .assume_not_null() + .ge(silo_provisioned + .query_source() + .select(silo_provisioned::cpus_provisioned) + .single_value() + .assume_not_null() + + cpus_provisioned_delta); + + let has_sufficient_memory = quotas + .query_source() + .select(quotas::memory) + .single_value() + .assume_not_null() + .ge(silo_provisioned + .query_source() + .select(silo_provisioned::ram_provisioned) + .single_value() + .assume_not_null() + + memory_provisioned_delta); + + let has_sufficient_storage = quotas + .query_source() + .select(quotas::storage) + .single_value() + .assume_not_null() + .ge(silo_provisioned + .query_source() + .select(silo_provisioned::virtual_disk_bytes_provisioned) + .single_value() + .assume_not_null() + + 
storage_provisioned_delta); + Self { query: Box::new(diesel::select((ExpressionAlias::new::< do_update::update, - >(not_allocted),))), + >( + not_allocted + .and(TrueOrCastError::new( + cpus_provisioned_delta.eq(0).or(has_sufficient_cpus), + NOT_ENOUGH_CPUS_SENTINEL, + )) + .and(TrueOrCastError::new( + memory_provisioned_delta + .eq(0) + .or(has_sufficient_memory), + NOT_ENOUGH_MEMORY_SENTINEL, + )) + .and(TrueOrCastError::new( + storage_provisioned_delta + .eq(0) + .or(has_sufficient_storage), + NOT_ENOUGH_STORAGE_SENTINEL, + )), + ),))), } } @@ -161,6 +280,67 @@ impl UpdatedProvisions { } } +#[derive(Subquery, QueryId)] +#[subquery(name = quotas)] +struct Quotas { + query: Box>, +} + +impl Quotas { + // TODO: We could potentially skip this in cases where we know we're removing a resource instead of inserting + fn new(parent_silo: &ParentSilo) -> Self { + use crate::db::schema::silo_quotas::dsl; + Self { + query: Box::new( + dsl::silo_quotas + .inner_join( + parent_silo + .query_source() + .on(dsl::silo_id.eq(parent_silo::id)), + ) + .select(( + dsl::silo_id, + dsl::cpus, + ExpressionAlias::new::( + dsl::memory_bytes, + ), + ExpressionAlias::new::( + dsl::storage_bytes, + ), + )), + ), + } + } +} + +#[derive(Subquery, QueryId)] +#[subquery(name = silo_provisioned)] +struct SiloProvisioned { + query: Box>, +} + +impl SiloProvisioned { + fn new(parent_silo: &ParentSilo) -> Self { + use virtual_provisioning_collection::dsl; + Self { + query: Box::new( + dsl::virtual_provisioning_collection + .inner_join( + parent_silo + .query_source() + .on(dsl::id.eq(parent_silo::id)), + ) + .select(( + dsl::id, + dsl::cpus_provisioned, + dsl::ram_provisioned, + dsl::virtual_disk_bytes_provisioned, + )), + ), + } + } +} + // This structure wraps a query, such that it can be used within a CTE. 
// // It generates a name that can be used by the "CteBuilder", but does not @@ -195,6 +375,15 @@ where } } +/// The virtual resource collection is only updated when a resource is inserted +/// or deleted from the resource provisioning table. By probing for the presence +/// or absence of a resource, we can update collections at the same time as we +/// create or destroy the resource, which helps make the operation idempotent. +enum UpdateKind { + Insert(VirtualProvisioningResource), + Delete(uuid::Uuid), +} + /// Constructs a CTE for updating resource provisioning information in all /// collections for a particular object. #[derive(QueryId)] @@ -220,7 +409,7 @@ impl VirtualProvisioningCollectionUpdate { // - values: The updated values to propagate through collections (iff // "do_update" evaluates to "true"). fn apply_update( - do_update: DoUpdate, + update_kind: UpdateKind, update: U, project_id: uuid::Uuid, values: V, @@ -237,6 +426,17 @@ impl VirtualProvisioningCollectionUpdate { &parent_silo, *crate::db::fixed_data::FLEET_ID, ); + + let quotas = Quotas::new(&parent_silo); + let silo_provisioned = SiloProvisioned::new(&parent_silo); + + let do_update = match update_kind { + UpdateKind::Insert(resource) => { + DoUpdate::new_for_insert(&silo_provisioned, "as, resource) + } + UpdateKind::Delete(id) => DoUpdate::new_for_delete(id), + }; + let updated_collections = UpdatedProvisions::new(&all_collections, &do_update, values); @@ -251,6 +451,8 @@ impl VirtualProvisioningCollectionUpdate { let cte = CteBuilder::new() .add_subquery(parent_silo) .add_subquery(all_collections) + .add_subquery(quotas) + .add_subquery(silo_provisioned) .add_subquery(do_update) .add_subquery(update) .add_subquery(updated_collections) @@ -273,8 +475,7 @@ impl VirtualProvisioningCollectionUpdate { provision.virtual_disk_bytes_provisioned = disk_byte_diff; Self::apply_update( - // We should insert the record if it does not already exist. 
- DoUpdate::new_for_insert(id), + UpdateKind::Insert(provision.clone()), // The query to actually insert the record. UnreferenceableSubquery( diesel::insert_into( @@ -305,8 +506,7 @@ impl VirtualProvisioningCollectionUpdate { use virtual_provisioning_resource::dsl as resource_dsl; Self::apply_update( - // We should delete the record if it exists. - DoUpdate::new_for_delete(id), + UpdateKind::Delete(id), // The query to actually delete the record. UnreferenceableSubquery( diesel::delete(resource_dsl::virtual_provisioning_resource) @@ -342,8 +542,7 @@ impl VirtualProvisioningCollectionUpdate { provision.ram_provisioned = ram_diff; Self::apply_update( - // We should insert the record if it does not already exist. - DoUpdate::new_for_insert(id), + UpdateKind::Insert(provision.clone()), // The query to actually insert the record. UnreferenceableSubquery( diesel::insert_into( @@ -378,8 +577,7 @@ impl VirtualProvisioningCollectionUpdate { use virtual_provisioning_resource::dsl as resource_dsl; Self::apply_update( - // We should delete the record if it exists. - DoUpdate::new_for_delete(id), + UpdateKind::Delete(id), // The query to actually delete the record. 
// // The filter condition here ensures that the provisioning record is diff --git a/nexus/src/app/external_endpoints.rs b/nexus/src/app/external_endpoints.rs index 1ab33c5c9c..0a6dd41db6 100644 --- a/nexus/src/app/external_endpoints.rs +++ b/nexus/src/app/external_endpoints.rs @@ -827,6 +827,7 @@ mod test { name: name.parse().unwrap(), description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode, admin_group_name: None, diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index d4c2d596f8..b92714a365 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -51,6 +51,7 @@ mod metrics; mod network_interface; mod oximeter; mod project; +mod quota; mod rack; pub(crate) mod saga; mod session; diff --git a/nexus/src/app/quota.rs b/nexus/src/app/quota.rs new file mode 100644 index 0000000000..f59069a9ab --- /dev/null +++ b/nexus/src/app/quota.rs @@ -0,0 +1,49 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Resource limits and system quotas + +use nexus_db_queries::authz; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db; +use nexus_db_queries::db::lookup; +use nexus_types::external_api::params; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::UpdateResult; +use uuid::Uuid; + +impl super::Nexus { + pub async fn silo_quotas_view( + &self, + opctx: &OpContext, + silo_lookup: &lookup::Silo<'_>, + ) -> Result { + let (.., authz_silo) = + silo_lookup.lookup_for(authz::Action::Read).await?; + self.db_datastore.silo_quotas_view(opctx, &authz_silo).await + } + + pub(crate) async fn fleet_list_quotas( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + self.db_datastore.fleet_list_quotas(opctx, pagparams).await + } + + pub(crate) async fn silo_update_quota( + &self, + opctx: &OpContext, + silo_lookup: &lookup::Silo<'_>, + updates: ¶ms::SiloQuotasUpdate, + ) -> UpdateResult { + let (.., authz_silo) = + silo_lookup.lookup_for(authz::Action::Modify).await?; + self.db_datastore + .silo_update_quota(opctx, &authz_silo, updates.clone().into()) + .await + } +} diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 1643ac301d..168e9eeaa3 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -203,6 +203,10 @@ impl super::Nexus { name: request.recovery_silo.silo_name, description: "built-in recovery Silo".to_string(), }, + // The recovery silo is initialized with no allocated capacity given it's + // not intended to be used to deploy workloads. Operators can add capacity + // after the fact if they want to use it for that purpose. 
+ quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: SiloIdentityMode::LocalOnly, admin_group_name: None, diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a6fd7a3ccb..6720f95c39 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -45,7 +45,7 @@ use nexus_db_queries::db::model::Name; use nexus_db_queries::{ authz::ApiResource, db::fixed_data::silo::INTERNAL_SILO_ID, }; -use nexus_types::external_api::params::ProjectSelector; +use nexus_types::external_api::{params::ProjectSelector, views::SiloQuotas}; use nexus_types::{ external_api::views::{SledInstance, Switch}, identity::AssetIdentityMetadata, @@ -280,6 +280,11 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(silo_policy_view)?; api.register(silo_policy_update)?; + api.register(system_quotas_list)?; + + api.register(silo_quotas_view)?; + api.register(silo_quotas_update)?; + api.register(silo_identity_provider_list)?; api.register(saml_identity_provider_create)?; @@ -510,6 +515,91 @@ async fn policy_update( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// Lists resource quotas for all silos +#[endpoint { + method = GET, + path = "/v1/system/silo-quotas", + tags = ["system/silos"], +}] +async fn system_quotas_list( + rqctx: RequestContext>, + query_params: Query, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + + let query = query_params.into_inner(); + let pagparams = data_page_params_for(&rqctx, &query)?; + + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let quotas = nexus + .fleet_list_quotas(&opctx, &pagparams) + .await? 
+ .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanById::results_page( + &query, + quotas, + &|_, quota: &SiloQuotas| quota.silo_id, + )?)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// View the resource quotas of a given silo +#[endpoint { + method = GET, + path = "/v1/system/silos/{silo}/quotas", + tags = ["system/silos"], +}] +async fn silo_quotas_view( + rqctx: RequestContext>, + path_params: Path, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let silo_lookup = + nexus.silo_lookup(&opctx, path_params.into_inner().silo)?; + let quota = nexus.silo_quotas_view(&opctx, &silo_lookup).await?; + Ok(HttpResponseOk(quota.into())) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Update the resource quotas of a given silo +/// +/// If a quota value is not specified, it will remain unchanged. +#[endpoint { + method = PUT, + path = "/v1/system/silos/{silo}/quotas", + tags = ["system/silos"], +}] +async fn silo_quotas_update( + rqctx: RequestContext>, + path_params: Path, + new_quota: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let silo_lookup = + nexus.silo_lookup(&opctx, path_params.into_inner().silo)?; + let quota = nexus + .silo_update_quota(&opctx, &silo_lookup, &new_quota.into_inner()) + .await?; + Ok(HttpResponseOk(quota.into())) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + /// List silos /// /// Lists silos that are discoverable based on the current permissions. 
diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 1848989bf9..0527d99490 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -287,6 +287,7 @@ pub async fn create_silo( name: silo_name.parse().unwrap(), description: "a silo".to_string(), }, + quotas: params::SiloQuotasCreate::arbitrarily_high_default(), discoverable, identity_mode, admin_group_name: None, diff --git a/nexus/tests/integration_tests/certificates.rs b/nexus/tests/integration_tests/certificates.rs index 1843fc28c8..5a34caab49 100644 --- a/nexus/tests/integration_tests/certificates.rs +++ b/nexus/tests/integration_tests/certificates.rs @@ -394,6 +394,11 @@ async fn test_silo_certificates() { .name(silo2.silo_name.clone()) .description("") .discoverable(false) + .quotas(oxide_client::types::SiloQuotasCreate { + cpus: 0, + memory: oxide_client::types::ByteCount(0), + storage: oxide_client::types::ByteCount(0), + }) .identity_mode(oxide_client::types::SiloIdentityMode::LocalOnly) .tls_certificates(vec![silo2_cert.try_into().unwrap()]), ) @@ -454,6 +459,11 @@ async fn test_silo_certificates() { .name(silo3.silo_name.clone()) .description("") .discoverable(false) + .quotas(oxide_client::types::SiloQuotasCreate { + cpus: 0, + memory: oxide_client::types::ByteCount(0), + storage: oxide_client::types::ByteCount(0), + }) .identity_mode(oxide_client::types::SiloIdentityMode::LocalOnly) .tls_certificates(vec![silo3_cert.try_into().unwrap()]), ) diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index e11902d0fe..bd6df210c0 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -85,12 +85,15 @@ lazy_static! 
{ format!("/v1/system/silos/{}", *DEMO_SILO_NAME); pub static ref DEMO_SILO_POLICY_URL: String = format!("/v1/system/silos/{}/policy", *DEMO_SILO_NAME); + pub static ref DEMO_SILO_QUOTAS_URL: String = + format!("/v1/system/silos/{}/quotas", *DEMO_SILO_NAME); pub static ref DEMO_SILO_CREATE: params::SiloCreate = params::SiloCreate { identity: IdentityMetadataCreateParams { name: DEMO_SILO_NAME.clone(), description: String::from(""), }, + quotas: params::SiloQuotasCreate::arbitrarily_high_default(), discoverable: true, identity_mode: shared::SiloIdentityMode::SamlJit, admin_group_name: None, @@ -950,6 +953,27 @@ lazy_static! { ), ], }, + VerifyEndpoint { + url: &DEMO_SILO_QUOTAS_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Get, + AllowedMethod::Put( + serde_json::to_value( + params::SiloQuotasCreate::empty() + ).unwrap() + ) + ], + }, + VerifyEndpoint { + url: "/v1/system/silo-quotas", + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Get + ], + }, VerifyEndpoint { url: "/v1/policy", visibility: Visibility::Public, diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 53de24c518..35c70bf874 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -24,6 +24,7 @@ mod oximeter; mod pantry; mod password_login; mod projects; +mod quotas; mod rack; mod role_assignments; mod roles_builtin; diff --git a/nexus/tests/integration_tests/quotas.rs b/nexus/tests/integration_tests/quotas.rs new file mode 100644 index 0000000000..2fddf4e05c --- /dev/null +++ b/nexus/tests/integration_tests/quotas.rs @@ -0,0 +1,312 @@ +use anyhow::Error; +use dropshot::test_util::ClientTestContext; +use dropshot::HttpErrorResponseBody; +use http::Method; +use nexus_test_utils::http_testing::AuthnMode; +use nexus_test_utils::http_testing::NexusRequest; +use 
nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::http_testing::TestResponse; +use nexus_test_utils::resource_helpers::create_local_user; +use nexus_test_utils::resource_helpers::grant_iam; +use nexus_test_utils::resource_helpers::object_create; +use nexus_test_utils::resource_helpers::populate_ip_pool; +use nexus_test_utils::resource_helpers::DiskTest; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params; +use nexus_types::external_api::shared; +use nexus_types::external_api::shared::SiloRole; +use nexus_types::external_api::views::SiloQuotas; +use omicron_common::api::external::ByteCount; +use omicron_common::api::external::IdentityMetadataCreateParams; +use omicron_common::api::external::InstanceCpuCount; + +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + +struct ResourceAllocator { + auth: AuthnMode, +} + +impl ResourceAllocator { + fn new(auth: AuthnMode) -> Self { + Self { auth } + } + + async fn set_quotas( + &self, + client: &ClientTestContext, + quotas: params::SiloQuotasUpdate, + ) -> Result { + NexusRequest::object_put( + client, + "/v1/system/silos/quota-test-silo/quotas", + Some("as), + ) + .authn_as(self.auth.clone()) + .execute() + .await + } + + async fn get_quotas(&self, client: &ClientTestContext) -> SiloQuotas { + NexusRequest::object_get( + client, + "/v1/system/silos/quota-test-silo/quotas", + ) + .authn_as(self.auth.clone()) + .execute() + .await + .expect("failed to fetch quotas") + .parsed_body() + .expect("failed to parse quotas") + } + + async fn provision_instance( + &self, + client: &ClientTestContext, + name: &str, + cpus: u16, + memory: u32, + ) -> Result { + NexusRequest::objects_post( + client, + "/v1/instances?project=project", + ¶ms::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: "".into(), + }, + ncpus: InstanceCpuCount(cpus), + memory: ByteCount::from_gibibytes_u32(memory), + hostname: 
"host".to_string(), + user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" + .to_vec(), + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: Vec::::new(), + disks: Vec::::new(), + start: false, + }, + ) + .authn_as(self.auth.clone()) + .execute() + .await + .expect("Instance should be created regardless of quotas"); + + NexusRequest::new( + RequestBuilder::new( + client, + Method::POST, + format!("/v1/instances/{}/start?project=project", name) + .as_str(), + ) + .body(None as Option<&serde_json::Value>), + ) + .authn_as(self.auth.clone()) + .execute() + .await + } + + async fn cleanup_instance( + &self, + client: &ClientTestContext, + name: &str, + ) -> TestResponse { + // Try to stop the instance + NexusRequest::new( + RequestBuilder::new( + client, + Method::POST, + format!("/v1/instances/{}/stop?project=project", name).as_str(), + ) + .body(None as Option<&serde_json::Value>), + ) + .authn_as(self.auth.clone()) + .execute() + .await + .expect("failed to stop instance"); + + NexusRequest::object_delete( + client, + format!("/v1/instances/{}?project=project", name).as_str(), + ) + .authn_as(self.auth.clone()) + .execute() + .await + .expect("failed to delete instance") + } + + async fn provision_disk( + &self, + client: &ClientTestContext, + name: &str, + size: u32, + ) -> Result { + NexusRequest::new( + RequestBuilder::new( + client, + Method::POST, + "/v1/disks?project=project", + ) + .body(Some(¶ms::DiskCreate { + identity: IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: "".into(), + }, + size: ByteCount::from_gibibytes_u32(size), + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + })), + ) + .authn_as(self.auth.clone()) + .execute() + .await + } +} + +async fn setup_silo_with_quota( + client: &ClientTestContext, + silo_name: &str, + quotas: params::SiloQuotasCreate, +) -> ResourceAllocator { + let silo = object_create( + 
client, + "/v1/system/silos", + ¶ms::SiloCreate { + identity: IdentityMetadataCreateParams { + name: silo_name.parse().unwrap(), + description: "".into(), + }, + quotas, + discoverable: true, + identity_mode: shared::SiloIdentityMode::LocalOnly, + admin_group_name: None, + tls_certificates: vec![], + mapped_fleet_roles: Default::default(), + }, + ) + .await; + + populate_ip_pool(&client, "default", None).await; + + // Create a silo user + let user = create_local_user( + client, + &silo, + &"user".parse().unwrap(), + params::UserPassword::LoginDisallowed, + ) + .await; + + // Make silo admin + grant_iam( + client, + format!("/v1/system/silos/{}", silo_name).as_str(), + SiloRole::Admin, + user.id, + AuthnMode::PrivilegedUser, + ) + .await; + + let auth_mode = AuthnMode::SiloUser(user.id); + + NexusRequest::objects_post( + client, + "/v1/projects", + ¶ms::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "project".parse().unwrap(), + description: "".into(), + }, + }, + ) + .authn_as(auth_mode.clone()) + .execute() + .await + .unwrap(); + + ResourceAllocator::new(auth_mode) +} + +#[nexus_test] +async fn test_quotas(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + // Simulate space for disks + DiskTest::new(&cptestctx).await; + + let system = setup_silo_with_quota( + &client, + "quota-test-silo", + params::SiloQuotasCreate::empty(), + ) + .await; + + // Ensure trying to provision an instance with empty quotas fails + let err = system + .provision_instance(client, "instance", 1, 1) + .await + .unwrap() + .parsed_body::() + .expect("failed to parse error body"); + assert!( + err.message.contains("vCPU Limit Exceeded"), + "Unexpected error: {0}", + err.message + ); + system.cleanup_instance(client, "instance").await; + + // Up the CPU, memory quotas + system + .set_quotas( + client, + params::SiloQuotasUpdate { + cpus: Some(4), + memory: Some(ByteCount::from_gibibytes_u32(15)), + storage: 
Some(ByteCount::from_gibibytes_u32(2)), + }, + ) + .await + .expect("failed to set quotas"); + + let quotas = system.get_quotas(client).await; + assert_eq!(quotas.cpus, 4); + assert_eq!(quotas.memory, ByteCount::from_gibibytes_u32(15)); + assert_eq!(quotas.storage, ByteCount::from_gibibytes_u32(2)); + + // Ensure memory quota is enforced + let err = system + .provision_instance(client, "instance", 1, 16) + .await + .unwrap() + .parsed_body::() + .expect("failed to parse error body"); + assert!( + err.message.contains("Memory Limit Exceeded"), + "Unexpected error: {0}", + err.message + ); + system.cleanup_instance(client, "instance").await; + + // Allocating instance should now succeed + system + .provision_instance(client, "instance", 2, 10) + .await + .expect("Instance should've had enough resources to be provisioned"); + + let err = system + .provision_disk(client, "disk", 3) + .await + .unwrap() + .parsed_body::() + .expect("failed to parse error body"); + assert!( + err.message.contains("Storage Limit Exceeded"), + "Unexpected error: {0}", + err.message + ); + + system + .provision_disk(client, "disk", 1) + .await + .expect("Disk should be provisioned"); +} diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs index 3c69c8b7cd..a5d4b47eaa 100644 --- a/nexus/tests/integration_tests/silos.rs +++ b/nexus/tests/integration_tests/silos.rs @@ -68,6 +68,7 @@ async fn test_silos(cptestctx: &ControlPlaneTestContext) { name: cptestctx.silo_name.clone(), description: "a silo".to_string(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -284,6 +285,7 @@ async fn test_silo_admin_group(cptestctx: &ControlPlaneTestContext) { name: "silo-name".parse().unwrap(), description: "a silo".to_string(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::SamlJit, admin_group_name: 
Some("administrator".into()), @@ -2256,6 +2258,7 @@ async fn test_silo_authn_policy(cptestctx: &ControlPlaneTestContext) { name: silo_name, description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -2332,6 +2335,7 @@ async fn check_fleet_privileges( name: SILO_NAME.parse().unwrap(), description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -2360,6 +2364,7 @@ async fn check_fleet_privileges( name: SILO_NAME.parse().unwrap(), description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -2387,6 +2392,7 @@ async fn check_fleet_privileges( name: SILO_NAME.parse().unwrap(), description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, @@ -2419,6 +2425,7 @@ async fn check_fleet_privileges( name: SILO_NAME.parse().unwrap(), description: String::new(), }, + quotas: params::SiloQuotasCreate::empty(), discoverable: false, identity_mode: shared::SiloIdentityMode::LocalOnly, admin_group_name: None, diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 5a4a61132e..3f77f4cb26 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -183,9 +183,12 @@ silo_identity_provider_list GET /v1/system/identity-providers silo_list GET /v1/system/silos silo_policy_update PUT /v1/system/silos/{silo}/policy silo_policy_view GET /v1/system/silos/{silo}/policy +silo_quotas_update PUT /v1/system/silos/{silo}/quotas +silo_quotas_view GET /v1/system/silos/{silo}/quotas silo_user_list GET /v1/system/users silo_user_view GET /v1/system/users/{user_id} silo_view GET 
/v1/system/silos/{silo} +system_quotas_list GET /v1/system/silo-quotas user_builtin_list GET /v1/system/users-builtin user_builtin_view GET /v1/system/users-builtin/{user} diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index cde448c5b7..f27a6619e2 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -288,6 +288,12 @@ pub struct SiloCreate { /// endpoints. These should be valid for the Silo's DNS name(s). pub tls_certificates: Vec, + /// Limits the amount of provisionable CPU, memory, and storage in the Silo. + /// CPU and memory are only consumed by running instances, while storage is + /// consumed by any disk or snapshot. A value of 0 means that resource is + /// *not* provisionable. + pub quotas: SiloQuotasCreate, + /// Mapping of which Fleet roles are conferred by each Silo role /// /// The default is that no Fleet roles are conferred by any Silo roles @@ -297,6 +303,54 @@ pub struct SiloCreate { BTreeMap>, } +/// The amount of provisionable resources for a Silo +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct SiloQuotasCreate { + /// The amount of virtual CPUs available for running instances in the Silo + pub cpus: i64, + /// The amount of RAM (in bytes) available for running instances in the Silo + pub memory: ByteCount, + /// The amount of storage (in bytes) available for disks or snapshots + pub storage: ByteCount, +} + +impl SiloQuotasCreate { + /// All quotas set to 0 + pub fn empty() -> Self { + Self { + cpus: 0, + memory: ByteCount::from(0), + storage: ByteCount::from(0), + } + } + + /// An arbitrarily high but identifiable default for quotas + /// that can be used for creating a Silo for testing + /// + /// The only silo that customers will see that this should be set on is the default + /// silo. 
Ultimately the default silo should only be initialized with an empty quota, + /// but as tests currently relying on it having a quota, we need to set something. + pub fn arbitrarily_high_default() -> Self { + Self { + cpus: 9999999999, + memory: ByteCount::try_from(999999999999999999_u64).unwrap(), + storage: ByteCount::try_from(999999999999999999_u64).unwrap(), + } + } +} + +/// Updateable properties of a Silo's resource limits. +/// If a value is omitted it will not be updated. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct SiloQuotasUpdate { + /// The amount of virtual CPUs available for running instances in the Silo + pub cpus: Option, + /// The amount of RAM (in bytes) available for running instances in the Silo + pub memory: Option, + /// The amount of storage (in bytes) available for disks or snapshots + pub storage: Option, +} + /// Create-time parameters for a `User` #[derive(Clone, Deserialize, Serialize, JsonSchema)] pub struct UserCreate { diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index af17e7e840..ecd459594a 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -49,6 +49,14 @@ pub struct Silo { BTreeMap>, } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct SiloQuotas { + pub silo_id: Uuid, + pub cpus: i64, + pub memory: ByteCount, + pub storage: ByteCount, +} + // IDENTITY PROVIDER #[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] diff --git a/openapi/nexus.json b/openapi/nexus.json index 7afb6cdc2f..2ddd5f0e94 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -6210,6 +6210,65 @@ } } }, + "/v1/system/silo-quotas": { + "get": { + "tags": [ + "system/silos" + ], + "summary": "Lists resource quotas for all silos", + "operationId": "system_quotas_list", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + 
"schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/IdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SiloQuotasResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + } + }, "/v1/system/silos": { "get": { "tags": [ @@ -6458,6 +6517,91 @@ } } }, + "/v1/system/silos/{silo}/quotas": { + "get": { + "tags": [ + "system/silos" + ], + "summary": "View the resource quotas of a given silo", + "operationId": "silo_quotas_view", + "parameters": [ + { + "in": "path", + "name": "silo", + "description": "Name or ID of the silo", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SiloQuotas" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "tags": [ + "system/silos" + ], + "summary": "Update the resource quotas of a given silo", + "description": "If a quota value is not specified, it will remain unchanged.", + "operationId": "silo_quotas_update", + "parameters": [ + { + "in": "path", + "name": "silo", + "description": "Name or ID of the silo", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { 
+ "$ref": "#/components/schemas/SiloQuotasUpdate" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SiloQuotas" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/system/users": { "get": { "tags": [ @@ -13206,6 +13350,14 @@ "name": { "$ref": "#/components/schemas/Name" }, + "quotas": { + "description": "Limits the amount of provisionable CPU, memory, and storage in the Silo. CPU and memory are only consumed by running instances, while storage is consumed by any disk or snapshot. A value of 0 means that resource is *not* provisionable.", + "allOf": [ + { + "$ref": "#/components/schemas/SiloQuotasCreate" + } + ] + }, "tls_certificates": { "description": "Initial TLS certificates to be used for the new Silo's console and API endpoints. These should be valid for the Silo's DNS name(s).", "type": "array", @@ -13219,6 +13371,7 @@ "discoverable", "identity_mode", "name", + "quotas", "tls_certificates" ] }, @@ -13241,6 +13394,114 @@ } ] }, + "SiloQuotas": { + "type": "object", + "properties": { + "cpus": { + "type": "integer", + "format": "int64" + }, + "memory": { + "$ref": "#/components/schemas/ByteCount" + }, + "silo_id": { + "type": "string", + "format": "uuid" + }, + "storage": { + "$ref": "#/components/schemas/ByteCount" + } + }, + "required": [ + "cpus", + "memory", + "silo_id", + "storage" + ] + }, + "SiloQuotasCreate": { + "description": "The amount of provisionable resources for a Silo", + "type": "object", + "properties": { + "cpus": { + "description": "The amount of virtual CPUs available for running instances in the Silo", + "type": "integer", + "format": "int64" + }, + "memory": { + "description": "The amount of RAM (in bytes) available for running instances in the Silo", + "allOf": [ + { + "$ref": 
"#/components/schemas/ByteCount" + } + ] + }, + "storage": { + "description": "The amount of storage (in bytes) available for disks or snapshots", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + } + }, + "required": [ + "cpus", + "memory", + "storage" + ] + }, + "SiloQuotasResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/SiloQuotas" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, + "SiloQuotasUpdate": { + "description": "Updateable properties of a Silo's resource limits. If a value is omitted it will not be updated.", + "type": "object", + "properties": { + "cpus": { + "nullable": true, + "description": "The amount of virtual CPUs available for running instances in the Silo", + "type": "integer", + "format": "int64" + }, + "memory": { + "nullable": true, + "description": "The amount of RAM (in bytes) available for running instances in the Silo", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + }, + "storage": { + "nullable": true, + "description": "The amount of storage (in bytes) available for disks or snapshots", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + } + } + }, "SiloResultsPage": { "description": "A single page of results", "type": "object", diff --git a/schema/crdb/20.0.0/up01.sql b/schema/crdb/20.0.0/up01.sql new file mode 100644 index 0000000000..6a95c41e48 --- /dev/null +++ b/schema/crdb/20.0.0/up01.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS omicron.public.silo_quotas ( + silo_id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + cpus INT8 NOT NULL, + memory_bytes INT8 NOT NULL, + storage_bytes INT8 NOT NULL +); \ No 
newline at end of file diff --git a/schema/crdb/20.0.0/up02.sql b/schema/crdb/20.0.0/up02.sql new file mode 100644 index 0000000000..2909e379ca --- /dev/null +++ b/schema/crdb/20.0.0/up02.sql @@ -0,0 +1,28 @@ +set + local disallow_full_table_scans = off; + +-- Adds quotas for any existing silos without them. +-- The selected quotas are arbitrarily high placeholders (the same values as +-- SiloQuotasCreate::arbitrarily_high_default), not capacity-derived limits, +-- so this migration does not constrain existing silos. +INSERT INTO + silo_quotas ( + silo_id, + time_created, + time_modified, + cpus, + memory_bytes, + storage_bytes + ) +SELECT + s.id AS silo_id, + NOW() AS time_created, + NOW() AS time_modified, + 9999999999 AS cpus, + 999999999999999999 AS memory_bytes, + 999999999999999999 AS storage_bytes +FROM + silo s + LEFT JOIN silo_quotas sq ON s.id = sq.silo_id +WHERE + sq.silo_id IS NULL; \ No newline at end of file diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 0bf365a2f1..be7291b4e4 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -827,6 +827,15 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_ssh_key_by_silo_user ON omicron.public. ) WHERE time_deleted IS NULL; +CREATE TABLE IF NOT EXISTS omicron.public.silo_quotas ( + silo_id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + cpus INT8 NOT NULL, + memory_bytes INT8 NOT NULL, + storage_bytes INT8 NOT NULL +); + /* * Projects */ @@ -3062,7 +3071,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '19.0.0', NULL) + ( TRUE, NOW(), NOW(), '20.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;