Skip to content

Commit

Permalink
Add resource limits (#4605)
Browse files Browse the repository at this point in the history
This PR aims to introduce `quotas` as a concept into Nexus for allowing
operators to enforce virtual resource limits at the silo level. The
initial implementation will be limited to checks during instance start,
disk creation, and snapshot creation. We will _not_ be doing advanced
quota recalculation as system resources change. We will _not yet_ be
enforcing intelligent quota caps where the sum of all quotas must be
less than the theoretical available system virtual resources.

The implementation of this functionality is shaped by
[RFD-427](https://rfd.shared.oxide.computer/rfd/0427) but some desired
functionality will be deferred given time/complexity constraints.

Longer term I believe the shape of quotas and perhaps even their
relationship to silos may change. This PR implements a simplified
version that matches closely to how the virtual resource provisioning
tables are already built out. I know there's some oddness around the
shape of the quotas table with it not having its own ID and otherwise
being mildly divergent from other resources, but this was largely to
ensure we could migrate to another solution _and_ not overcomplicate the
initial implementation.

## TODO
- [x] Add quota creation as a step of silo creation
- [x] Add initialization checks in CTEs for instance create, etc to only
proceed when quota unmet
- [x] Wire up CTE sentinels in upstream callsites
- [x] Add backfill migration for existing customers
- [x] Add tests for quota enforcement
- [x] Delete the quotas when the silo is deleted

---------

Co-authored-by: Sean Klein <[email protected]>
  • Loading branch information
zephraph and smklein authored Dec 13, 2023
1 parent 1a7863c commit 877a886
Show file tree
Hide file tree
Showing 32 changed files with 1,429 additions and 31 deletions.
1 change: 1 addition & 0 deletions common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,7 @@ pub enum ResourceType {
Silo,
SiloUser,
SiloGroup,
SiloQuotas,
IdentityProvider,
SamlIdentityProvider,
SshKey,
Expand Down
17 changes: 15 additions & 2 deletions end-to-end-tests/src/bin/bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ use end_to_end_tests::helpers::{generate_name, get_system_ip_pool};
use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError};
use oxide_client::types::{
ByteCount, DeviceAccessTokenRequest, DeviceAuthRequest, DeviceAuthVerify,
DiskCreate, DiskSource, IpRange, Ipv4Range,
DiskCreate, DiskSource, IpRange, Ipv4Range, SiloQuotasUpdate,
};
use oxide_client::{
ClientDisksExt, ClientHiddenExt, ClientProjectsExt,
ClientSystemNetworkingExt,
ClientSystemNetworkingExt, ClientSystemSilosExt,
};
use serde::{de::DeserializeOwned, Deserialize};
use std::time::Duration;
Expand Down Expand Up @@ -45,6 +45,19 @@ async fn main() -> Result<()> {
.send()
.await?;

// ===== SET UP QUOTAS ===== //
eprintln!("setting up quotas...");
client
.silo_quotas_update()
.silo("recovery")
.body(SiloQuotasUpdate {
cpus: Some(16),
memory: Some(ByteCount(1024 * 1024 * 1024 * 10)),
storage: Some(ByteCount(1024 * 1024 * 1024 * 1024)),
})
.send()
.await?;

// ===== ENSURE DATASETS ARE READY ===== //
eprintln!("ensuring datasets are ready...");
let ctx = Context::from_client(client).await?;
Expand Down
2 changes: 1 addition & 1 deletion end-to-end-tests/src/helpers/ctx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ fn rss_config() -> Result<SetupServiceConfig> {
let content =
std::fs::read_to_string(&path).unwrap_or(RSS_CONFIG_STR.to_string());
toml::from_str(&content)
.with_context(|| format!("parsing config-rss as TOML"))
.with_context(|| "parsing config-rss as TOML".to_string())
}

fn nexus_external_dns_name(config: &SetupServiceConfig) -> String {
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ mod system_update;
// for join-based marker trait generation.
mod ipv4_nat_entry;
pub mod queries;
mod quota;
mod rack;
mod region;
mod region_snapshot;
Expand Down Expand Up @@ -139,6 +140,7 @@ pub use physical_disk::*;
pub use physical_disk_kind::*;
pub use producer_endpoint::*;
pub use project::*;
pub use quota::*;
pub use rack::*;
pub use region::*;
pub use region_snapshot::*;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//! for the construction of this query.
use crate::schema::silo;
use crate::schema::silo_quotas;
use crate::schema::virtual_provisioning_collection;

table! {
Expand All @@ -28,11 +29,32 @@ table! {
}
}

// Diesel table definition named `quotas`, keyed on `silo_id`, exposing the
// per-silo `cpus`, `memory` and `storage` limits as `Int8` columns.
// NOTE(review): presumably describes a subquery/CTE used while constructing
// the provisioning update query rather than a physical table -- confirm.
table! {
quotas (silo_id) {
silo_id -> Uuid,
cpus -> Int8,
memory -> Int8,
storage -> Int8,
}
}

// Diesel table definition named `silo_provisioned`, exposing an `id` plus the
// provisioned disk bytes, CPU count and RAM as `Int8` columns.
// NOTE(review): presumably describes a subquery/CTE holding a silo's current
// provisioning totals rather than a physical table -- confirm.
table! {
silo_provisioned {
id -> Uuid,
virtual_disk_bytes_provisioned -> Int8,
cpus_provisioned -> Int8,
ram_provisioned -> Int8,
}
}

// Allow `silo` and `parent_silo` to be referenced together in one query.
diesel::allow_tables_to_appear_in_same_query!(silo, parent_silo,);

// Allow the provisioning collection table, the `silo_quotas` table and the
// query-local table definitions in this file to be referenced together in
// one query.
diesel::allow_tables_to_appear_in_same_query!(
virtual_provisioning_collection,
silo_quotas,
parent_silo,
all_collections,
do_update,
quotas,
silo_provisioned
);
109 changes: 109 additions & 0 deletions nexus/db-model/src/quota.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
use super::ByteCount;
use crate::schema::silo_quotas;
use chrono::{DateTime, Utc};
use nexus_types::external_api::{params, views};
use serde::{Deserialize, Serialize};
use uuid::Uuid;

/// Database model for a row of the `silo_quotas` table: the resource limits
/// (CPUs, memory, storage) associated with a single silo.
///
/// NOTE(review): `Queryable` loads columns by position, so the field order
/// here is expected to mirror the column order of the `silo_quotas` table
/// definition -- confirm before reordering fields.
#[derive(
Queryable,
Insertable,
Debug,
Clone,
Selectable,
Serialize,
Deserialize,
AsChangeset,
)]
#[diesel(table_name = silo_quotas)]
pub struct SiloQuotas {
/// ID of the silo these quotas belong to
pub silo_id: Uuid,
/// When this quota record was constructed
pub time_created: DateTime<Utc>,
/// When this quota record was last modified
pub time_modified: DateTime<Utc>,

/// The number of CPUs that this silo is allowed to use
pub cpus: i64,

/// The amount of memory (in bytes) that this silo is allowed to use
#[diesel(column_name = memory_bytes)]
pub memory: ByteCount,

/// The amount of storage (in bytes) that this silo is allowed to use
#[diesel(column_name = storage_bytes)]
pub storage: ByteCount,
}

impl SiloQuotas {
    /// Builds a quota record for `silo_id` with the given limits.
    ///
    /// `time_created` and `time_modified` are both set to the same instant,
    /// sampled once from `Utc::now()`.
    pub fn new(
        silo_id: Uuid,
        cpus: i64,
        memory: ByteCount,
        storage: ByteCount,
    ) -> Self {
        // Read the clock a single time so that a freshly created record
        // never reports a modification time that differs from its creation
        // time.
        let now = Utc::now();
        Self {
            silo_id,
            time_created: now,
            time_modified: now,
            cpus,
            memory,
            storage,
        }
    }

    /// Builds a quota record for `silo_id` using the limits returned by
    /// `params::SiloQuotasCreate::arbitrarily_high_default()`.
    pub fn arbitrarily_high_default(silo_id: Uuid) -> Self {
        let count = params::SiloQuotasCreate::arbitrarily_high_default();
        Self::new(
            silo_id,
            count.cpus,
            count.memory.into(),
            count.storage.into(),
        )
    }
}

impl From<SiloQuotas> for views::SiloQuotas {
fn from(silo_quotas: SiloQuotas) -> Self {
Self {
silo_id: silo_quotas.silo_id,
cpus: silo_quotas.cpus,
memory: silo_quotas.memory.into(),
storage: silo_quotas.storage.into(),
}
}
}

impl From<views::SiloQuotas> for SiloQuotas {
    /// Converts an external API view into a database model.
    ///
    /// The view carries no timestamps, so the resulting record is stamped
    /// with the current time by [`SiloQuotas::new`].
    fn from(silo_quotas: views::SiloQuotas) -> Self {
        // Go through the shared constructor so there is a single place that
        // decides how a new record is populated.
        Self::new(
            silo_quotas.silo_id,
            silo_quotas.cpus,
            silo_quotas.memory.into(),
            silo_quotas.storage.into(),
        )
    }
}

/// Describes a set of updates for the [`SiloQuotas`] model.
///
/// Each limit is optional; `time_modified` is always present.
/// NOTE(review): presumably a `None` limit leaves the corresponding column
/// untouched (diesel's usual `AsChangeset` handling of `Option`) -- confirm.
#[derive(AsChangeset)]
#[diesel(table_name = silo_quotas)]
pub struct SiloQuotasUpdate {
/// New CPU limit, if it is being changed
pub cpus: Option<i64>,
/// New memory limit in bytes (`memory_bytes` column), if it is being changed
#[diesel(column_name = memory_bytes)]
pub memory: Option<i64>,
/// New storage limit in bytes (`storage_bytes` column), if it is being changed
#[diesel(column_name = storage_bytes)]
pub storage: Option<i64>,
/// Modification timestamp written along with the update
pub time_modified: DateTime<Utc>,
}

impl From<params::SiloQuotasUpdate> for SiloQuotasUpdate {
    /// Turns the API update parameters into a database changeset, converting
    /// the optional byte counts to `i64` and stamping the changeset with the
    /// current time.
    fn from(params: params::SiloQuotasUpdate) -> Self {
        let memory: Option<i64> = params.memory.map(Into::into);
        let storage: Option<i64> = params.storage.map(Into::into);
        Self { cpus: params.cpus, memory, storage, time_modified: Utc::now() }
    }
}
13 changes: 12 additions & 1 deletion nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,17 @@ table! {
}
}

// Diesel table definition for `silo_quotas`, keyed on `silo_id`: creation
// and modification timestamps plus the CPU count, memory bytes and storage
// bytes limits.
table! {
silo_quotas(silo_id) {
silo_id -> Uuid,
time_created -> Timestamptz,
time_modified -> Timestamptz,
cpus -> Int8,
memory_bytes -> Int8,
storage_bytes -> Int8,
}
}

table! {
network_interface (id) {
id -> Uuid,
Expand Down Expand Up @@ -1322,7 +1333,7 @@ table! {
///
/// This should be updated whenever the schema is changed. For more details,
/// refer to: schema/crdb/README.adoc
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(19, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(20, 0, 0);

allow_tables_to_appear_in_same_query!(
system_update,
Expand Down
1 change: 1 addition & 0 deletions nexus/db-queries/src/db/datastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ mod network_interface;
mod oximeter;
mod physical_disk;
mod project;
mod quota;
mod rack;
mod region;
mod region_snapshot;
Expand Down
127 changes: 127 additions & 0 deletions nexus/db-queries/src/db/datastore/quota.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
use super::DataStore;
use crate::authz;
use crate::context::OpContext;
use crate::db;
use crate::db::error::public_error_from_diesel;
use crate::db::error::ErrorHandler;
use crate::db::pagination::paginated;
use crate::db::pool::DbConnection;
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::prelude::*;
use nexus_db_model::SiloQuotas;
use nexus_db_model::SiloQuotasUpdate;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::DeleteResult;
use omicron_common::api::external::Error;
use omicron_common::api::external::ListResultVec;
use omicron_common::api::external::ResourceType;
use omicron_common::api::external::UpdateResult;
use uuid::Uuid;

impl DataStore {
    /// Creates new quotas for a silo. This is grouped with silo creation
    /// and shouldn't be called outside of that flow.
    ///
    /// An authz check _cannot_ be performed here because the authz initialization
    /// isn't complete and will lead to a db deadlock.
    ///
    /// See <https://github.com/oxidecomputer/omicron/blob/07eb7dafc20e35e44edf429fcbb759cbb33edd5f/nexus/db-queries/src/db/datastore/rack.rs#L407-L410>
    ///
    /// The row inserted is `quotas` exactly as given; `authz_silo` is only
    /// used to name the silo in the error reported when the insert fails.
    pub async fn silo_quotas_create(
        &self,
        conn: &async_bb8_diesel::Connection<DbConnection>,
        authz_silo: &authz::Silo,
        quotas: SiloQuotas,
    ) -> Result<(), Error> {
        let silo_id = authz_silo.id();
        use db::schema::silo_quotas::dsl;

        diesel::insert_into(dsl::silo_quotas)
            .values(quotas)
            .execute_async(conn)
            .await
            .map_err(|e| {
                public_error_from_diesel(
                    e,
                    ErrorHandler::Conflict(
                        ResourceType::SiloQuotas,
                        &silo_id.to_string(),
                    ),
                )
            })
            .map(|_| ())
    }

    /// Deletes the quotas row belonging to `authz_silo`, using the
    /// caller-supplied connection `conn`.
    ///
    /// Requires `Delete` permission on the silo.
    pub async fn silo_quotas_delete(
        &self,
        opctx: &OpContext,
        conn: &async_bb8_diesel::Connection<DbConnection>,
        authz_silo: &authz::Silo,
    ) -> DeleteResult {
        // Given that the quotas right now are somewhat of an extension of the
        // Silo we just check for delete permission on the silo itself.
        opctx.authorize(authz::Action::Delete, authz_silo).await?;

        use db::schema::silo_quotas::dsl;
        diesel::delete(dsl::silo_quotas)
            .filter(dsl::silo_id.eq(authz_silo.id()))
            .execute_async(conn)
            .await
            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;

        Ok(())
    }

    /// Applies `updates` to the quotas row of `authz_silo` and returns the
    /// row as it stands after the update.
    ///
    /// Requires `Modify` permission on the silo.
    pub async fn silo_update_quota(
        &self,
        opctx: &OpContext,
        authz_silo: &authz::Silo,
        updates: SiloQuotasUpdate,
    ) -> UpdateResult<SiloQuotas> {
        opctx.authorize(authz::Action::Modify, authz_silo).await?;
        use db::schema::silo_quotas::dsl;
        let silo_id = authz_silo.id();
        diesel::update(dsl::silo_quotas)
            .filter(dsl::silo_id.eq(silo_id))
            .set(updates)
            .returning(SiloQuotas::as_returning())
            .get_result_async(&*self.pool_connection_authorized(opctx).await?)
            .await
            .map_err(|e| {
                public_error_from_diesel(
                    e,
                    ErrorHandler::Conflict(
                        ResourceType::SiloQuotas,
                        &silo_id.to_string(),
                    ),
                )
            })
    }

    /// Fetches the quotas row of `authz_silo`.
    ///
    /// Requires `Read` permission on the silo.
    pub async fn silo_quotas_view(
        &self,
        opctx: &OpContext,
        authz_silo: &authz::Silo,
    ) -> Result<SiloQuotas, Error> {
        opctx.authorize(authz::Action::Read, authz_silo).await?;
        use db::schema::silo_quotas::dsl;
        dsl::silo_quotas
            .filter(dsl::silo_id.eq(authz_silo.id()))
            // Select the model's columns explicitly, as the other readers in
            // this impl do, instead of relying on the table's column order
            // lining up with the struct's field order.
            .select(SiloQuotas::as_select())
            .first_async(&*self.pool_connection_authorized(opctx).await?)
            .await
            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
    }

    /// Lists quotas rows across all silos, paginated by silo ID.
    ///
    /// Requires `ListChildren` permission on the fleet.
    pub async fn fleet_list_quotas(
        &self,
        opctx: &OpContext,
        pagparams: &DataPageParams<'_, Uuid>,
    ) -> ListResultVec<SiloQuotas> {
        opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
        use db::schema::silo_quotas::dsl;
        paginated(dsl::silo_quotas, dsl::silo_id, pagparams)
            .select(SiloQuotas::as_select())
            .load_async(&*self.pool_connection_authorized(opctx).await?)
            .await
            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
    }
}
2 changes: 2 additions & 0 deletions nexus/db-queries/src/db/datastore/rack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -912,6 +912,8 @@ mod test {
name: "test-silo".parse().unwrap(),
description: String::new(),
},
// Use the arbitrarily high default quotas for the test silo
quotas: external_params::SiloQuotasCreate::arbitrarily_high_default(),
discoverable: false,
identity_mode: SiloIdentityMode::LocalOnly,
admin_group_name: None,
Expand Down
Loading

0 comments on commit 877a886

Please sign in to comment.