From 6dbef2a4d5d8aa119f626f515cf9a6d374def072 Mon Sep 17 00:00:00 2001 From: Rain Date: Sun, 19 Nov 2023 17:58:46 -0800 Subject: [PATCH 1/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5 --- nexus/db-model/src/lib.rs | 2 ++ .../db-model/src/queries/region_allocation.rs | 2 ++ nexus/db-model/src/schema.rs | 1 + nexus/db-model/src/sled.rs | 2 +- nexus/db-model/src/sled_provision_state.rs | 20 +++++++++++++++++++ nexus/db-queries/src/db/datastore/sled.rs | 5 +++++ .../src/db/queries/region_allocation.rs | 10 +++++++++- schema/crdb/dbinit.sql | 10 ++++++++++ 8 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 nexus/db-model/src/sled_provision_state.rs diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 6b65eb87ec..a9403f427b 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -70,6 +70,7 @@ mod silo_user; mod silo_user_password_hash; mod sled; mod sled_instance; +mod sled_provision_state; mod sled_resource; mod sled_resource_kind; mod snapshot; @@ -151,6 +152,7 @@ pub use silo_user::*; pub use silo_user_password_hash::*; pub use sled::*; pub use sled_instance::*; +pub use sled_provision_state::*; pub use sled_resource::*; pub use sled_resource_kind::*; pub use snapshot::*; diff --git a/nexus/db-model/src/queries/region_allocation.rs b/nexus/db-model/src/queries/region_allocation.rs index 2025e79fb8..a1b9e0373a 100644 --- a/nexus/db-model/src/queries/region_allocation.rs +++ b/nexus/db-model/src/queries/region_allocation.rs @@ -23,6 +23,7 @@ // a CTE (where we want the alias name to come first). use crate::schema::dataset; +use crate::schema::sled; use crate::schema::zpool; table! { @@ -157,6 +158,7 @@ diesel::allow_tables_to_appear_in_same_query!( diesel::allow_tables_to_appear_in_same_query!( old_zpool_usage, zpool, + sled, proposed_dataset_changes, ); diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 4844f2a33f..5236387655 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -729,6 +729,7 @@ table! { rack_id -> Uuid, is_scrimlet -> Bool, + provision_state -> crate::SledProvisionStateEnum, serial_number -> Text, part_number -> Text, revision -> Int8, diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 5e059946ff..a77aae001c 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -4,8 +4,8 @@ use super::{ByteCount, Generation, SqlU16, SqlU32}; use crate::collection::DatastoreCollectionConfig; -use crate::ipv6; use crate::schema::{physical_disk, service, sled, zpool}; +use crate::{ipv6, SledProvisionState}; use chrono::{DateTime, Utc}; use db_macros::Asset; use nexus_types::{external_api::views, identity::Asset}; diff --git a/nexus/db-model/src/sled_provision_state.rs b/nexus/db-model/src/sled_provision_state.rs new file mode 100644 index 0000000000..91947aa93e --- /dev/null +++ b/nexus/db-model/src/sled_provision_state.rs @@ -0,0 +1,20 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::impl_enum_type; +use serde::{Deserialize, Serialize}; + +impl_enum_type!( + #[derive(Clone, SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "sled_provision_state"))] + pub struct SledProvisionStateEnum; + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[diesel(sql_type = SledProvisionStateEnum)] + pub enum SledProvisionState; + + // Enum values + Provisionable => b"provisionable" + NotProvisionable => b"not_provisionable" +); diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index f4f5188057..ad478caf25 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -149,6 +149,11 @@ impl DataStore { .and(sled_has_space_in_reservoir), ) .filter(sled_dsl::time_deleted.is_null()) + // Filter out sleds that are not provisionable. + .filter( + sled_dsl::provision_state + .eq(db::model::SledProvisionState::Provisionable), + ) .select(sled_dsl::id) .into_boxed(); diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index a080af4c37..031be92c08 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -290,6 +290,7 @@ impl CandidateZpools { seed: u128, distinct_sleds: bool, ) -> Self { + use schema::sled::dsl as sled_dsl; use schema::zpool::dsl as zpool_dsl; // Why are we using raw `diesel::dsl::sql` here? @@ -310,13 +311,20 @@ impl CandidateZpools { + diesel::dsl::sql(&zpool_size_delta.to_string())) .le(diesel::dsl::sql(zpool_dsl::total_size::NAME)); + // We need to join on the sled table to access provision_state. + let with_sled = sled_dsl::sled.on(zpool_dsl::sled_id.eq(sled_dsl::id)); let with_zpool = zpool_dsl::zpool - .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id)); + .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id)) + .inner_join(with_sled); + + let sled_is_provisionable = sled_dsl::provision_state + .eq(crate::db::model::SledProvisionState::Provisionable); let base_query = old_zpool_usage .query_source() .inner_join(with_zpool) .filter(it_will_fit) + .filter(sled_is_provisionable) .select((old_zpool_usage::dsl::pool_id,)); let query = if distinct_sleds { diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index a74cabfe6e..e1938c34b6 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -73,6 +73,13 @@ CREATE TABLE IF NOT EXISTS omicron.public.rack ( * Sleds */ +CREATE TYPE IF NOT EXISTS omicron.public.sled_provision_state ( + /* New resources can be provisioned onto the sled */ + 'provisionable', + /* Resources must not be provisioned onto the sled */ + 'not_provisionable', +) + CREATE TABLE IF NOT EXISTS omicron.public.sled ( /* Identity metadata (asset) */ id UUID PRIMARY KEY, @@ -92,6 +99,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled ( part_number STRING(63) NOT NULL, revision INT8 NOT NULL, + /* The state of whether resources should be provisioned onto the sled */ + provision_state omicron.public.sled_provision_state NOT NULL, + /* CPU & RAM summary for the sled */ usable_hardware_threads INT8 CHECK (usable_hardware_threads BETWEEN 0 AND 4294967295) NOT NULL, usable_physical_ram INT8 NOT NULL, From 96260aae4d60fdae1985c2112e086e69b48f4eae Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 22 Nov 2023 17:28:05 -0800 Subject: [PATCH 2/5] Add test for resource creation Created using spr 1.3.5 --- nexus/db-queries/src/db/datastore/sled.rs | 130 ++++++++++++++++++--- nexus/src/external_api/http_entrypoints.rs | 13 +-- nexus/tests/integration_tests/commands.rs | 2 +- 3 files changed, 122 insertions(+), 23 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 5fd886aa66..7cc6ce1f16 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -201,6 +201,14 @@ impl DataStore { .await .map_err(|e| match e { TxnError::CustomError(SledReservationError::NotFound) => { + // XXX: Should this be a different error? 503 Service + // Unavailable is often for transient errors, and this + // message isn't exposed to users (which seems wrong?) + // + // Maybe 409 Conflict as documented at + // https://stackoverflow.com/a/11093566, since this request + // if completed would put the system into an inconsistent + // state. external::Error::unavail( "No sleds can fit the requested instance", ) @@ -261,12 +269,14 @@ mod test { use crate::db::datastore::test::{ sled_baseboard_for_test, sled_system_hardware_for_test, }; + use crate::db::lookup::LookupPath; use crate::db::model::ByteCount; use crate::db::model::SqlU32; use nexus_test_utils::db::test_setup_database; use omicron_common::api::external; use omicron_test_utils::dev; use std::net::{Ipv6Addr, SocketAddrV6}; + use std::num::NonZeroU32; fn rack_id() -> Uuid { Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap() @@ -278,19 +288,9 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (_opctx, datastore) = datastore_test(&logctx, &db).await; - let sled_id = Uuid::new_v4(); - let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); - let mut sled_update = SledUpdate::new( - sled_id, - addr, - sled_baseboard_for_test(), - sled_system_hardware_for_test(), - rack_id(), - ); - let observed_sled = datastore - .sled_upsert(sled_update.clone()) - .await - .expect("Could not upsert sled during test prep"); + let mut sled_update = test_new_sled_update(); + let observed_sled = + datastore.sled_upsert(sled_update.clone()).await.unwrap(); assert_eq!( observed_sled.usable_hardware_threads, sled_update.usable_hardware_threads @@ -336,4 +336,108 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + /// Test that new reservations aren't created on non-provisionable sleds. + #[tokio::test] + async fn sled_reservation_create_non_provisionable() { + let logctx = + dev::test_setup_log("sled_reservation_create_non_provisionable"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let sled_update = test_new_sled_update(); + let non_provisionable_sled = + datastore.sled_upsert(sled_update.clone()).await.unwrap(); + datastore + .sled_set_provision_state( + &opctx, + &LookupPath::new(&opctx, &datastore) + .sled_id(non_provisionable_sled.id()), + db::model::SledProvisionState::NonProvisionable, + ) + .await + .unwrap(); + + // This should be an error since there are no provisionable sleds. + let resources = db::model::Resources::new( + 1, + // Just require the bare non-zero amount of RAM. + ByteCount::try_from(1024).unwrap(), + ByteCount::try_from(1024).unwrap(), + ); + let constraints = db::model::SledReservationConstraints::none(); + let error = datastore + .sled_reservation_create( + &opctx, + Uuid::new_v4(), + db::model::SledResourceKind::Instance, + resources.clone(), + constraints, + ) + .await + .unwrap_err(); + assert!(matches!(error, external::Error::ServiceUnavailable { .. })); + + // Now add a provisionable sled and try again. + let sled_update = test_new_sled_update(); + let provisionable_sled = + datastore.sled_upsert(sled_update.clone()).await.unwrap(); + + let sleds = datastore + .sled_list(&opctx, &first_page(NonZeroU32::new(10).unwrap())) + .await + .unwrap(); + println!("sleds: {:?}", sleds); + + // Try a few times to ensure that resources never get allocated to the + // non-provisionable sled. + for _ in 0..10 { + let constraints = db::model::SledReservationConstraints::none(); + let resource = datastore + .sled_reservation_create( + &opctx, + Uuid::new_v4(), + db::model::SledResourceKind::Instance, + resources.clone(), + constraints, + ) + .await + .unwrap(); + assert_eq!( + resource.sled_id, + provisionable_sled.id(), + "resource is always allocated to the provisionable sled" + ); + + datastore + .sled_reservation_delete(&opctx, resource.id) + .await + .unwrap(); + } + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + fn test_new_sled_update() -> SledUpdate { + let sled_id = Uuid::new_v4(); + let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); + SledUpdate::new( + sled_id, + addr, + sled_baseboard_for_test(), + sled_system_hardware_for_test(), + rack_id(), + ) + } + + /// Returns pagination parameters to fetch the first page of results for a + /// paginated endpoint + fn first_page<'a, T>(limit: NonZeroU32) -> DataPageParams<'a, T> { + DataPageParams { + marker: None, + direction: dropshot::PaginationOrder::Ascending, + limit, + } + } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index e7fc055a9f..f9617c38ad 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -4477,15 +4477,10 @@ async fn sled_set_provision_state( let opctx = crate::context::op_context_for_external_api(&rqctx).await?; // Convert the external `SledProvisionState` into our internal data model. - let new_state = db::model::SledProvisionState::try_from( - provision_state, - ) - .map_err(|error| { - HttpError::for_bad_request( - None, - format!("invalid provision state: {}", error.to_string()), - ) - })?; + let new_state = + db::model::SledProvisionState::try_from(provision_state).map_err( + |error| HttpError::for_bad_request(None, format!("{error}")), + )?; let sled_lookup = nexus.sled_lookup(&opctx, &path.sled_id)?; diff --git a/nexus/tests/integration_tests/commands.rs b/nexus/tests/integration_tests/commands.rs index 27af266603..02d938b2ac 100644 --- a/nexus/tests/integration_tests/commands.rs +++ b/nexus/tests/integration_tests/commands.rs @@ -95,7 +95,7 @@ fn run_command_with_arg(arg: &str) -> (String, String) { fs::remove_file(&config_path).expect("failed to remove temporary file"); assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); - (stdout_text, stderr_text) + (stdout_text, stderr_text) } #[test] From b192f865a2f097e54cd2e912b8c8f9600eaa7324 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 28 Nov 2023 17:28:00 -0800 Subject: [PATCH 3/5] Fix build Created using spr 1.3.5 --- nexus/src/app/sled.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index e4dacbf967..44efc2934e 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -8,6 +8,7 @@ use crate::internal_api::params::{ PhysicalDiskDeleteRequest, PhysicalDiskPutRequest, SledAgentStartupInfo, SledRole, ZpoolPutRequest, }; +use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::lookup; @@ -149,8 +150,10 @@ impl super::Nexus { sled_lookup: &lookup::Sled<'_>, state: db::model::SledProvisionState, ) -> Result { + let (authz_sled,) = + sled_lookup.lookup_for(authz::Action::Modify).await?; self.db_datastore - .sled_set_provision_state(opctx, sled_lookup, state) + .sled_set_provision_state(opctx, &authz_sled, state) .await } From c4b94c1c5ce0f78b5251b42c0abbce408f066e3b Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 28 Nov 2023 20:14:54 -0800 Subject: [PATCH 4/5] Revert rcgen changes Created using spr 1.3.5 --- schema/crdb/dbinit.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 4802bdee65..178c7af913 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -86,7 +86,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled ( time_created TIMESTAMPTZ NOT NULL, time_modified TIMESTAMPTZ NOT NULL, time_deleted TIMESTAMPTZ, - rcgen INT8 NOT NULL, + rcgen INT NOT NULL, /* FK into the Rack table */ rack_id UUID NOT NULL, @@ -223,7 +223,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.switch ( time_created TIMESTAMPTZ NOT NULL, time_modified TIMESTAMPTZ NOT NULL, time_deleted TIMESTAMPTZ, - rcgen INT8 NOT NULL, + rcgen INT NOT NULL, /* FK into the Rack table */ rack_id UUID NOT NULL, From 611114b7a645e3ce06d88f7b069ec251e498fa16 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 28 Nov 2023 22:49:21 -0800 Subject: [PATCH 5/5] update SCHEMA_VERSION Created using spr 1.3.5 --- nexus/db-model/src/schema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index ecc240a4eb..6527da3637 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1300,7 +1300,7 @@ table! { /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(14, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(15, 0, 0); allow_tables_to_appear_in_same_query!( system_update,