Skip to content

Commit

Permalink
[nexus] add sled provision state (#4520)
Browse files Browse the repository at this point in the history
Add the notion of a sled provision state to Nexus. Currently, we will only use this to prevent new resources and regions from being provisioned to sleds.

This PR includes:

1. Database updates and schema migrations.
2. Database APIs in `nexus-db-queries`.
3. An HTTP API.
4. Tests for resource and region allocation.
  • Loading branch information
sunshowers authored Nov 29, 2023
1 parent 0a6966c commit 67cd482
Show file tree
Hide file tree
Showing 23 changed files with 607 additions and 33 deletions.
6 changes: 4 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions nexus/db-model/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ serde.workspace = true
serde_json.workspace = true
steno.workspace = true
strum.workspace = true
thiserror.workspace = true
uuid.workspace = true

db-macros.workspace = true
Expand Down
9 changes: 5 additions & 4 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ mod silo_user;
mod silo_user_password_hash;
mod sled;
mod sled_instance;
mod sled_provision_state;
mod sled_resource;
mod sled_resource_kind;
mod sled_underlay_subnet_allocation;
Expand Down Expand Up @@ -152,6 +153,7 @@ pub use silo_user::*;
pub use silo_user_password_hash::*;
pub use sled::*;
pub use sled_instance::*;
pub use sled_provision_state::*;
pub use sled_resource::*;
pub use sled_resource_kind::*;
pub use sled_underlay_subnet_allocation::*;
Expand Down Expand Up @@ -287,10 +289,9 @@ macro_rules! impl_enum_type {
Ok($model_type::$enum_item)
}
)*
_ => {
Err(concat!("Unrecognized enum variant for ",
stringify!{$model_type})
.into())
other => {
let s = concat!("Unrecognized enum variant for ", stringify!{$model_type});
Err(format!("{}: (raw bytes: {:?})", s, other).into())
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-model/src/queries/region_allocation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
// a CTE (where we want the alias name to come first).

use crate::schema::dataset;
use crate::schema::sled;
use crate::schema::zpool;

table! {
Expand Down Expand Up @@ -157,6 +158,7 @@ diesel::allow_tables_to_appear_in_same_query!(
diesel::allow_tables_to_appear_in_same_query!(
old_zpool_usage,
zpool,
sled,
proposed_dataset_changes,
);

Expand Down
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,7 @@ table! {
ip -> Inet,
port -> Int4,
last_used_address -> Inet,
provision_state -> crate::SledProvisionStateEnum,
}
}

Expand Down Expand Up @@ -1299,7 +1300,7 @@ table! {
///
/// This should be updated whenever the schema is changed. For more details,
/// refer to: schema/crdb/README.adoc
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(14, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(15, 0, 0);

allow_tables_to_appear_in_same_query!(
system_update,
Expand Down
11 changes: 10 additions & 1 deletion nexus/db-model/src/sled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

use super::{ByteCount, Generation, SqlU16, SqlU32};
use crate::collection::DatastoreCollectionConfig;
use crate::ipv6;
use crate::schema::{physical_disk, service, sled, zpool};
use crate::{ipv6, SledProvisionState};
use chrono::{DateTime, Utc};
use db_macros::Asset;
use nexus_types::{external_api::shared, external_api::views, identity::Asset};
Expand Down Expand Up @@ -59,6 +59,8 @@ pub struct Sled {

/// The last IP address provided to an Oxide service on this sled
pub last_used_address: ipv6::Ipv6Addr,

provision_state: SledProvisionState,
}

impl Sled {
Expand All @@ -81,6 +83,10 @@ impl Sled {
/// Returns this sled's serial number as a borrowed string slice.
pub fn serial_number(&self) -> &str {
&self.serial_number
}

/// Returns the provision state stored on this sled record.
///
/// The value is returned by copy; the field itself stays private.
pub fn provision_state(&self) -> SledProvisionState {
self.provision_state
}
}

impl From<Sled> for views::Sled {
Expand All @@ -93,6 +99,7 @@ impl From<Sled> for views::Sled {
part: sled.part_number,
revision: sled.revision,
},
provision_state: sled.provision_state.into(),
usable_hardware_threads: sled.usable_hardware_threads.0,
usable_physical_ram: *sled.usable_physical_ram,
}
Expand Down Expand Up @@ -188,6 +195,8 @@ impl SledUpdate {
serial_number: self.serial_number,
part_number: self.part_number,
revision: self.revision,
// By default, sleds start as provisionable.
provision_state: SledProvisionState::Provisionable,
usable_hardware_threads: self.usable_hardware_threads,
usable_physical_ram: self.usable_physical_ram,
reservoir_size: self.reservoir_size,
Expand Down
58 changes: 58 additions & 0 deletions nexus/db-model/src/sled_provision_state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use super::impl_enum_type;
use nexus_types::external_api::views;
use serde::{Deserialize, Serialize};
use thiserror::Error;

// Declares the SQL type marker and the Rust model enum for a sled's
// provision state through the crate's `impl_enum_type!` macro.
impl_enum_type!(
// SQL-side type marker, bound to the Postgres type `sled_provision_state`.
#[derive(Clone, SqlType, Debug, QueryId)]
#[diesel(postgres_type(name = "sled_provision_state"))]
pub struct SledProvisionStateEnum;

// Rust-side model enum; `diesel(sql_type = ...)` ties it to the marker
// struct declared just above.
#[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)]
#[diesel(sql_type = SledProvisionStateEnum)]
pub enum SledProvisionState;

// Enum values: Rust variant => byte-string label handed to the macro.
// NOTE(review): presumably these labels must match the variants of the
// database enum type exactly -- confirm against the schema migration.
Provisionable => b"provisionable"
NonProvisionable => b"non_provisionable"
);

impl From<SledProvisionState> for views::SledProvisionState {
fn from(state: SledProvisionState) -> Self {
match state {
SledProvisionState::Provisionable => {
views::SledProvisionState::Provisionable
}
SledProvisionState::NonProvisionable => {
views::SledProvisionState::NonProvisionable
}
}
}
}

impl TryFrom<views::SledProvisionState> for SledProvisionState {
type Error = UnknownSledProvisionState;

fn try_from(state: views::SledProvisionState) -> Result<Self, Self::Error> {
match state {
views::SledProvisionState::Provisionable => {
Ok(SledProvisionState::Provisionable)
}
views::SledProvisionState::NonProvisionable => {
Ok(SledProvisionState::NonProvisionable)
}
views::SledProvisionState::Unknown => {
Err(UnknownSledProvisionState)
}
}
}
}

/// An unknown [`views::SledProvisionState`] was encountered.
///
/// Produced by the `TryFrom<views::SledProvisionState>` conversion into
/// [`SledProvisionState`] when the view value is `Unknown`, which has no
/// counterpart among the model enum's variants.
#[derive(Clone, Debug, Error)]
#[error("Unknown SledProvisionState")]
pub struct UnknownSledProvisionState;
86 changes: 77 additions & 9 deletions nexus/db-queries/src/db/datastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,8 +372,8 @@ mod test {
BlockSize, ComponentUpdate, ComponentUpdateIdentity, ConsoleSession,
Dataset, DatasetKind, ExternalIp, PhysicalDisk, PhysicalDiskKind,
Project, Rack, Region, Service, ServiceKind, SiloUser, SledBaseboard,
SledSystemHardware, SledUpdate, SshKey, SystemUpdate,
UpdateableComponentType, VpcSubnet, Zpool,
SledProvisionState, SledSystemHardware, SledUpdate, SshKey,
SystemUpdate, UpdateableComponentType, VpcSubnet, Zpool,
};
use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery;
use assert_matches::assert_matches;
Expand Down Expand Up @@ -610,6 +610,35 @@ mod test {
sled_id
}

// Marks a sled as non-provisionable.
//
// Looks the sled up with `Modify` access, then sets its provision state to
// `NonProvisionable`.
//
// Panics if the lookup fails, if the update fails, or if the sled's previous
// state was anything other than `Provisionable`.
async fn mark_sled_non_provisionable(
    datastore: &DataStore,
    opctx: &OpContext,
    sled_id: Uuid,
) {
    let (authz_sled, sled) = LookupPath::new(opctx, datastore)
        .sled_id(sled_id)
        .fetch_for(authz::Action::Modify)
        .await
        .expect("sled lookup for modification should succeed");
    println!("sled: {:?}", sled);
    let old_state = datastore
        .sled_set_provision_state(
            // `opctx` is already a reference, so it is passed through
            // without an extra borrow.
            opctx,
            &authz_sled,
            SledProvisionState::NonProvisionable,
        )
        .await
        .unwrap_or_else(|error| {
            panic!(
                "error marking sled {sled_id} as non-provisionable: {error}"
            )
        });
    // The old state should always be provisionable since that's where we
    // start.
    assert_eq!(old_state, SledProvisionState::Provisionable);
}

// Returns a byte count of 100 GiB.
// NOTE(review): going by the name, this is the zpool size the tests in this
// module use -- confirm against the callers.
fn test_zpool_size() -> ByteCount {
    const ZPOOL_SIZE_GIB: u32 = 100;
    ByteCount::from_gibibytes_u32(ZPOOL_SIZE_GIB)
}
Expand Down Expand Up @@ -770,13 +799,24 @@ mod test {
let logctx = dev::test_setup_log("test_region_allocation_strat_random");
let mut db = test_setup_database(&logctx.log).await;
let (opctx, datastore) = datastore_test(&logctx, &db).await;
create_test_datasets_for_region_allocation(
let test_datasets = create_test_datasets_for_region_allocation(
&opctx,
datastore.clone(),
// Even though we're going to mark one sled as non-provisionable to
// test that logic, we aren't forcing the datasets to be on
// distinct sleds, so REGION_REDUNDANCY_THRESHOLD is enough.
REGION_REDUNDANCY_THRESHOLD,
)
.await;

let non_provisionable_dataset_id = test_datasets[0].dataset_id;
mark_sled_non_provisionable(
&datastore,
&opctx,
test_datasets[0].sled_id,
)
.await;

// Allocate regions from the datasets for this disk. Do it a few times
// for good measure.
for alloc_seed in 0..10 {
Expand Down Expand Up @@ -809,6 +849,9 @@ mod test {
// Must be 3 unique datasets
assert!(disk_datasets.insert(dataset.id()));

// Dataset must not be non-provisionable.
assert_ne!(dataset.id(), non_provisionable_dataset_id);

// Must be 3 unique zpools
assert!(disk_zpools.insert(dataset.pool_id));

Expand Down Expand Up @@ -837,12 +880,23 @@ mod test {
let mut db = test_setup_database(&logctx.log).await;
let (opctx, datastore) = datastore_test(&logctx, &db).await;

// Create a rack without enough sleds for a successful allocation when
// we require 3 distinct sleds.
// Create a rack with enough sleds for a successful allocation when we
// require 3 distinct provisionable sleds.
let test_datasets = create_test_datasets_for_region_allocation(
&opctx,
datastore.clone(),
REGION_REDUNDANCY_THRESHOLD,
// We're going to mark one sled as non-provisionable to test that
// logic, and we *are* forcing the datasets to be on distinct
// sleds: hence threshold + 1.
REGION_REDUNDANCY_THRESHOLD + 1,
)
.await;

let non_provisionable_dataset_id = test_datasets[0].dataset_id;
mark_sled_non_provisionable(
&datastore,
&opctx,
test_datasets[0].sled_id,
)
.await;

Expand Down Expand Up @@ -884,6 +938,9 @@ mod test {
// Must be 3 unique datasets
assert!(disk_datasets.insert(dataset.id()));

// Dataset must not be non-provisionable.
assert_ne!(dataset.id(), non_provisionable_dataset_id);

// Must be 3 unique zpools
assert!(disk_zpools.insert(dataset.pool_id));

Expand Down Expand Up @@ -916,11 +973,22 @@ mod test {
let (opctx, datastore) = datastore_test(&logctx, &db).await;

// Create a rack without enough sleds for a successful allocation when
// we require 3 distinct sleds.
create_test_datasets_for_region_allocation(
// we require 3 distinct provisionable sleds.
let test_datasets = create_test_datasets_for_region_allocation(
&opctx,
datastore.clone(),
REGION_REDUNDANCY_THRESHOLD - 1,
// Here, we need to have REGION_REDUNDANCY_THRESHOLD - 1
// provisionable sleds to test this failure condition. We're going
// to mark one sled as non-provisionable to test that logic, so we
// need to add 1 to that number.
REGION_REDUNDANCY_THRESHOLD,
)
.await;

mark_sled_non_provisionable(
&datastore,
&opctx,
test_datasets[0].sled_id,
)
.await;

Expand Down
Loading

0 comments on commit 67cd482

Please sign in to comment.