Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[nexus] add sled provision state #4520

Merged
merged 10 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions nexus/db-model/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ serde.workspace = true
serde_json.workspace = true
steno.workspace = true
strum.workspace = true
thiserror.workspace = true
uuid.workspace = true

db-macros.workspace = true
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ mod silo_user;
mod silo_user_password_hash;
mod sled;
mod sled_instance;
mod sled_provision_state;
mod sled_resource;
mod sled_resource_kind;
mod snapshot;
Expand Down Expand Up @@ -151,6 +152,7 @@ pub use silo_user::*;
pub use silo_user_password_hash::*;
pub use sled::*;
pub use sled_instance::*;
pub use sled_provision_state::*;
pub use sled_resource::*;
pub use sled_resource_kind::*;
pub use snapshot::*;
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-model/src/queries/region_allocation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
// a CTE (where we want the alias name to come first).

use crate::schema::dataset;
use crate::schema::sled;
use crate::schema::zpool;

table! {
Expand Down Expand Up @@ -157,6 +158,7 @@ diesel::allow_tables_to_appear_in_same_query!(
// Tell Diesel that the tables listed here may be referenced together within
// one query. `sled` is in the list next to `zpool` and the CTE alias tables
// (presumably so the region-allocation query can inspect sled state -- confirm
// against the query definition).
diesel::allow_tables_to_appear_in_same_query!(
old_zpool_usage,
zpool,
sled,
proposed_dataset_changes,
);

Expand Down
1 change: 1 addition & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,7 @@ table! {

rack_id -> Uuid,
is_scrimlet -> Bool,
provision_state -> crate::SledProvisionStateEnum,
serial_number -> Text,
part_number -> Text,
revision -> Int8,
Expand Down
11 changes: 10 additions & 1 deletion nexus/db-model/src/sled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

use super::{ByteCount, Generation, SqlU16, SqlU32};
use crate::collection::DatastoreCollectionConfig;
use crate::ipv6;
use crate::schema::{physical_disk, service, sled, zpool};
use crate::{ipv6, SledProvisionState};
use chrono::{DateTime, Utc};
use db_macros::Asset;
use nexus_types::{external_api::views, identity::Asset};
Expand Down Expand Up @@ -49,6 +49,8 @@ pub struct Sled {
part_number: String,
revision: i64,

provision_state: SledProvisionState,

pub usable_hardware_threads: SqlU32,
pub usable_physical_ram: ByteCount,
pub reservoir_size: ByteCount,
Expand All @@ -66,6 +68,10 @@ impl Sled {
self.is_scrimlet
}

pub fn provision_state(&self) -> SledProvisionState {
self.provision_state
}

/// Returns this sled's IP address as a standard-library `Ipv6Addr`.
pub fn ip(&self) -> Ipv6Addr {
    // Same conversion as `.into()`, with the target type spelled out.
    Ipv6Addr::from(self.ip)
}
Expand Down Expand Up @@ -93,6 +99,7 @@ impl From<Sled> for views::Sled {
part: sled.part_number,
revision: sled.revision,
},
provision_state: sled.provision_state.into(),
usable_hardware_threads: sled.usable_hardware_threads.0,
usable_physical_ram: *sled.usable_physical_ram,
}
Expand Down Expand Up @@ -188,6 +195,8 @@ impl SledUpdate {
serial_number: self.serial_number,
part_number: self.part_number,
revision: self.revision,
// By default, sleds start as provisionable.
provision_state: SledProvisionState::Provisionable,
usable_hardware_threads: self.usable_hardware_threads,
usable_physical_ram: self.usable_physical_ram,
reservoir_size: self.reservoir_size,
Expand Down
58 changes: 58 additions & 0 deletions nexus/db-model/src/sled_provision_state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use super::impl_enum_type;
use nexus_types::external_api::views;
use serde::{Deserialize, Serialize};
use thiserror::Error;

impl_enum_type!(
    // SQL-side marker type for the Postgres enum `sled_provision_state`.
    #[derive(Clone, SqlType, Debug, QueryId)]
    #[diesel(postgres_type(name = "sled_provision_state"))]
    pub struct SledProvisionStateEnum;

    // Rust-side enum mapped onto `SledProvisionStateEnum`. Every variant is a
    // unit variant, so equality is total and `Eq` is derived together with
    // `PartialEq`.
    #[derive(
        Clone,
        Copy,
        Debug,
        AsExpression,
        FromSqlRow,
        Serialize,
        Deserialize,
        PartialEq,
        Eq,
    )]
    #[diesel(sql_type = SledProvisionStateEnum)]
    pub enum SledProvisionState;

    // Enum values: Rust variant => label used by the SQL enum.
    Provisionable => b"provisionable"
    NonProvisionable => b"non_provisionable"
);

/// Converts the database model's provision state into the external API view.
///
/// Each model variant maps onto the view variant of the same name, so the
/// conversion cannot fail.
impl From<SledProvisionState> for views::SledProvisionState {
    fn from(state: SledProvisionState) -> Self {
        // `Self` is the view type here.
        match state {
            SledProvisionState::Provisionable => Self::Provisionable,
            SledProvisionState::NonProvisionable => Self::NonProvisionable,
        }
    }
}

impl TryFrom<views::SledProvisionState> for SledProvisionState {
type Error = UnknownSledProvisionState;

fn try_from(state: views::SledProvisionState) -> Result<Self, Self::Error> {
match state {
views::SledProvisionState::Provisionable => {
Ok(SledProvisionState::Provisionable)
}
views::SledProvisionState::NonProvisionable => {
Ok(SledProvisionState::NonProvisionable)
}
views::SledProvisionState::Unknown => {
Err(UnknownSledProvisionState)
}
}
}
}

/// An unknown [`views::SledProvisionState`] was encountered.
#[derive(Clone, Debug, Error)]
#[error("Unknown SledProvisionState")]
pub struct UnknownSledProvisionState;
81 changes: 72 additions & 9 deletions nexus/db-queries/src/db/datastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,8 +372,8 @@ mod test {
BlockSize, ComponentUpdate, ComponentUpdateIdentity, ConsoleSession,
Dataset, DatasetKind, ExternalIp, PhysicalDisk, PhysicalDiskKind,
Project, Rack, Region, Service, ServiceKind, SiloUser, SledBaseboard,
SledSystemHardware, SledUpdate, SshKey, SystemUpdate,
UpdateableComponentType, VpcSubnet, Zpool,
SledProvisionState, SledSystemHardware, SledUpdate, SshKey,
SystemUpdate, UpdateableComponentType, VpcSubnet, Zpool,
};
use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery;
use assert_matches::assert_matches;
Expand Down Expand Up @@ -610,6 +610,30 @@ mod test {
sled_id
}

/// Marks the sled identified by `sled_id` as non-provisionable.
///
/// # Panics
///
/// Panics if the datastore call fails, or if the state reported as the sled's
/// previous state is anything other than `Provisionable`.
async fn mark_sled_non_provisionable(
    datastore: &DataStore,
    opctx: &OpContext,
    sled_id: Uuid,
) {
    let sled_lookup = LookupPath::new(opctx, datastore).sled_id(sled_id);
    let old_state = datastore
        .sled_set_provision_state(
            // `opctx` is already a reference, so it is passed through as-is
            // (matching the `LookupPath::new` call above).
            opctx,
            &sled_lookup,
            SledProvisionState::NonProvisionable,
        )
        .await
        .unwrap_or_else(|error| {
            panic!(
                "error marking sled {sled_id} as non-provisionable: {error}"
            )
        });
    // The old state should always be provisionable since that's where we
    // start.
    assert_eq!(old_state, SledProvisionState::Provisionable);
}

/// Returns a `ByteCount` of 100 GiB, the zpool size named by this helper.
fn test_zpool_size() -> ByteCount {
    const ZPOOL_SIZE_GIB: u32 = 100;
    ByteCount::from_gibibytes_u32(ZPOOL_SIZE_GIB)
}
Expand Down Expand Up @@ -770,13 +794,24 @@ mod test {
let logctx = dev::test_setup_log("test_region_allocation_strat_random");
let mut db = test_setup_database(&logctx.log).await;
let (opctx, datastore) = datastore_test(&logctx, &db).await;
create_test_datasets_for_region_allocation(
let test_datasets = create_test_datasets_for_region_allocation(
&opctx,
datastore.clone(),
// Even though we're going to mark one sled as non-provisionable to
// test that logic, we aren't forcing the datasets to be on
// distinct sleds, so REGION_REDUNDANCY_THRESHOLD is enough.
REGION_REDUNDANCY_THRESHOLD,
)
.await;

let non_provisionable_dataset_id = test_datasets[0].dataset_id;
mark_sled_non_provisionable(
&datastore,
&opctx,
test_datasets[0].sled_id,
)
.await;

// Allocate regions from the datasets for this disk. Do it a few times
// for good measure.
for alloc_seed in 0..10 {
Expand Down Expand Up @@ -809,6 +844,9 @@ mod test {
// Must be 3 unique datasets
assert!(disk_datasets.insert(dataset.id()));

// Dataset must not be the one on the sled marked non-provisionable.
assert_ne!(dataset.id(), non_provisionable_dataset_id);

// Must be 3 unique zpools
assert!(disk_zpools.insert(dataset.pool_id));

Expand Down Expand Up @@ -837,12 +875,23 @@ mod test {
let mut db = test_setup_database(&logctx.log).await;
let (opctx, datastore) = datastore_test(&logctx, &db).await;

// Create a rack without enough sleds for a successful allocation when
// we require 3 distinct sleds.
// Create a rack with enough sleds for a successful allocation when we
// require 3 distinct provisionable sleds.
let test_datasets = create_test_datasets_for_region_allocation(
&opctx,
datastore.clone(),
REGION_REDUNDANCY_THRESHOLD,
// We're going to mark one sled as non-provisionable to test that
// logic, and we *are* forcing the datasets to be on distinct
// sleds: hence threshold + 1.
REGION_REDUNDANCY_THRESHOLD + 1,
)
.await;

let non_provisionable_dataset_id = test_datasets[0].dataset_id;
mark_sled_non_provisionable(
&datastore,
&opctx,
test_datasets[0].sled_id,
)
.await;

Expand Down Expand Up @@ -884,6 +933,9 @@ mod test {
// Must be 3 unique datasets
assert!(disk_datasets.insert(dataset.id()));

// Dataset must not be the one on the sled marked non-provisionable.
assert_ne!(dataset.id(), non_provisionable_dataset_id);

// Must be 3 unique zpools
assert!(disk_zpools.insert(dataset.pool_id));

Expand Down Expand Up @@ -916,11 +968,22 @@ mod test {
let (opctx, datastore) = datastore_test(&logctx, &db).await;

// Create a rack without enough sleds for a successful allocation when
// we require 3 distinct sleds.
create_test_datasets_for_region_allocation(
// we require 3 distinct provisionable sleds.
let test_datasets = create_test_datasets_for_region_allocation(
&opctx,
datastore.clone(),
REGION_REDUNDANCY_THRESHOLD - 1,
// Here, we need to have REGION_REDUNDANCY_THRESHOLD - 1
// provisionable sleds to test this failure condition. We're going
// to mark one sled as non-provisionable to test that logic, so we
// need to add 1 to that number.
REGION_REDUNDANCY_THRESHOLD,
)
.await;

mark_sled_non_provisionable(
&datastore,
&opctx,
test_datasets[0].sled_id,
)
.await;

Expand Down
Loading
Loading