diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh
index 5d3dd8ec39..c2579d98ea 100755
--- a/.github/buildomat/jobs/deploy.sh
+++ b/.github/buildomat/jobs/deploy.sh
@@ -143,6 +143,7 @@
 cd /opt/oxide/work
 ptime -m tar xvzf /input/package/work/package.tar.gz
 cp /input/package/work/zones/* out/
+mv out/omicron-nexus-single-sled.tar.gz out/omicron-nexus.tar.gz
 mkdir tests
 for p in /input/ci-tools/work/end-to-end-tests/*.gz; do
 	ptime -m gunzip < "$p" > "tests/$(basename "${p%.gz}")"
diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh
index fe5d6a9b7f..64c087524e 100755
--- a/.github/buildomat/jobs/package.sh
+++ b/.github/buildomat/jobs/package.sh
@@ -45,7 +45,7 @@
 ptime -m ./tools/ci_download_softnpu_machinery
 
 # Build the test target
 ptime -m cargo run --locked --release --bin omicron-package -- \
-  -t test target create -i standard -m non-gimlet -s softnpu
+  -t test target create -i standard -m non-gimlet -s softnpu -r single-sled
 ptime -m cargo run --locked --release --bin omicron-package -- \
   -t test package
@@ -81,9 +81,13 @@ stamp_packages() {
   done
 }
 
+# Keep the single-sled Nexus zone around for the deploy job. (The global zone
+# build below overwrites the file.)
+mv out/omicron-nexus.tar.gz out/omicron-nexus-single-sled.tar.gz
+
 # Build necessary for the global zone
 ptime -m cargo run --locked --release --bin omicron-package -- \
-  -t host target create -i standard -m gimlet -s asic
+  -t host target create -i standard -m gimlet -s asic -r multi-sled
 ptime -m cargo run --locked --release --bin omicron-package -- \
   -t host package
 stamp_packages omicron-sled-agent maghemite propolis-server overlay
@@ -111,6 +115,7 @@ zones=(
   out/external-dns.tar.gz
   out/internal-dns.tar.gz
   out/omicron-nexus.tar.gz
+  out/omicron-nexus-single-sled.tar.gz
   out/oximeter-collector.tar.gz
   out/propolis-server.tar.gz
   out/switch-*.tar.gz
diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh
index fab6770564..e169bebff6 100644
--- a/.github/buildomat/jobs/tuf-repo.sh
+++ b/.github/buildomat/jobs/tuf-repo.sh
@@ -77,10 +77,11 @@ done
 mkdir /work/package
 pushd /work/package
 tar xf /input/package/work/package.tar.gz out package-manifest.toml target/release/omicron-package
-target/release/omicron-package -t default target create -i standard -m gimlet -s asic
+target/release/omicron-package -t default target create -i standard -m gimlet -s asic -r multi-sled
 ln -s /input/package/work/zones/* out/
 rm out/switch-softnpu.tar.gz # not used when target switch=asic
 rm out/omicron-gateway-softnpu.tar.gz # not used when target switch=asic
+rm out/omicron-nexus-single-sled.tar.gz # only used for deploy tests
 for zone in out/*.tar.gz; do
   target/release/omicron-package stamp "$(basename "${zone%.tar.gz}")" "$VERSION"
 done
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 722aacbe0f..f5cf1dc885 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -41,7 +41,7 @@ jobs:
       - name: Install Pre-Requisites
         run: ./tools/install_builder_prerequisites.sh -y
       - name: Set default target
-        run: cargo run --bin omicron-package -- -t default target create
+        run: cargo run --bin omicron-package -- -t default target create -r single-sled
       - name: Check build of deployed Omicron packages
         run: cargo run --bin omicron-package -- -t default check
diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs
index 73ccec996c..ad62c34f92 100644
--- a/common/src/nexus_config.rs
+++ b/common/src/nexus_config.rs
@@ -372,6 +372,8 @@ pub struct PackageConfig {
     pub dendrite: HashMap,
     /// Background task configuration
     pub background_tasks: BackgroundTaskConfig,
+    /// Default Crucible region allocation strategy
+    pub default_region_allocation_strategy: RegionAllocationStrategy,
 }
 
 #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
@@ -594,6 +596,9 @@ mod test {
             dns_external.period_secs_propagation = 7
             dns_external.max_concurrent_server_updates = 8
             external_endpoints.period_secs = 9
+            [default_region_allocation_strategy]
+            type = "random"
+            seed = 0
             "##,
         )
         .unwrap();
@@ -677,6 +682,10 @@ mod test {
                         period_secs: Duration::from_secs(9),
                     }
                 },
+                default_region_allocation_strategy:
+                    crate::nexus_config::RegionAllocationStrategy::Random {
+                        seed: Some(0)
+                    }
             },
         }
     );
@@ -724,6 +733,8 @@ mod test {
             dns_external.period_secs_propagation = 7
             dns_external.max_concurrent_server_updates = 8
             external_endpoints.period_secs = 9
+            [default_region_allocation_strategy]
+            type = "random"
             "##,
         )
         .unwrap();
@@ -864,25 +875,31 @@ mod test {
         struct DummyConfig {
             deployment: DeploymentConfig,
         }
-        let config_path = "../smf/nexus/config-partial.toml";
-        println!(
-            "checking {:?} with example deployment section added",
-            config_path
-        );
-        let mut contents = std::fs::read_to_string(config_path)
-            .expect("failed to read Nexus SMF config file");
-        contents.push_str(
-            "\n\n\n \
-             # !! content below added by test_repo_configs_are_valid()\n\
-             \n\n\n",
-        );
         let example_deployment = toml::to_string_pretty(&DummyConfig {
             deployment: example_config.deployment,
         })
         .unwrap();
-        contents.push_str(&example_deployment);
-        let _: Config = toml::from_str(&contents)
-            .expect("Nexus SMF config file is not valid");
+
+        let nexus_config_paths = [
+            "../smf/nexus/single-sled/config-partial.toml",
+            "../smf/nexus/multi-sled/config-partial.toml",
+        ];
+        for config_path in nexus_config_paths {
+            println!(
+                "checking {:?} with example deployment section added",
+                config_path
+            );
+            let mut contents = std::fs::read_to_string(config_path)
+                .expect("failed to read Nexus SMF config file");
+            contents.push_str(
+                "\n\n\n \
+                 # !! content below added by test_repo_configs_are_valid()\n\
+                 \n\n\n",
+            );
+            contents.push_str(&example_deployment);
+            let _: Config = toml::from_str(&contents)
+                .expect("Nexus SMF config file is not valid");
+        }
     }
 
     #[test]
@@ -894,3 +911,30 @@ mod test {
         );
     }
 }
+
+/// Defines a strategy for choosing what physical disks to use when allocating
+/// new crucible regions.
+///
+/// NOTE: More strategies can - and should! - be added.
+///
+/// See for a more
+/// complete discussion.
+///
+/// Longer-term, we should consider:
+/// - Storage size + remaining free space
+/// - Sled placement of datasets
+/// - What sort of loads we'd like to create (even split across all disks
+///   may not be preferable, especially if maintenance is expected)
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum RegionAllocationStrategy {
+    /// Choose disks pseudo-randomly. An optional seed may be provided to make
+    /// the ordering deterministic, otherwise the current time in nanoseconds
+    /// will be used. Ordering is based on sorting the output of `md5(UUID of
+    /// candidate dataset + seed)`. The seed does not need to come from a
+    /// cryptographically secure source.
+    Random { seed: Option },
+
+    /// Like Random, but ensures that each region is allocated on its own sled.
+    RandomWithDistinctSleds { seed: Option },
+}
diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc
index 1988a42669..7539c5183f 100644
--- a/docs/how-to-run.adoc
+++ b/docs/how-to-run.adoc
@@ -321,10 +321,32 @@ Error:
 Creates a new build target, and sets it as "active"
 
 Usage: omicron-package target create [OPTIONS]
 
 Options:
-  -i, --image <IMAGE>        [default: standard] [possible values: standard, trampoline]
-  -m, --machine <MACHINE>    [possible values: gimlet, gimlet-standalone, non-gimlet]
-  -s, --switch <SWITCH>      [possible values: asic, stub, softnpu]
-  -h, --help                 Print help (see more with '--help')
+  -i, --image <IMAGE>
+          [default: standard]
+
+          Possible values:
+          - standard:   A typical host OS image
+          - trampoline: A recovery host OS image, intended to bootstrap a Standard image
+
+  -m, --machine <MACHINE>
+          Possible values:
+          - gimlet:            Use sled agent configuration for a Gimlet
+          - gimlet-standalone: Use sled agent configuration for a Gimlet running in isolation
+          - non-gimlet:        Use sled agent configuration for a device emulating a Gimlet
+
+  -s, --switch <SWITCH>
+          Possible values:
+          - asic:    Use the "real" Dendrite, that attempts to interact with the Tofino
+          - stub:    Use a "stub" Dendrite that does not require any real hardware
+          - softnpu: Use a "softnpu" Dendrite that uses the SoftNPU asic emulator
+
+  -r, --rack-topology <RACK_TOPOLOGY>
+          Possible values:
+          - multi-sled:  Use configurations suitable for a multi-sled deployment, such as dogfood and production racks
+          - single-sled: Use configurations suitable for a single-sled deployment, such as CI and dev machines
+
+  -h, --help
+          Print help (see a summary with '-h')
 ----
@@ -332,9 +354,9 @@ To set up a build target for a non-Gimlet machine with simulated (but fully func
 
 [source,console]
 ----
-$ cargo run --release --bin omicron-package -- -t default target create -i standard -m non-gimlet -s softnpu
+$ cargo run --release --bin omicron-package -- -t default target create -i standard -m non-gimlet -s softnpu -r single-sled
     Finished release [optimized] target(s) in 0.66s
-     Running `target/release/omicron-package -t default target create -i standard -m non-gimlet -s softnpu`
+     Running `target/release/omicron-package -t default target create -i standard -m non-gimlet -s softnpu -r single-sled`
 Created new build target 'default' and set it as active
 ----
diff --git a/installinator/Cargo.toml b/installinator/Cargo.toml
index 3b2f04c38f..428ea0d08e 100644
--- a/installinator/Cargo.toml
+++ b/installinator/Cargo.toml
@@ -57,3 +57,4 @@ tokio-stream.workspace = true
 [features]
 image-standard = []
 image-trampoline = []
+rack-topology-single-sled = []
\ No newline at end of file
diff --git a/nexus/db-model/src/queries/region_allocation.rs b/nexus/db-model/src/queries/region_allocation.rs
index 43fac3c9a6..2025e79fb8 100644
--- a/nexus/db-model/src/queries/region_allocation.rs
+++ b/nexus/db-model/src/queries/region_allocation.rs
@@ -47,6 +47,13 @@ table! {
     }
 }
 
+table! {
+    shuffled_candidate_datasets {
+        id -> Uuid,
+        pool_id -> Uuid,
+    }
+}
+
 table! {
     candidate_regions {
         id -> Uuid,
@@ -89,6 +96,19 @@ table! {
     }
 }
 
+table! {
+    one_zpool_per_sled (pool_id) {
+        pool_id -> Uuid
+    }
+}
+
+table! {
+    one_dataset_per_zpool {
+        id -> Uuid,
+        pool_id -> Uuid
+    }
+}
+
 table!
{ inserted_regions { id -> Uuid, @@ -141,6 +161,7 @@ diesel::allow_tables_to_appear_in_same_query!( ); diesel::allow_tables_to_appear_in_same_query!(old_regions, dataset,); +diesel::allow_tables_to_appear_in_same_query!(old_regions, zpool,); diesel::allow_tables_to_appear_in_same_query!( inserted_regions, @@ -149,6 +170,7 @@ diesel::allow_tables_to_appear_in_same_query!( diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, dataset,); diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, zpool,); +diesel::allow_tables_to_appear_in_same_query!(candidate_datasets, dataset); // == Needed for random region allocation == diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index ff1df710bb..b1f3203c60 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -307,43 +307,6 @@ pub enum UpdatePrecondition { Value(T), } -/// Defines a strategy for choosing what physical disks to use when allocating -/// new crucible regions. -/// -/// NOTE: More strategies can - and should! - be added. -/// -/// See for a more -/// complete discussion. -/// -/// Longer-term, we should consider: -/// - Storage size + remaining free space -/// - Sled placement of datasets -/// - What sort of loads we'd like to create (even split across all disks -/// may not be preferable, especially if maintenance is expected) -#[derive(Debug, Clone)] -pub enum RegionAllocationStrategy { - /// Choose disks that have the least data usage in the rack. This strategy - /// can lead to bad failure states wherein the disks with the least usage - /// have the least usage because regions on them are actually failing in - /// some way. Further retried allocations will then continue to try to - /// allocate onto the disk, perpetuating the problem. Currently this - /// strategy only exists so we can test that using different allocation - /// strategies actually results in different allocation patterns, hence the - /// `#[cfg(test)]`. - /// - /// See https://github.com/oxidecomputer/omicron/issues/3416 for more on the - /// failure-states associated with this strategy - #[cfg(test)] - LeastUsedDisk, - - /// Choose disks pseudo-randomly. An optional seed may be provided to make - /// the ordering deterministic, otherwise the current time in nanoseconds - /// will be used. Ordering is based on sorting the output of `md5(UUID of - /// candidate dataset + seed)`. The seed does not need to come from a - /// cryptographically secure source. - Random(Option), -} - /// Constructs a DataStore for use in test suites that has preloaded the /// built-in users, roles, and role assignments that are needed for basic /// operation @@ -421,7 +384,9 @@ mod test { use omicron_common::api::external::{ self, ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; + use omicron_common::nexus_config::RegionAllocationStrategy; use omicron_test_utils::dev; + use std::collections::HashMap; use std::collections::HashSet; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; use std::num::NonZeroU32; @@ -704,12 +669,18 @@ mod test { } } + struct TestDataset { + sled_id: Uuid, + dataset_id: Uuid, + } + async fn create_test_datasets_for_region_allocation( opctx: &OpContext, datastore: Arc, - ) -> Vec { + number_of_sleds: usize, + ) -> Vec { // Create sleds... 
- let sled_ids: Vec = stream::iter(0..REGION_REDUNDANCY_THRESHOLD) + let sled_ids: Vec = stream::iter(0..number_of_sleds) .then(|_| create_test_sled(&datastore)) .collect() .await; @@ -740,48 +711,69 @@ mod test { .collect() .await; + #[derive(Copy, Clone)] + struct Zpool { + sled_id: Uuid, + pool_id: Uuid, + } + // 1 pool per disk - let zpool_ids: Vec = stream::iter(physical_disks) + let zpools: Vec = stream::iter(physical_disks) .then(|disk| { - create_test_zpool(&datastore, disk.sled_id, disk.disk_id) + let pool_id_future = + create_test_zpool(&datastore, disk.sled_id, disk.disk_id); + async move { + let pool_id = pool_id_future.await; + Zpool { sled_id: disk.sled_id, pool_id } + } }) .collect() .await; let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); - // 1 dataset per zpool - let dataset_ids: Vec = stream::iter(zpool_ids) - .then(|zpool_id| { - let id = Uuid::new_v4(); - let dataset = Dataset::new( - id, - zpool_id, - bogus_addr, - DatasetKind::Crucible, - ); - let datastore = datastore.clone(); - async move { - datastore.dataset_upsert(dataset).await.unwrap(); - id - } + let datasets: Vec = stream::iter(zpools) + .map(|zpool| { + // 3 datasets per zpool, to test that pools are distinct + let zpool_iter: Vec = (0..3).map(|_| zpool).collect(); + stream::iter(zpool_iter).then(|zpool| { + let id = Uuid::new_v4(); + let dataset = Dataset::new( + id, + zpool.pool_id, + bogus_addr, + DatasetKind::Crucible, + ); + + let datastore = datastore.clone(); + async move { + datastore.dataset_upsert(dataset).await.unwrap(); + + TestDataset { sled_id: zpool.sled_id, dataset_id: id } + } + }) }) + .flatten() .collect() .await; - dataset_ids + datasets } #[tokio::test] /// Note that this test is currently non-deterministic. It can be made /// deterministic by generating deterministic *dataset* Uuids. The sled and /// pool IDs should not matter. - async fn test_region_allocation() { - let logctx = dev::test_setup_log("test_region_allocation"); + async fn test_region_allocation_strat_random() { + let logctx = dev::test_setup_log("test_region_allocation_strat_random"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - create_test_datasets_for_region_allocation(&opctx, datastore.clone()) - .await; + create_test_datasets_for_region_allocation( + &opctx, + datastore.clone(), + REGION_REDUNDANCY_THRESHOLD, + ) + .await; // Allocate regions from the datasets for this disk. Do it a few times // for good measure. @@ -799,7 +791,9 @@ mod test { volume_id, ¶ms.disk_source, params.size, - &RegionAllocationStrategy::Random(Some(alloc_seed as u128)), + &RegionAllocationStrategy::Random { + seed: Some(alloc_seed), + }, ) .await .unwrap(); @@ -809,8 +803,81 @@ mod test { let mut disk_datasets = HashSet::new(); let mut disk_zpools = HashSet::new(); - // TODO: When allocation chooses 3 distinct sleds, uncomment this. 
- // let mut disk1_sleds = HashSet::new(); + for (dataset, region) in dataset_and_regions { + // Must be 3 unique datasets + assert!(disk_datasets.insert(dataset.id())); + + // Must be 3 unique zpools + assert!(disk_zpools.insert(dataset.pool_id)); + + assert_eq!(volume_id, region.volume_id()); + assert_eq!(ByteCount::from(4096), region.block_size()); + let (_, extent_count) = DataStore::get_crucible_allocation( + &BlockSize::AdvancedFormat, + params.size, + ); + assert_eq!(extent_count, region.extent_count()); + } + } + + let _ = db.cleanup().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + /// Test the [`RegionAllocationStrategy::RandomWithDistinctSleds`] strategy. + /// It should always pick datasets where no two datasets are on the same + /// zpool and no two zpools are on the same sled. + async fn test_region_allocation_strat_random_with_distinct_sleds() { + let logctx = dev::test_setup_log( + "test_region_allocation_strat_random_with_distinct_sleds", + ); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a rack without enough sleds for a successful allocation when + // we require 3 distinct sleds. + let test_datasets = create_test_datasets_for_region_allocation( + &opctx, + datastore.clone(), + REGION_REDUNDANCY_THRESHOLD, + ) + .await; + + // We need to check that our datasets end up on 3 distinct sleds, but the query doesn't return the sled ID, so we need to reverse map from dataset ID to sled ID + let sled_id_map: HashMap = test_datasets + .into_iter() + .map(|test_dataset| (test_dataset.dataset_id, test_dataset.sled_id)) + .collect(); + + // Allocate regions from the datasets for this disk. Do it a few times + // for good measure. + for alloc_seed in 0..10 { + let params = create_test_disk_create_params( + &format!("disk{}", alloc_seed), + ByteCount::from_mebibytes_u32(1), + ); + let volume_id = Uuid::new_v4(); + + let expected_region_count = REGION_REDUNDANCY_THRESHOLD; + let dataset_and_regions = datastore + .region_allocate( + &opctx, + volume_id, + ¶ms.disk_source, + params.size, + &&RegionAllocationStrategy::RandomWithDistinctSleds { + seed: Some(alloc_seed), + }, + ) + .await + .unwrap(); + + // Verify the allocation. + assert_eq!(expected_region_count, dataset_and_regions.len()); + let mut disk_datasets = HashSet::new(); + let mut disk_zpools = HashSet::new(); + let mut disk_sleds = HashSet::new(); for (dataset, region) in dataset_and_regions { // Must be 3 unique datasets assert!(disk_datasets.insert(dataset.id())); @@ -819,8 +886,8 @@ mod test { assert!(disk_zpools.insert(dataset.pool_id)); // Must be 3 unique sleds - // TODO: When allocation chooses 3 distinct sleds, uncomment this. - // assert!(disk1_sleds.insert(Err(dataset))); + let sled_id = sled_id_map.get(&dataset.id()).unwrap(); + assert!(disk_sleds.insert(*sled_id)); assert_eq!(volume_id, region.volume_id()); assert_eq!(ByteCount::from(4096), region.block_size()); @@ -836,14 +903,72 @@ mod test { logctx.cleanup_successful(); } + #[tokio::test] + /// Ensure the [`RegionAllocationStrategy::RandomWithDistinctSleds`] + /// strategy fails when there aren't enough distinct sleds. 
+ async fn test_region_allocation_strat_random_with_distinct_sleds_fails() { + let logctx = dev::test_setup_log( + "test_region_allocation_strat_random_with_distinct_sleds_fails", + ); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a rack without enough sleds for a successful allocation when + // we require 3 distinct sleds. + create_test_datasets_for_region_allocation( + &opctx, + datastore.clone(), + REGION_REDUNDANCY_THRESHOLD - 1, + ) + .await; + + // Allocate regions from the datasets for this disk. Do it a few times + // for good measure. + for alloc_seed in 0..10 { + let params = create_test_disk_create_params( + &format!("disk{}", alloc_seed), + ByteCount::from_mebibytes_u32(1), + ); + let volume_id = Uuid::new_v4(); + + let err = datastore + .region_allocate( + &opctx, + volume_id, + ¶ms.disk_source, + params.size, + &&RegionAllocationStrategy::RandomWithDistinctSleds { + seed: Some(alloc_seed), + }, + ) + .await + .unwrap_err(); + + let expected = "Not enough zpool space to allocate disks"; + assert!( + err.to_string().contains(expected), + "Saw error: \'{err}\', but expected \'{expected}\'" + ); + + assert!(matches!(err, Error::ServiceUnavailable { .. })); + } + + let _ = db.cleanup().await; + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_region_allocation_is_idempotent() { let logctx = dev::test_setup_log("test_region_allocation_is_idempotent"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - create_test_datasets_for_region_allocation(&opctx, datastore.clone()) - .await; + create_test_datasets_for_region_allocation( + &opctx, + datastore.clone(), + REGION_REDUNDANCY_THRESHOLD, + ) + .await; // Allocate regions from the datasets for this volume. let params = create_test_disk_create_params( @@ -857,7 +982,7 @@ mod test { volume_id, ¶ms.disk_source, params.size, - &RegionAllocationStrategy::Random(Some(0)), + &RegionAllocationStrategy::Random { seed: Some(0) }, ) .await .unwrap(); @@ -870,7 +995,7 @@ mod test { volume_id, ¶ms.disk_source, params.size, - &RegionAllocationStrategy::Random(Some(1)), + &RegionAllocationStrategy::Random { seed: Some(1) }, ) .await .unwrap(); @@ -959,7 +1084,7 @@ mod test { volume1_id, ¶ms.disk_source, params.size, - &RegionAllocationStrategy::Random(Some(0)), + &RegionAllocationStrategy::Random { seed: Some(0) }, ) .await .unwrap_err(); @@ -983,8 +1108,12 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - create_test_datasets_for_region_allocation(&opctx, datastore.clone()) - .await; + create_test_datasets_for_region_allocation( + &opctx, + datastore.clone(), + REGION_REDUNDANCY_THRESHOLD, + ) + .await; let disk_size = test_zpool_size(); let alloc_size = ByteCount::try_from(disk_size.to_bytes() * 2).unwrap(); @@ -997,7 +1126,7 @@ mod test { volume1_id, ¶ms.disk_source, params.size, - &RegionAllocationStrategy::Random(Some(0)), + &RegionAllocationStrategy::Random { seed: Some(0) }, ) .await .is_err()); diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index 5bc79b9481..9465fe2792 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -5,7 +5,6 @@ //! [`DataStore`] methods on [`Region`]s. 
use super::DataStore; -use super::RegionAllocationStrategy; use super::RunnableQuery; use crate::context::OpContext; use crate::db; @@ -23,6 +22,7 @@ use omicron_common::api::external; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; use omicron_common::backoff::{self, BackoffError}; +use omicron_common::nexus_config::RegionAllocationStrategy; use slog::Logger; use uuid::Uuid; diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index b071ee3f44..7f7b2ea9bf 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -6,7 +6,6 @@ use crate::db::alias::ExpressionAlias; use crate::db::cast_uuid_as_bytea::CastUuidToBytea; -use crate::db::datastore::RegionAllocationStrategy; use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; use crate::db::model::{Dataset, DatasetKind, Region}; use crate::db::pool::DbConnection; @@ -24,10 +23,11 @@ use diesel::{ use nexus_db_model::queries::region_allocation::{ candidate_datasets, candidate_regions, candidate_zpools, cockroach_md5, do_insert, inserted_regions, old_regions, old_zpool_usage, - proposed_dataset_changes, updated_datasets, + proposed_dataset_changes, shuffled_candidate_datasets, updated_datasets, }; use nexus_db_model::schema; use omicron_common::api::external; +use omicron_common::nexus_config::RegionAllocationStrategy; const NOT_ENOUGH_DATASETS_SENTINEL: &'static str = "Not enough datasets"; const NOT_ENOUGH_ZPOOL_SPACE_SENTINEL: &'static str = "Not enough space"; @@ -53,7 +53,7 @@ pub fn from_diesel(e: async_bb8_diesel::ConnectionError) -> external::Error { } NOT_ENOUGH_ZPOOL_SPACE_SENTINEL => { return external::Error::unavail( - "Not enough zpool space to allocate disks", + "Not enough zpool space to allocate disks. There may not be enough disks with space for the requested region. You may also see this if your rack is in a degraded state, or you're running the default multi-rack topology configuration in a 1-sled development environment.", ); } NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL => { @@ -91,6 +91,8 @@ impl OldRegions { /// This implicitly distinguishes between "M.2s" and "U.2s" -- Nexus needs to /// determine during dataset provisioning which devices should be considered for /// usage as Crucible storage. +/// +/// We select only one dataset from each zpool. 
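The doc comment just above ("We select only one dataset from each zpool") summarizes what the rewritten candidate-selection subqueries below are built to do. As an illustrative sketch only (not part of this patch), the same selection semantics can be written as plain Rust over an in-memory candidate list; it assumes the `md5` and `uuid` crates, and the `redundancy` parameter stands in for REGION_REDUNDANCY_THRESHOLD:

    use uuid::Uuid;

    struct CandidateDataset {
        id: Uuid,
        pool_id: Uuid,
    }

    /// md5(dataset UUID || seed): the same shuffle key the query orders by.
    fn shuffle_key(id: &Uuid, seed: u128) -> [u8; 16] {
        let mut buf = id.as_bytes().to_vec();
        buf.extend_from_slice(&seed.to_le_bytes());
        md5::compute(&buf).0
    }

    fn pick_candidates(
        mut datasets: Vec<CandidateDataset>,
        seed: u128,
        redundancy: usize,
    ) -> Vec<CandidateDataset> {
        // DISTINCT ON (pool_id): keep one dataset per zpool, choosing the one
        // with the smallest shuffle key within each pool.
        datasets.sort_by_key(|d| (d.pool_id, shuffle_key(&d.id, seed)));
        datasets.dedup_by_key(|d| d.pool_id);
        // Shuffle what remains with the same key and keep the first
        // `redundancy` candidates.
        datasets.sort_by_key(|d| shuffle_key(&d.id, seed));
        datasets.truncate(redundancy);
        datasets
    }

When the distinct-sleds strategy is in play, the zpools fed into this step have already been reduced to at most one per sled (see CandidateZpools further down), so picking one dataset per zpool also yields distinct sleds.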
#[derive(Subquery, QueryId)] #[subquery(name = candidate_datasets)] struct CandidateDatasets { @@ -98,71 +100,65 @@ struct CandidateDatasets { } impl CandidateDatasets { - fn new( - allocation_strategy: &RegionAllocationStrategy, - candidate_zpools: &CandidateZpools, - ) -> Self { + fn new(candidate_zpools: &CandidateZpools, seed: u128) -> Self { use crate::db::schema::dataset::dsl as dataset_dsl; use candidate_zpools::dsl as candidate_zpool_dsl; - let query = match allocation_strategy { - #[cfg(test)] - RegionAllocationStrategy::LeastUsedDisk => { - let query: Box< - dyn CteQuery, - > = Box::new( - dataset_dsl::dataset - .inner_join( - candidate_zpools - .query_source() - .on(dataset_dsl::pool_id - .eq(candidate_zpool_dsl::pool_id)), - ) - .filter(dataset_dsl::time_deleted.is_null()) - .filter(dataset_dsl::size_used.is_not_null()) - .filter(dataset_dsl::kind.eq(DatasetKind::Crucible)) - .order(dataset_dsl::size_used.asc()) - .limit(REGION_REDUNDANCY_THRESHOLD.try_into().unwrap()) - .select((dataset_dsl::id, dataset_dsl::pool_id)), - ); - query - } - RegionAllocationStrategy::Random(seed) => { - let seed = seed.unwrap_or_else(|| { - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_nanos() - }); - - let seed_bytes = seed.to_le_bytes(); - - let query: Box< - dyn CteQuery, - > = Box::new( - dataset_dsl::dataset - .inner_join( - candidate_zpools - .query_source() - .on(dataset_dsl::pool_id - .eq(candidate_zpool_dsl::pool_id)), - ) - .filter(dataset_dsl::time_deleted.is_null()) - .filter(dataset_dsl::size_used.is_not_null()) - .filter(dataset_dsl::kind.eq(DatasetKind::Crucible)) - // We order by md5 to shuffle the ordering of the datasets. - // md5 has a uniform output distribution so it does the job. - .order(cockroach_md5::dsl::md5( + let seed_bytes = seed.to_le_bytes(); + + let query: Box> = + Box::new( + dataset_dsl::dataset + .inner_join(candidate_zpools.query_source().on( + dataset_dsl::pool_id.eq(candidate_zpool_dsl::pool_id), + )) + .filter(dataset_dsl::time_deleted.is_null()) + .filter(dataset_dsl::size_used.is_not_null()) + .filter(dataset_dsl::kind.eq(DatasetKind::Crucible)) + .distinct_on(dataset_dsl::pool_id) + .order_by(( + dataset_dsl::pool_id, + cockroach_md5::dsl::md5( CastUuidToBytea::new(dataset_dsl::id) .concat(seed_bytes.to_vec()), - )) - .select((dataset_dsl::id, dataset_dsl::pool_id)) - .limit(REGION_REDUNDANCY_THRESHOLD.try_into().unwrap()), - ); - query - } - }; + ), + )) + .select((dataset_dsl::id, dataset_dsl::pool_id)), + ); + Self { query } + } +} + +/// Shuffle the candidate datasets, and select REGION_REDUNDANCY_THRESHOLD +/// regions from it. +#[derive(Subquery, QueryId)] +#[subquery(name = shuffled_candidate_datasets)] +struct ShuffledCandidateDatasets { + query: Box>, +} +impl ShuffledCandidateDatasets { + fn new(candidate_datasets: &CandidateDatasets, seed: u128) -> Self { + use candidate_datasets::dsl as candidate_datasets_dsl; + + let seed_bytes = seed.to_le_bytes(); + + let query: Box> = + Box::new( + candidate_datasets + .query_source() + // We order by md5 to shuffle the ordering of the datasets. + // md5 has a uniform output distribution so it does the job. 
+ .order(cockroach_md5::dsl::md5( + CastUuidToBytea::new(candidate_datasets_dsl::id) + .concat(seed_bytes.to_vec()), + )) + .select(( + candidate_datasets_dsl::id, + candidate_datasets_dsl::pool_id, + )) + .limit(REGION_REDUNDANCY_THRESHOLD.try_into().unwrap()), + ); Self { query } } } @@ -179,14 +175,14 @@ diesel::sql_function!(fn now() -> Timestamptz); impl CandidateRegions { fn new( - candidate_datasets: &CandidateDatasets, + shuffled_candidate_datasets: &ShuffledCandidateDatasets, volume_id: uuid::Uuid, block_size: u64, blocks_per_extent: u64, extent_count: u64, ) -> Self { - use candidate_datasets::dsl as candidate_datasets_dsl; use schema::region; + use shuffled_candidate_datasets::dsl as shuffled_candidate_datasets_dsl; let volume_id = volume_id.into_sql::(); let block_size = (block_size as i64).into_sql::(); @@ -195,20 +191,22 @@ impl CandidateRegions { let extent_count = (extent_count as i64).into_sql::(); Self { - query: Box::new(candidate_datasets.query_source().select(( - ExpressionAlias::new::(gen_random_uuid()), - ExpressionAlias::new::(now()), - ExpressionAlias::new::(now()), - ExpressionAlias::new::( - candidate_datasets_dsl::id, + query: Box::new(shuffled_candidate_datasets.query_source().select( + ( + ExpressionAlias::new::(gen_random_uuid()), + ExpressionAlias::new::(now()), + ExpressionAlias::new::(now()), + ExpressionAlias::new::( + shuffled_candidate_datasets_dsl::id, + ), + ExpressionAlias::new::(volume_id), + ExpressionAlias::new::(block_size), + ExpressionAlias::new::( + blocks_per_extent, + ), + ExpressionAlias::new::(extent_count), ), - ExpressionAlias::new::(volume_id), - ExpressionAlias::new::(block_size), - ExpressionAlias::new::( - blocks_per_extent, - ), - ExpressionAlias::new::(extent_count), - ))), + )), } } } @@ -285,12 +283,14 @@ struct CandidateZpools { } impl CandidateZpools { - fn new(old_zpool_usage: &OldPoolUsage, zpool_size_delta: u64) -> Self { + fn new( + old_zpool_usage: &OldPoolUsage, + zpool_size_delta: u64, + seed: u128, + distinct_sleds: bool, + ) -> Self { use schema::zpool::dsl as zpool_dsl; - let with_zpool = zpool_dsl::zpool - .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id)); - // Why are we using raw `diesel::dsl::sql` here? 
// // When SQL performs the "SUM" operation on "bigint" type, the result @@ -309,15 +309,40 @@ impl CandidateZpools { + diesel::dsl::sql(&zpool_size_delta.to_string())) .le(diesel::dsl::sql(zpool_dsl::total_size::NAME)); - Self { - query: Box::new( - old_zpool_usage - .query_source() - .inner_join(with_zpool) - .filter(it_will_fit) - .select((old_zpool_usage::dsl::pool_id,)), - ), - } + let with_zpool = zpool_dsl::zpool + .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id)); + + let base_query = old_zpool_usage + .query_source() + .inner_join(with_zpool) + .filter(it_will_fit) + .select((old_zpool_usage::dsl::pool_id,)); + + let query = if distinct_sleds { + let seed_bytes = seed.to_le_bytes(); + + let query: Box> = + Box::new( + base_query + .order_by(( + zpool_dsl::sled_id, + cockroach_md5::dsl::md5( + CastUuidToBytea::new(zpool_dsl::id) + .concat(seed_bytes.to_vec()), + ), + )) + .distinct_on(zpool_dsl::sled_id), + ); + + query + } else { + let query: Box> = + Box::new(base_query); + + query + }; + + Self { query } } } @@ -508,19 +533,47 @@ impl RegionAllocate { extent_count: u64, allocation_strategy: &RegionAllocationStrategy, ) -> Self { + let (seed, distinct_sleds) = { + let (input_seed, distinct_sleds) = match allocation_strategy { + RegionAllocationStrategy::Random { seed } => (seed, false), + RegionAllocationStrategy::RandomWithDistinctSleds { seed } => { + (seed, true) + } + }; + ( + input_seed.map_or_else( + || { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + }, + |seed| seed as u128, + ), + distinct_sleds, + ) + }; + let size_delta = block_size * blocks_per_extent * extent_count; let old_regions = OldRegions::new(volume_id); let old_pool_usage = OldPoolUsage::new(); - let candidate_zpools = - CandidateZpools::new(&old_pool_usage, size_delta); + let candidate_zpools = CandidateZpools::new( + &old_pool_usage, + size_delta, + seed, + distinct_sleds, + ); let candidate_datasets = - CandidateDatasets::new(&allocation_strategy, &candidate_zpools); + CandidateDatasets::new(&candidate_zpools, seed); + + let shuffled_candidate_datasets = + ShuffledCandidateDatasets::new(&candidate_datasets, seed); let candidate_regions = CandidateRegions::new( - &candidate_datasets, + &shuffled_candidate_datasets, volume_id, block_size, blocks_per_extent, @@ -577,6 +630,7 @@ impl RegionAllocate { .add_subquery(old_pool_usage) .add_subquery(candidate_zpools) .add_subquery(candidate_datasets) + .add_subquery(shuffled_candidate_datasets) .add_subquery(candidate_regions) .add_subquery(proposed_changes) .add_subquery(do_insert) diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index f1b20c32a1..1a9afbc6bd 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -92,3 +92,14 @@ dns_external.max_concurrent_server_updates = 5 # certificates it will take _other_ Nexus instances to notice and stop serving # them (on a sunny day). external_endpoints.period_secs = 60 + +[default_region_allocation_strategy] +# allocate region on 3 random distinct zpools, on 3 random distinct sleds. +type = "random_with_distinct_sleds" + +# the same as random_with_distinct_sleds, but without requiring distinct sleds +# type = "random" + +# setting `seed` to a fixed value will make dataset selection ordering use the +# same shuffling order for every region allocation. 
+# seed = 0 diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 5bab5e2820..354df0ead3 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -23,6 +23,7 @@ use omicron_common::address::DENDRITE_PORT; use omicron_common::address::MGS_PORT; use omicron_common::api::external::Error; use omicron_common::api::internal::shared::SwitchLocation; +use omicron_common::nexus_config::RegionAllocationStrategy; use slog::Logger; use std::collections::HashMap; use std::net::Ipv6Addr; @@ -153,6 +154,9 @@ pub struct Nexus { /// Background tasks background_tasks: background::BackgroundTasks, + + /// Default Crucible region allocation strategy + default_region_allocation_strategy: RegionAllocationStrategy, } impl Nexus { @@ -325,6 +329,10 @@ impl Nexus { external_resolver, dpd_clients, background_tasks, + default_region_allocation_strategy: config + .pkg + .default_region_allocation_strategy + .clone(), }; // TODO-cleanup all the extra Arcs here seems wrong diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index cca36cefa7..275c8738cc 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -12,11 +12,10 @@ use super::{ ACTION_GENERATE_ID, }; use crate::app::sagas::declare_saga_actions; +use crate::app::{authn, authz, db}; use crate::external_api::params; -use nexus_db_queries::db::datastore::RegionAllocationStrategy; use nexus_db_queries::db::identity::{Asset, Resource}; use nexus_db_queries::db::lookup::LookupPath; -use nexus_db_queries::{authn, authz, db}; use omicron_common::api::external::DiskState; use omicron_common::api::external::Error; use rand::{rngs::StdRng, RngCore, SeedableRng}; @@ -255,6 +254,9 @@ async fn sdc_alloc_regions( &sagactx, ¶ms.serialized_authn, ); + + let strategy = &osagactx.nexus().default_region_allocation_strategy; + let datasets_and_regions = osagactx .datastore() .region_allocate( @@ -262,7 +264,7 @@ async fn sdc_alloc_regions( volume_id, ¶ms.create_params.disk_source, params.create_params.size, - &RegionAllocationStrategy::Random(None), + &strategy, ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index b27f4a3a9b..eeabf64894 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -100,14 +100,13 @@ use super::{ }; use crate::app::sagas::declare_saga_actions; use crate::app::sagas::retry_until_known_result; +use crate::app::{authn, authz, db}; use crate::external_api::params; use anyhow::anyhow; use crucible_agent_client::{types::RegionId, Client as CrucibleAgentClient}; use nexus_db_model::Generation; -use nexus_db_queries::db::datastore::RegionAllocationStrategy; use nexus_db_queries::db::identity::{Asset, Resource}; use nexus_db_queries::db::lookup::LookupPath; -use nexus_db_queries::{authn, authz, db}; use omicron_common::api::external; use omicron_common::api::external::Error; use rand::{rngs::StdRng, RngCore, SeedableRng}; @@ -332,6 +331,8 @@ async fn ssc_alloc_regions( .await .map_err(ActionError::action_failed)?; + let strategy = &osagactx.nexus().default_region_allocation_strategy; + let datasets_and_regions = osagactx .datastore() .region_allocate( @@ -344,7 +345,7 @@ async fn ssc_alloc_regions( .map_err(|e| ActionError::action_failed(e.to_string()))?, }, external::ByteCount::from(disk.size), - &RegionAllocationStrategy::Random(None), + &strategy, ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/tests/config.test.toml 
b/nexus/tests/config.test.toml
index 6eeacceaed..1b1ae2c912 100644
--- a/nexus/tests/config.test.toml
+++ b/nexus/tests/config.test.toml
@@ -89,3 +89,8 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+
+[default_region_allocation_strategy]
+# we only have one sled in the test environment, so we need to use the
+# `Random` strategy, instead of `RandomWithDistinctSleds`
+type = "random"
\ No newline at end of file
diff --git a/package-manifest.toml b/package-manifest.toml
index 4dc0f6b616..c776f6d96d 100644
--- a/package-manifest.toml
+++ b/package-manifest.toml
@@ -92,7 +92,8 @@ source.rust.binary_names = ["nexus", "schema-updater"]
 source.rust.release = true
 source.paths = [
   { from = "/opt/ooce/pgsql-13/lib/amd64", to = "/opt/ooce/pgsql-13/lib/amd64" },
-  { from = "smf/nexus", to = "/var/svc/manifest/site/nexus" },
+  { from = "smf/nexus/manifest.xml", to = "/var/svc/manifest/site/nexus/manifest.xml" },
+  { from = "smf/nexus/{{rack-topology}}", to = "/var/svc/manifest/site/nexus" },
   { from = "out/console-assets", to = "/var/nexus/static" },
   { from = "schema/crdb", to = "/var/nexus/schema/crdb" },
 ]
diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs
index ea490e54cf..bc07b61234 100644
--- a/package/src/bin/omicron-package.rs
+++ b/package/src/bin/omicron-package.rs
@@ -211,11 +211,12 @@ async fn do_target(
             format!("failed to create directory {}", target_dir.display())
         })?;
     match subcommand {
-        TargetCommand::Create { image, machine, switch } => {
+        TargetCommand::Create { image, machine, switch, rack_topology } => {
             let target = KnownTarget::new(
                 image.clone(),
                 machine.clone(),
                 switch.clone(),
+                rack_topology.clone(),
             )?;
 
             let path = get_single_target(&target_dir, name).await?;
diff --git a/package/src/lib.rs b/package/src/lib.rs
index b0cc04970a..395f3ed472 100644
--- a/package/src/lib.rs
+++ b/package/src/lib.rs
@@ -46,6 +46,29 @@ pub enum TargetCommand {
         #[clap(short, long, default_value_if("image", "standard", "stub"))]
         switch: Option,
+
+        #[clap(
+            short,
+            long,
+            default_value_if("image", "trampoline", Some("single-sled")),
+
+            // This opt is required, and clap will enforce that even with
+            // `required = false`, since it's not an Option. But the
+            // default_value_if only works if we set `required` to false. It's
+            // jank, but it is what it is.
+            // https://github.com/clap-rs/clap/issues/4086
+            required = false
+        )]
+        /// Specify whether nexus will run in a single-sled or multi-sled
+        /// environment.
+        ///
+        /// Set single-sled for dev purposes when you're running a single
+        /// sled-agent. Set multi-sled if you're running with multiple sleds.
+        /// Currently this only affects the crucible disk allocation strategy -
+        /// VM disks will require 3 distinct sleds with `multi-sled`, which will
+        /// fail in a single-sled environment. `single-sled` relaxes this
+        /// requirement.
+        rack_topology: crate::target::RackTopology,
     },
 
     /// List all existing targets
     List,
diff --git a/package/src/target.rs b/package/src/target.rs
index a7b2dd4539..d5d5e92c46 100644
--- a/package/src/target.rs
+++ b/package/src/target.rs
@@ -48,12 +48,27 @@ pub enum Switch {
     SoftNpu,
 }
 
+/// Topology of the sleds within the rack.
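As an aside on the enum introduced directly below: the strum derives give RackTopology a kebab-case string form, which is what `-r single-sled` / `-r multi-sled` resolve to and what the new "rack-topology" target key stores. A self-contained sketch of that round trip (illustrative only, not part of the patch; it assumes strum with its derive feature enabled and adds PartialEq purely for the assertions):

    use std::str::FromStr;

    #[derive(Clone, Debug, PartialEq, strum::EnumString, strum::Display)]
    #[strum(serialize_all = "kebab-case")]
    enum RackTopology {
        MultiSled,
        SingleSled,
    }

    fn main() {
        // Display: used when writing the "rack-topology" key of a build target.
        assert_eq!(RackTopology::SingleSled.to_string(), "single-sled");
        // FromStr: used when reading the key back out of a stored target
        // (the v.parse() call in target.rs).
        assert_eq!(
            RackTopology::from_str("multi-sled").unwrap(),
            RackTopology::MultiSled
        );
    }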
+#[derive(Clone, Debug, strum::EnumString, strum::Display, ValueEnum)]
+#[strum(serialize_all = "kebab-case")]
+#[clap(rename_all = "kebab-case")]
+pub enum RackTopology {
+    /// Use configurations suitable for a multi-sled deployment, such as dogfood
+    /// and production racks.
+    MultiSled,
+
+    /// Use configurations suitable for a single-sled deployment, such as CI and
+    /// dev machines.
+    SingleSled,
+}
+
 /// A strongly-typed variant of [Target].
 #[derive(Clone, Debug)]
 pub struct KnownTarget {
     image: Image,
     machine: Option,
     switch: Option,
+    rack_topology: RackTopology,
 }
 
 impl KnownTarget {
@@ -61,6 +76,7 @@ pub fn new(
         image: Image,
         machine: Option,
         switch: Option,
+        rack_topology: RackTopology,
     ) -> Result {
         if matches!(image, Image::Trampoline) {
             if machine.is_some() {
@@ -77,7 +93,7 @@
             bail!("'switch=asic' is only valid with 'machine=gimlet'");
         }
 
-        Ok(Self { image, machine, switch })
+        Ok(Self { image, machine, switch, rack_topology })
     }
 }
 
@@ -87,6 +103,7 @@ impl Default for KnownTarget {
             image: Image::Standard,
             machine: Some(Machine::NonGimlet),
             switch: Some(Switch::Stub),
+            rack_topology: RackTopology::MultiSled,
         }
     }
 }
@@ -101,6 +118,7 @@ impl From for Target {
         if let Some(switch) = kt.switch {
             map.insert("switch".to_string(), switch.to_string());
         }
+        map.insert("rack-topology".to_string(), kt.rack_topology.to_string());
         Target(map)
     }
 }
@@ -121,6 +139,7 @@ impl std::str::FromStr for KnownTarget {
         let mut image = Self::default().image;
         let mut machine = None;
         let mut switch = None;
+        let mut rack_topology = None;
 
         for (k, v) in target.0.into_iter() {
             match k.as_str() {
@@ -133,6 +152,9 @@
                 "switch" => {
                     switch = Some(v.parse()?);
                 }
+                "rack-topology" => {
+                    rack_topology = Some(v.parse()?);
+                }
                 _ => {
                     bail!(
                         "Unknown target key {k}\nValid keys include: [{}]",
@@ -146,6 +168,11 @@
                 }
             }
         }
-        KnownTarget::new(image, machine, switch)
+        KnownTarget::new(
+            image,
+            machine,
+            switch,
+            rack_topology.unwrap_or(RackTopology::MultiSled),
+        )
     }
 }
diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml
index 88e51a3bc3..d4ccfc97c8 100644
--- a/sled-agent/Cargo.toml
+++ b/sled-agent/Cargo.toml
@@ -120,3 +120,5 @@ machine-non-gimlet = []
 switch-asic = []
 switch-stub = []
 switch-softnpu = []
+rack-topology-single-sled = []
+rack-topology-multi-sled = []
diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs
index 96cdf8222b..60f0965612 100644
--- a/sled-agent/src/services.rs
+++ b/sled-agent/src/services.rs
@@ -1513,6 +1513,9 @@ impl ServiceManager {
             .open(&config_path)
             .await
             .map_err(|err| Error::io_path(&config_path, err))?;
+        file.write_all(b"\n\n")
+            .await
+            .map_err(|err| Error::io_path(&config_path, err))?;
         file.write_all(config_str.as_bytes())
             .await
             .map_err(|err| Error::io_path(&config_path, err))?;
diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml
new file mode 100644
index 0000000000..2dfee81d02
--- /dev/null
+++ b/smf/nexus/multi-sled/config-partial.toml
@@ -0,0 +1,45 @@
+#
+# Oxide API: partial configuration file
+#
+
+[console]
+# Directory for static assets. Absolute path or relative to CWD.
+static_dir = "/var/nexus/static"
+session_idle_timeout_minutes = 60
+session_absolute_timeout_minutes = 480
+
+[authn]
+schemes_external = ["session_cookie", "access_token"]
+
+[log]
+# Show log messages of this level and more severe
+level = "debug"
+mode = "file"
+path = "/dev/stdout"
+if_exists = "append"
+
+# TODO: Uncomment the following lines to enable automatic schema
+# migration on boot.
+#
+# [schema]
+# schema_dir = "/var/nexus/schema/crdb"
+
+[background_tasks]
+dns_internal.period_secs_config = 60
+dns_internal.period_secs_servers = 60
+dns_internal.period_secs_propagation = 60
+dns_internal.max_concurrent_server_updates = 5
+dns_external.period_secs_config = 60
+dns_external.period_secs_servers = 60
+dns_external.period_secs_propagation = 60
+dns_external.max_concurrent_server_updates = 5
+# How frequently we check the list of stored TLS certificates. This is
+# approximately an upper bound on how soon after updating the list of
+# certificates it will take _other_ Nexus instances to notice and stop serving
+# them (on a sunny day).
+external_endpoints.period_secs = 60
+
+[default_region_allocation_strategy]
+# by default, allocate across 3 distinct sleds
+# seed is omitted so a new seed will be chosen with every allocation.
+type = "random_with_distinct_sleds"
\ No newline at end of file
diff --git a/smf/nexus/config-partial.toml b/smf/nexus/single-sled/config-partial.toml
similarity index 86%
rename from smf/nexus/config-partial.toml
rename to smf/nexus/single-sled/config-partial.toml
index b29727c4aa..aff0a8a25f 100644
--- a/smf/nexus/config-partial.toml
+++ b/smf/nexus/single-sled/config-partial.toml
@@ -38,3 +38,8 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+
+[default_region_allocation_strategy]
+# by default, allocate without requirement for distinct sleds.
+# seed is omitted so a new seed will be chosen with every allocation.
+type = "random"
\ No newline at end of file
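Taken together: the package-manifest change ships either smf/nexus/single-sled or smf/nexus/multi-sled as the Nexus config-partial.toml, and that file's [default_region_allocation_strategy] stanza deserializes into the RegionAllocationStrategy enum added to common/src/nexus_config.rs. A minimal sketch of that mapping, mirroring what the nexus_config tests above exercise (illustrative only, not part of the patch; the seed's integer width is elided in the enum as rendered above, so u64 is assumed here, and the PartialConfig wrapper exists only for this sketch):

    use serde::Deserialize;

    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(tag = "type", rename_all = "snake_case")]
    enum RegionAllocationStrategy {
        // `seed` may be omitted; the allocator then picks a fresh seed
        // (current time in nanoseconds) for every allocation.
        Random { seed: Option<u64> },
        RandomWithDistinctSleds { seed: Option<u64> },
    }

    #[derive(Debug, Deserialize)]
    struct PartialConfig {
        default_region_allocation_strategy: RegionAllocationStrategy,
    }

    fn main() {
        // What the single-sled config-partial.toml selects.
        let single: PartialConfig = toml::from_str(
            "[default_region_allocation_strategy]\ntype = \"random\"\n",
        )
        .unwrap();
        assert_eq!(
            single.default_region_allocation_strategy,
            RegionAllocationStrategy::Random { seed: None }
        );

        // What the multi-sled config-partial.toml selects.
        let multi: PartialConfig = toml::from_str(
            "[default_region_allocation_strategy]\ntype = \"random_with_distinct_sleds\"\n",
        )
        .unwrap();
        assert_eq!(
            multi.default_region_allocation_strategy,
            RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }
        );
    }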