From e7a1c33ce24f7ac146848067645ad677d93782c7 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 19 Sep 2024 22:38:05 +0000 Subject: [PATCH 01/19] Start integrating clickhouse clusters to blueprints --- Cargo.lock | 1 + nexus/db-queries/Cargo.toml | 1 + .../db-queries/src/db/datastore/deployment.rs | 162 ++++++++++++++++++ .../planning/src/blueprint_builder/builder.rs | 150 ++++++++++++++++ .../src/blueprint_builder/clickhouse.rs | 99 +++++------ nexus/reconfigurator/planning/src/planner.rs | 20 +++ .../src/planner/omicron_zone_placement.rs | 16 +- nexus/types/src/deployment.rs | 4 + nexus/types/src/deployment/planning_input.rs | 20 +++ nexus/types/src/inventory.rs | 4 +- sled-agent/src/rack_setup/service.rs | 3 + 11 files changed, 422 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 146c721e22..3a3ee6c9d0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5594,6 +5594,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", + "clickhouse-admin-types", "const_format", "db-macros", "diesel", diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index c6c5caab6a..2059180c59 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -16,6 +16,7 @@ async-bb8-diesel.workspace = true async-trait.workspace = true camino.workspace = true chrono.workspace = true +clickhouse-admin-types.workspace = true const_format.workspace = true diesel.workspace = true diesel-dtrace.workspace = true diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 12cbed4652..3b0e34da6b 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -20,6 +20,7 @@ use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::DateTime; use chrono::Utc; +use clickhouse_admin_types::{KeeperId, ServerId}; use diesel::expression::SelectableHelper; use diesel::pg::Pg; use diesel::query_builder::AstPass; @@ -35,7 +36,11 @@ use diesel::IntoSql; use diesel::OptionalExtension; use diesel::QueryDsl; use diesel::RunQueryDsl; +use nexus_db_model::schema::bp_clickhouse_cluster_config; use nexus_db_model::Blueprint as DbBlueprint; +use nexus_db_model::BpClickhouseClusterConfig; +use nexus_db_model::BpClickhouseKeeperZoneIdToNodeId; +use nexus_db_model::BpClickhouseServerZoneIdToNodeId; use nexus_db_model::BpOmicronPhysicalDisk; use nexus_db_model::BpOmicronZone; use nexus_db_model::BpOmicronZoneNic; @@ -49,6 +54,7 @@ use nexus_types::deployment::BlueprintPhysicalDisksConfig; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZonesConfig; +use nexus_types::deployment::ClickhouseClusterConfig; use nexus_types::deployment::CockroachDbPreserveDowngrade; use nexus_types::external_api::views::SledState; use omicron_common::api::external::DataPageParams; @@ -58,6 +64,7 @@ use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_common::bail_unless; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; use uuid::Uuid; @@ -622,6 +629,160 @@ impl DataStore { disks_config.disks.sort_unstable_by_key(|d| d.id); } + // Load our `ClickhouseClusterConfig` if it exists + let clickhouse_cluster_config: Option = { + use db::schema::bp_clickhouse_cluster_config::dsl; + + let res = dsl::bp_clickhouse_cluster_config + 
.filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpClickhouseClusterConfig::as_select()) + .get_result_async(&*conn) + .await + .optional() + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + match res { + None => None, + Some(bp_config) => { + // Load our clickhouse keeper configs for the given blueprint + let keepers: BTreeMap = { + use db::schema::bp_clickhouse_keeper_zone_id_to_node_id::dsl; + let mut keepers = BTreeMap::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::bp_clickhouse_keeper_zone_id_to_node_id, + dsl::omicron_zone_id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select( + BpClickhouseKeeperZoneIdToNodeId::as_select(), + ) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::Server, + ) + })?; + + paginator = + p.found_batch(&batch, &|k| k.omicron_zone_id); + + for k in batch { + let keeper_id = KeeperId( + u64::try_from(k.keeper_id).or_else( + |_| { + Err(Error::internal_error( + &format!( + "keeper id is negative: {}", + k.keeper_id + ), + )) + }, + )?, + ); + keepers.insert( + k.omicron_zone_id.into(), + keeper_id, + ); + } + } + keepers + }; + + // Load our clickhouse server configs for the given blueprint + let servers: BTreeMap = { + use db::schema::bp_clickhouse_server_zone_id_to_node_id::dsl; + let mut servers = BTreeMap::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::bp_clickhouse_server_zone_id_to_node_id, + dsl::omicron_zone_id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select( + BpClickhouseServerZoneIdToNodeId::as_select(), + ) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::Server, + ) + })?; + + paginator = + p.found_batch(&batch, &|s| s.omicron_zone_id); + + for s in batch { + let server_id = ServerId( + u64::try_from(s.server_id).or_else( + |_| { + Err(Error::internal_error( + &format!( + "server id is negative: {}", + s.server_id + ), + )) + }, + )?, + ); + servers.insert( + s.omicron_zone_id.into(), + server_id, + ); + } + } + servers + }; + + Some(ClickhouseClusterConfig { + generation: bp_config.generation.into(), + max_used_server_id: ServerId( + u64::try_from(bp_config.max_used_server_id) + .or_else(|_| { + Err(Error::internal_error(&format!( + "max server id is negative: {}", + bp_config.max_used_server_id + ))) + })?, + ), + max_used_keeper_id: KeeperId( + u64::try_from(bp_config.max_used_keeper_id) + .or_else(|_| { + Err(Error::internal_error(&format!( + "max keeper id is negative: {}", + bp_config.max_used_keeper_id + ))) + })?, + ), + cluster_name: bp_config.cluster_name, + cluster_secret: bp_config.cluster_secret, + highest_seen_keeper_leader_committed_log_index: + u64::try_from( + bp_config.highest_seen_keeper_leader_committed_log_index, + ) + .or_else(|_| { + Err(Error::internal_error(&format!( + "max server id is negative: {}", + bp_config.highest_seen_keeper_leader_committed_log_index + ))) + })?, + keepers, + servers, + }) + } + } + }; + Ok(Blueprint { id: blueprint_id, blueprint_zones, @@ -632,6 +793,7 @@ impl DataStore { external_dns_version, cockroachdb_fingerprint, cockroachdb_setting_preserve_downgrade, + clickhouse_cluster_config, time_created, creator, comment, diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs 
b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 19450c0f80..344d1fc970 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -8,6 +8,7 @@ use crate::ip_allocator::IpAllocator; use crate::planner::zone_needs_expungement; use crate::planner::ZoneExpungeReason; use anyhow::anyhow; +use clickhouse_admin_types::OXIMETER_CLUSTER; use internal_dns::config::Host; use internal_dns::config::Zone; use ipnet::IpAdd; @@ -23,6 +24,7 @@ use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::BlueprintZonesConfig; +use nexus_types::deployment::ClickhouseClusterConfig; use nexus_types::deployment::CockroachDbPreserveDowngrade; use nexus_types::deployment::DiskFilter; use nexus_types::deployment::OmicronZoneExternalFloatingIp; @@ -75,6 +77,7 @@ use thiserror::Error; use typed_rng::TypedUuidRng; use typed_rng::UuidRng; +use super::clickhouse::ClickhouseAllocator; use super::external_networking::BuilderExternalNetworking; use super::external_networking::ExternalNetworkingChoice; use super::external_networking::ExternalSnatNetworkingChoice; @@ -214,6 +217,7 @@ pub struct BlueprintBuilder<'a> { sled_ip_allocators: BTreeMap, external_networking: BuilderExternalNetworking<'a>, internal_dns_subnets: DnsSubnetAllocator, + clickhouse_allocator: Option, // These fields will become part of the final blueprint. See the // corresponding fields in `Blueprint`. @@ -276,6 +280,7 @@ impl<'a> BlueprintBuilder<'a> { .copied() .map(|sled_id| (sled_id, SledState::Active)) .collect(); + Blueprint { id: rng.blueprint_rng.next(), blueprint_zones, @@ -287,6 +292,7 @@ impl<'a> BlueprintBuilder<'a> { cockroachdb_fingerprint: String::new(), cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade::DoNotModify, + clickhouse_cluster_config: None, time_created: now_db_precision(), creator: creator.to_owned(), comment: format!("starting blueprint with {num_sleds} empty sleds"), @@ -337,6 +343,25 @@ impl<'a> BlueprintBuilder<'a> { || commissioned_sled_ids.contains(sled_id) }); + // If we have the clickhouse cluster setup enabled via policy and we + // don't yet have a `ClickhouseClusterConfiguration`then we must create + // one and feed it to our `ClickhouseAllocator` + let clickhouse_allocator = if input.clickhouse_cluster_enabled() { + let parent_config = parent_blueprint + .clickhouse_cluster_config + .clone() + .unwrap_or_else(|| { + ClickhouseClusterConfig::new(OXIMETER_CLUSTER.to_string()) + }); + Some(ClickhouseAllocator::new( + log.clone(), + parent_config, + inventory.latest_clickhouse_keeper_membership(), + )) + } else { + None + }; + Ok(BlueprintBuilder { log, parent_blueprint, @@ -350,6 +375,7 @@ impl<'a> BlueprintBuilder<'a> { sled_state, cockroachdb_setting_preserve_downgrade: parent_blueprint .cockroachdb_setting_preserve_downgrade, + clickhouse_allocator, creator: creator.to_owned(), operations: Vec::new(), comments: Vec::new(), @@ -382,6 +408,18 @@ impl<'a> BlueprintBuilder<'a> { let blueprint_disks = self .disks .into_disks_map(self.input.all_sled_ids(SledFilter::InService)); + + // If we have an allocator, use it to generate a new config. If an error + // is returned then log it and carry over the parent_config. 
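+        // Carrying the parent config forward on error keeps the blueprint
+        // self-consistent: the allocator's only error variant here is
+        // `KeeperAllocationError::BadMembershipChange`, so we log it for
+        // operators and reuse the previous configuration.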
+ let clickhouse_cluster_config = self.clickhouse_allocator.map(|a| { + match a.plan(&(&blueprint_zones).into()) { + Ok(config) => config, + Err(e) => { + error!(self.log, "{e}"); + a.parent_config().clone() + } + } + }); Blueprint { id: self.rng.blueprint_rng.next(), blueprint_zones, @@ -397,6 +435,7 @@ impl<'a> BlueprintBuilder<'a> { .clone(), cockroachdb_setting_preserve_downgrade: self .cockroachdb_setting_preserve_downgrade, + clickhouse_cluster_config, time_created: now_db_precision(), creator: self.creator, comment: self @@ -1016,6 +1055,117 @@ impl<'a> BlueprintBuilder<'a> { Ok(EnsureMultiple::Changed { added: num_crdb_to_add, removed: 0 }) } + pub fn sled_ensure_zone_multiple_clickhouse_server( + &mut self, + sled_id: SledUuid, + desired_zone_count: usize, + ) -> Result { + // How many clickhouse server zones do we want to add? + let clickhouse_server_count = self.sled_num_running_zones_of_kind( + sled_id, + ZoneKind::ClickhouseServer, + ); + let num_clickhouse_servers_to_add = + match desired_zone_count.checked_sub(clickhouse_server_count) { + Some(0) => return Ok(EnsureMultiple::NotNeeded), + Some(n) => n, + None => { + return Err(Error::Planner(anyhow!( + "removing a ClickhouseServer zone not yet supported \ + (sled {sled_id} has {clickhouse_server_count}; \ + planner wants {desired_zone_count})" + ))); + } + }; + for _ in 0..num_clickhouse_servers_to_add { + let zone_id = self.rng.zone_rng.next(); + let underlay_ip = self.sled_alloc_ip(sled_id)?; + let pool_name = + self.sled_select_zpool(sled_id, ZoneKind::ClickhouseServer)?; + let port = omicron_common::address::CLICKHOUSE_HTTP_PORT; + let address = SocketAddrV6::new(underlay_ip, port, 0, 0); + let zone_type = BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { + address, + dataset: OmicronZoneDataset { + pool_name: pool_name.clone(), + }, + }, + ); + let filesystem_pool = pool_name; + + let zone = BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: zone_id, + underlay_address: underlay_ip, + filesystem_pool: Some(filesystem_pool), + zone_type, + }; + self.sled_add_zone(sled_id, zone)?; + } + + Ok(EnsureMultiple::Changed { + added: num_clickhouse_servers_to_add, + removed: 0, + }) + } + + pub fn sled_ensure_zone_multiple_clickhouse_keeper( + &mut self, + sled_id: SledUuid, + desired_zone_count: usize, + ) -> Result { + // How many clickhouse keeper zones do we want to add? 
+ let clickhouse_keeper_count = self.sled_num_running_zones_of_kind( + sled_id, + ZoneKind::ClickhouseKeeper, + ); + let num_clickhouse_keepers_to_add = + match desired_zone_count.checked_sub(clickhouse_keeper_count) { + Some(0) => return Ok(EnsureMultiple::NotNeeded), + Some(n) => n, + None => { + return Err(Error::Planner(anyhow!( + "removing a ClickhouseKeeper zone not yet supported \ + (sled {sled_id} has {clickhouse_keeper_count}; \ + planner wants {desired_zone_count})" + ))); + } + }; + + for _ in 0..num_clickhouse_keepers_to_add { + let zone_id = self.rng.zone_rng.next(); + let underlay_ip = self.sled_alloc_ip(sled_id)?; + let pool_name = + self.sled_select_zpool(sled_id, ZoneKind::ClickhouseKeeper)?; + let port = omicron_common::address::CLICKHOUSE_KEEPER_TCP_PORT; + let address = SocketAddrV6::new(underlay_ip, port, 0, 0); + let zone_type = BlueprintZoneType::ClickhouseKeeper( + blueprint_zone_type::ClickhouseKeeper { + address, + dataset: OmicronZoneDataset { + pool_name: pool_name.clone(), + }, + }, + ); + let filesystem_pool = pool_name; + + let zone = BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: zone_id, + underlay_address: underlay_ip, + filesystem_pool: Some(filesystem_pool), + zone_type, + }; + self.sled_add_zone(sled_id, zone)?; + } + + Ok(EnsureMultiple::Changed { + added: num_clickhouse_keepers_to_add, + removed: 0, + }) + } + pub fn sled_promote_internal_ntp_to_boundary_ntp( &mut self, sled_id: SledUuid, diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs index 4071346632..ef74ac8928 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs @@ -22,7 +22,7 @@ use thiserror::Error; // Will be removed once the planner starts using this code // See: https://github.com/oxidecomputer/omicron/issues/6577 #[allow(unused)] -struct ClickhouseZonesThatShouldBeRunning { +pub struct ClickhouseZonesThatShouldBeRunning { keepers: BTreeSet, servers: BTreeSet, } @@ -64,7 +64,6 @@ impl From<&BTreeMap> #[allow(unused)] pub struct ClickhouseAllocator { log: Logger, - active_clickhouse_zones: ClickhouseZonesThatShouldBeRunning, parent_config: ClickhouseClusterConfig, // The latest clickhouse cluster membership from inventory inventory: Option, @@ -77,10 +76,6 @@ pub struct ClickhouseAllocator { #[allow(unused)] #[derive(Debug, Error)] pub enum KeeperAllocationError { - #[error("a clickhouse cluster configuration has not been created")] - NoConfig, - #[error("failed to retrieve clickhouse keeper membership from inventory")] - NoInventory, #[error("cannot add more than one keeper at a time: {added_keepers:?}")] BadMembershipChange { added_keepers: BTreeSet }, } @@ -91,13 +86,11 @@ pub enum KeeperAllocationError { impl ClickhouseAllocator { pub fn new( log: Logger, - zones_by_sled_id: &BTreeMap, clickhouse_cluster_config: ClickhouseClusterConfig, inventory: Option, ) -> ClickhouseAllocator { ClickhouseAllocator { log, - active_clickhouse_zones: zones_by_sled_id.into(), parent_config: clickhouse_cluster_config, inventory, } @@ -107,6 +100,7 @@ impl ClickhouseAllocator { /// on the parent blueprint and inventory pub fn plan( &self, + active_clickhouse_zones: &ClickhouseZonesThatShouldBeRunning, ) -> Result { let mut new_config = self.parent_config.clone(); @@ -122,10 +116,10 @@ impl ClickhouseAllocator { // First, remove the clickhouse servers that are no longer in service 
new_config.servers.retain(|zone_id, _| { - self.active_clickhouse_zones.servers.contains(zone_id) + active_clickhouse_zones.servers.contains(zone_id) }); // Next, add any new clickhouse servers - for zone_id in &self.active_clickhouse_zones.servers { + for zone_id in &active_clickhouse_zones.servers { if !new_config.servers.contains_key(zone_id) { // Allocate a new `ServerId` and map it to the server zone new_config.max_used_server_id += 1.into(); @@ -219,7 +213,7 @@ impl ClickhouseAllocator { // Let's ensure that this zone has not been expunged yet. If it has that means // that adding the keeper will never succeed. - if !self.active_clickhouse_zones.keepers.contains(added_zone_id) { + if !active_clickhouse_zones.keepers.contains(added_zone_id) { // The zone has been expunged, so we must remove it from our configuration. new_config.keepers.remove(added_zone_id); @@ -245,7 +239,7 @@ impl ClickhouseAllocator { // We remove first, because the zones are already gone and therefore // don't help our quorum. for (zone_id, _) in &self.parent_config.keepers { - if !self.active_clickhouse_zones.keepers.contains(&zone_id) { + if !active_clickhouse_zones.keepers.contains(&zone_id) { // Remove the keeper for the first expunged zone we see. // Remember, we only do one keeper membership change at time. new_config.keepers.remove(zone_id); @@ -254,7 +248,7 @@ impl ClickhouseAllocator { } // Do we need to add any nodes to in service zones that don't have them - for zone_id in &self.active_clickhouse_zones.keepers { + for zone_id in &active_clickhouse_zones.keepers { if !new_config.keepers.contains_key(zone_id) { // Allocate a new `KeeperId` and map it to the server zone new_config.max_used_keeper_id += 1.into(); @@ -268,6 +262,10 @@ impl ClickhouseAllocator { // We possibly added or removed clickhouse servers, but not keepers. bump_gen_if_necessary(new_config) } + + pub fn parent_config(&self) -> &ClickhouseClusterConfig { + &self.parent_config + } } #[cfg(test)] @@ -363,13 +361,12 @@ pub mod test { let mut allocator = ClickhouseAllocator { log: logctx.log.clone(), - active_clickhouse_zones, parent_config: parent_config.clone(), inventory, }; // Our clickhouse cluster config should not have changed - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); // Note that we cannot directly check equality here and // in a bunch of the test cases below, because we bump the @@ -380,7 +377,7 @@ pub mod test { // Running again without changing the inventory should be idempotent allocator.parent_config = new_config; - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config, allocator.parent_config); logctx.cleanup_successful(); @@ -412,13 +409,12 @@ pub mod test { // allocator should allocate one more keeper. let mut allocator = ClickhouseAllocator { log: logctx.log.clone(), - active_clickhouse_zones, parent_config: parent_config.clone(), inventory, }; // Did our new config change as we expect? - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config.generation, Generation::from_u32(2)); assert_eq!(new_config.generation, parent_config.generation.next()); assert_eq!(new_config.max_used_keeper_id, 4.into()); @@ -440,14 +436,14 @@ pub mod test { // itself does not modify the allocator and a new one is created by the // `BlueprintBuilder` on each planning round. 
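        // "Idempotent" here means that, given the same inventory and the same
        // set of in-service zones, planning against the config we just
        // produced returns an identical config with no generation bump.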
allocator.parent_config = new_config; - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config, allocator.parent_config); // Now let's update our inventory to reflect the new keeper. This should // trigger the planner to add a 5th keeper. allocator.inventory.as_mut().unwrap().raft_config.insert(4.into()); allocator.inventory.as_mut().unwrap().leader_committed_log_index += 1; - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config.generation, Generation::from_u32(3)); assert_eq!( new_config.generation, @@ -473,7 +469,7 @@ pub mod test { // inventory raft config. We should end up with the same config. allocator.parent_config = new_config; allocator.inventory.as_mut().unwrap().leader_committed_log_index += 1; - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert!(!new_config.needs_generation_bump(&allocator.parent_config)); // Now let's modify the inventory to reflect that the 5th keeper node @@ -483,7 +479,7 @@ pub mod test { // our keeper zones have a keeper that is part of the cluster. allocator.inventory.as_mut().unwrap().raft_config.insert(5.into()); allocator.inventory.as_mut().unwrap().leader_committed_log_index += 1; - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert!(!new_config.needs_generation_bump(&allocator.parent_config)); logctx.cleanup_successful(); @@ -497,7 +493,7 @@ pub mod test { let (n_keeper_zones, n_server_zones, n_keepers, n_servers) = (5, 2, 5, 2); - let (active_clickhouse_zones, parent_config) = initial_config( + let (mut active_clickhouse_zones, parent_config) = initial_config( n_keeper_zones, n_server_zones, n_keepers, @@ -512,23 +508,22 @@ pub mod test { let mut allocator = ClickhouseAllocator { log: logctx.log.clone(), - active_clickhouse_zones, parent_config: parent_config.clone(), inventory, }; // Our clickhouse cluster config should not have changed // We have 5 keepers and 5 zones and all of them are in the inventory - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert!(!new_config.needs_generation_bump(&parent_config)); // Now expunge 2 of the 5 keeper zones by removing them from the // in-service zones - allocator.active_clickhouse_zones.keepers.pop_first(); - allocator.active_clickhouse_zones.keepers.pop_first(); + active_clickhouse_zones.keepers.pop_first(); + active_clickhouse_zones.keepers.pop_first(); // Running the planner should remove one of the keepers from the new config - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config.generation, Generation::from_u32(2)); assert_eq!( new_config.generation, @@ -554,14 +549,14 @@ pub mod test { // since the inventory hasn't reflected the change allocator.parent_config = new_config; allocator.inventory.as_mut().unwrap().leader_committed_log_index += 1; - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert!(!new_config.needs_generation_bump(&allocator.parent_config)); // Reflecting the new config in inventory should remove another keeper allocator.inventory.as_mut().unwrap().raft_config = new_config.keepers.values().cloned().collect(); 
allocator.inventory.as_mut().unwrap().leader_committed_log_index += 1; - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config.generation, Generation::from_u32(3)); assert_eq!( @@ -588,7 +583,7 @@ pub mod test { // change, because the inventory doesn't reflect the removed keeper allocator.parent_config = new_config; allocator.inventory.as_mut().unwrap().leader_committed_log_index += 1; - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&&active_clickhouse_zones).unwrap(); assert!(!new_config.needs_generation_bump(&allocator.parent_config)); // Reflecting the keeper removal in inventory should also result in no @@ -597,7 +592,7 @@ pub mod test { new_config.keepers.values().cloned().collect(); allocator.inventory.as_mut().unwrap().leader_committed_log_index += 1; allocator.parent_config = new_config; - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert!(!new_config.needs_generation_bump(&allocator.parent_config)); logctx.cleanup_successful(); @@ -612,7 +607,7 @@ pub mod test { let (n_keeper_zones, n_server_zones, n_keepers, n_servers) = (5, 2, 4, 2); - let (active_clickhouse_zones, parent_config) = initial_config( + let (mut active_clickhouse_zones, parent_config) = initial_config( n_keeper_zones, n_server_zones, n_keepers, @@ -627,14 +622,13 @@ pub mod test { let mut allocator = ClickhouseAllocator { log: logctx.log.clone(), - active_clickhouse_zones, parent_config, inventory, }; // First run the planner to add a 5th keeper to our config assert_eq!(allocator.parent_config.keepers.len(), 4); - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config.keepers.len(), 5); // Pick one of the keepers currently in our inventory, find the zone @@ -654,7 +648,7 @@ pub mod test { .find(|(_, &keeper_id)| keeper_id == keeper_to_expunge) .map(|(zone_id, _)| *zone_id) .unwrap(); - allocator.active_clickhouse_zones.keepers.remove(&zone_to_expunge); + active_clickhouse_zones.keepers.remove(&zone_to_expunge); // Bump the inventory commit index so we guarantee we perform the keeper // checks @@ -666,7 +660,7 @@ pub mod test { // Run the plan. Our configuration should stay the same because we can // only add or remove one keeper node from the cluster at a time and we // are already in the process of adding a node. - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert!(!new_config.needs_generation_bump(&allocator.parent_config)); // Now we change the inventory to reflect the addition of the node to @@ -675,7 +669,7 @@ pub mod test { allocator.parent_config = new_config; allocator.inventory.as_mut().unwrap().leader_committed_log_index += 1; allocator.inventory.as_mut().unwrap().raft_config.insert(5.into()); - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config.keepers.len(), 4); // Let's make sure that the right keeper was expunged. 
@@ -694,8 +688,8 @@ pub mod test { .raft_config .remove(&keeper_to_expunge); let new_zone_id = OmicronZoneUuid::new_v4(); - allocator.active_clickhouse_zones.keepers.insert(new_zone_id); - let new_config = allocator.plan().unwrap(); + active_clickhouse_zones.keepers.insert(new_zone_id); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config.keepers.len(), 5); assert_eq!(*new_config.keepers.get(&new_zone_id).unwrap(), KeeperId(6)); assert_eq!(new_config.max_used_keeper_id, 6.into()); @@ -712,7 +706,7 @@ pub mod test { let (n_keeper_zones, n_server_zones, n_keepers, n_servers) = (5, 2, 4, 2); - let (active_clickhouse_zones, parent_config) = initial_config( + let (mut active_clickhouse_zones, parent_config) = initial_config( n_keeper_zones, n_server_zones, n_keepers, @@ -727,14 +721,13 @@ pub mod test { let mut allocator = ClickhouseAllocator { log: logctx.log.clone(), - active_clickhouse_zones, parent_config, inventory, }; // First run the planner to add a 5th keeper to our config assert_eq!(allocator.parent_config.keepers.len(), 4); - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config.keepers.len(), 5); // Find the zone for our new keeper and expunge it before it is @@ -749,12 +742,12 @@ pub mod test { .map(|(zone_id, _)| *zone_id) .unwrap(); allocator.parent_config = new_config; - allocator.active_clickhouse_zones.keepers.remove(&zone_to_expunge); + active_clickhouse_zones.keepers.remove(&zone_to_expunge); // Bump the inventory commit index so we guarantee we perform the keeper // checks allocator.inventory.as_mut().unwrap().leader_committed_log_index += 1; - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config.keepers.len(), 4); assert!(!new_config.keepers.contains_key(&zone_to_expunge)); @@ -770,7 +763,7 @@ pub mod test { let (n_keeper_zones, n_server_zones, n_keepers, n_servers) = (3, 5, 3, 2); - let (active_clickhouse_zones, parent_config) = initial_config( + let (mut active_clickhouse_zones, parent_config) = initial_config( n_keeper_zones, n_server_zones, n_keepers, @@ -785,7 +778,6 @@ pub mod test { let mut allocator = ClickhouseAllocator { log: logctx.log.clone(), - active_clickhouse_zones, parent_config, inventory, }; @@ -793,12 +785,12 @@ pub mod test { let zone_to_expunge = *allocator.parent_config.servers.keys().next().unwrap(); - allocator.active_clickhouse_zones.servers.remove(&zone_to_expunge); + active_clickhouse_zones.servers.remove(&zone_to_expunge); // After running the planner we should see 4 servers: // Start with 2, expunge 1, add 3 to reach the number of zones we have. 
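        // Unlike keepers, clickhouse servers are not limited to one
        // membership change per planning round: the allocator drops the
        // expunged server and adds every missing server in a single pass.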
assert_eq!(allocator.parent_config.servers.len(), 2); - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config.servers.len(), 4); assert_eq!(new_config.max_used_server_id, 5.into()); assert_eq!(new_config.generation, Generation::from_u32(2)); @@ -810,11 +802,11 @@ pub mod test { // We can add a new keeper and server at the same time let new_keeper_zone = OmicronZoneUuid::new_v4(); let new_server_id = OmicronZoneUuid::new_v4(); - allocator.active_clickhouse_zones.keepers.insert(new_keeper_zone); - allocator.active_clickhouse_zones.servers.insert(new_server_id); + active_clickhouse_zones.keepers.insert(new_keeper_zone); + active_clickhouse_zones.servers.insert(new_server_id); allocator.parent_config = new_config; allocator.inventory.as_mut().unwrap().leader_committed_log_index += 1; - let new_config = allocator.plan().unwrap(); + let new_config = allocator.plan(&active_clickhouse_zones).unwrap(); assert_eq!(new_config.generation, Generation::from_u32(3)); assert_eq!(new_config.max_used_server_id, 6.into()); assert_eq!(new_config.max_used_keeper_id, 4.into()); @@ -850,7 +842,6 @@ pub mod test { let allocator = ClickhouseAllocator { log: logctx.log.clone(), - active_clickhouse_zones, parent_config, inventory, }; @@ -858,7 +849,7 @@ pub mod test { // We expect to get an error back. This can be used by higher level // software to trigger alerts, etc... In practice the `BlueprintBuilder` // should not change it's config when it receives an error. - assert!(allocator.plan().is_err()); + assert!(allocator.plan(&active_clickhouse_zones).is_err()); logctx.cleanup_successful(); } diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index a27a3443f7..f20688ec6c 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -353,6 +353,8 @@ impl<'a> Planner<'a> { for zone_kind in [ DiscretionaryOmicronZone::BoundaryNtp, + DiscretionaryOmicronZone::ClickhouseKeeper, + DiscretionaryOmicronZone::ClickhouseServer, DiscretionaryOmicronZone::CockroachDb, DiscretionaryOmicronZone::InternalDns, DiscretionaryOmicronZone::Nexus, @@ -430,6 +432,12 @@ impl<'a> Planner<'a> { DiscretionaryOmicronZone::BoundaryNtp => { self.input.target_boundary_ntp_zone_count() } + DiscretionaryOmicronZone::ClickhouseKeeper => { + self.input.target_clickhouse_keeper_zone_count() + } + DiscretionaryOmicronZone::ClickhouseServer => { + self.input.target_clickhouse_server_zone_count() + } DiscretionaryOmicronZone::CockroachDb => { self.input.target_cockroachdb_zone_count() } @@ -512,6 +520,18 @@ impl<'a> Planner<'a> { DiscretionaryOmicronZone::BoundaryNtp => self .blueprint .sled_promote_internal_ntp_to_boundary_ntp(sled_id)?, + DiscretionaryOmicronZone::ClickhouseKeeper => { + self.blueprint.sled_ensure_zone_multiple_clickhouse_keeper( + sled_id, + new_total_zone_count, + )? + } + DiscretionaryOmicronZone::ClickhouseServer => { + self.blueprint.sled_ensure_zone_multiple_clickhouse_server( + sled_id, + new_total_zone_count, + )? 
+ } DiscretionaryOmicronZone::CockroachDb => { self.blueprint.sled_ensure_zone_multiple_cockroachdb( sled_id, diff --git a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs index 6f3bac0ecc..85c192bbbc 100644 --- a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs +++ b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs @@ -15,6 +15,8 @@ use std::mem; #[cfg_attr(test, derive(test_strategy::Arbitrary))] pub(crate) enum DiscretionaryOmicronZone { BoundaryNtp, + ClickhouseKeeper, + ClickhouseServer, CockroachDb, InternalDns, Nexus, @@ -27,13 +29,17 @@ impl DiscretionaryOmicronZone { ) -> Option { match zone_type { BlueprintZoneType::BoundaryNtp(_) => Some(Self::BoundaryNtp), + BlueprintZoneType::ClickhouseKeeper(_) => { + Some(Self::ClickhouseKeeper) + } + BlueprintZoneType::ClickhouseServer(_) => { + Some(Self::ClickhouseServer) + } BlueprintZoneType::CockroachDb(_) => Some(Self::CockroachDb), BlueprintZoneType::InternalDns(_) => Some(Self::InternalDns), BlueprintZoneType::Nexus(_) => Some(Self::Nexus), // Zones that we should place but don't yet. BlueprintZoneType::Clickhouse(_) - | BlueprintZoneType::ClickhouseKeeper(_) - | BlueprintZoneType::ClickhouseServer(_) | BlueprintZoneType::CruciblePantry(_) | BlueprintZoneType::ExternalDns(_) | BlueprintZoneType::Oximeter(_) => None, @@ -50,6 +56,12 @@ impl From for ZoneKind { fn from(zone: DiscretionaryOmicronZone) -> Self { match zone { DiscretionaryOmicronZone::BoundaryNtp => Self::BoundaryNtp, + DiscretionaryOmicronZone::ClickhouseKeeper => { + Self::ClickhouseKeeper + } + DiscretionaryOmicronZone::ClickhouseServer => { + Self::ClickhouseServer + } DiscretionaryOmicronZone::CockroachDb => Self::CockroachDb, DiscretionaryOmicronZone::InternalDns => Self::InternalDns, DiscretionaryOmicronZone::Nexus => Self::Nexus, diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index ad62ec1d50..d678e19ac7 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -164,6 +164,10 @@ pub struct Blueprint { /// Whether to set `cluster.preserve_downgrade_option` and what to set it to pub cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade, + /// Allocation of Clickhouse Servers and Keepers for replicated clickhouse + /// setups. This is set to `None` if replicated clickhouse is not in use. 
+ pub clickhouse_cluster_config: Option, + /// when this blueprint was generated (for debugging) pub time_created: chrono::DateTime, /// identity of the component that generated the blueprint (for debugging) diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index e2158875a3..7fae97f372 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -109,10 +109,30 @@ impl PlanningInput { self.policy.target_cockroachdb_cluster_version } + pub fn target_clickhouse_server_zone_count(&self) -> usize { + self.policy + .clickhouse_policy + .as_ref() + .map(|policy| policy.target_servers) + .unwrap_or(0) + } + + pub fn target_clickhouse_keeper_zone_count(&self) -> usize { + self.policy + .clickhouse_policy + .as_ref() + .map(|policy| policy.target_keepers) + .unwrap_or(0) + } + pub fn service_ip_pool_ranges(&self) -> &[IpRange] { &self.policy.service_ip_pool_ranges } + pub fn clickhouse_cluster_enabled(&self) -> bool { + self.policy.clickhouse_policy.is_some() + } + pub fn all_sleds( &self, filter: SledFilter, diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index f9ad9a5cf0..03cea01640 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -167,11 +167,11 @@ impl Collection { /// there is one. pub fn latest_clickhouse_keeper_membership( &self, - ) -> Option<(OmicronZoneUuid, ClickhouseKeeperClusterMembership)> { + ) -> Option { self.clickhouse_keeper_cluster_membership .iter() .max_by_key(|(_, membership)| membership.leader_committed_log_index) - .map(|(zone_id, membership)| (*zone_id, membership.clone())) + .map(|(_, membership)| (membership.clone())) } } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1c1f3cb0b3..2a769a1688 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1486,6 +1486,9 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( cockroachdb_fingerprint: String::new(), cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade::DoNotModify, + // TODO: Allocate keepers and servers from the plan if there is a policy, + // or just do it all in reconfigurator after RSS? + clickhouse_cluster_config: None, time_created: Utc::now(), creator: "RSS".to_string(), comment: "initial blueprint from rack setup".to_string(), From 0a39c6f2efb2e07ac0ca73ef3d73be601257cb60 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Mon, 23 Sep 2024 15:47:54 +0000 Subject: [PATCH 02/19] Insert clickhouse cluster related blueprint tables --- nexus/db-model/src/deployment.rs | 8 ++- .../db-queries/src/db/datastore/deployment.rs | 63 ++++++++++++++++++- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index b9046519c8..98c3ddc3d1 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -830,18 +830,20 @@ impl BpClickhouseClusterConfig { .max_used_server_id .0 .try_into() - .context("more than 2^63 IDs in use")?, + .context("more than 2^63 clickhouse server IDs in use")?, max_used_keeper_id: config .max_used_keeper_id .0 .try_into() - .context("more than 2^63 IDs in use")?, + .context("more than 2^63 clickhouse keeper IDs in use")?, cluster_name: config.cluster_name.clone(), cluster_secret: config.cluster_secret.clone(), highest_seen_keeper_leader_committed_log_index: config .highest_seen_keeper_leader_committed_log_index .try_into() - .context("more than 2^63 IDs in use")?, + .context( + "more than 2^63 clickhouse keeper log indexes in use", + )?, }) } } diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 3b0e34da6b..7e70694883 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -36,7 +36,6 @@ use diesel::IntoSql; use diesel::OptionalExtension; use diesel::QueryDsl; use diesel::RunQueryDsl; -use nexus_db_model::schema::bp_clickhouse_cluster_config; use nexus_db_model::Blueprint as DbBlueprint; use nexus_db_model::BpClickhouseClusterConfig; use nexus_db_model::BpClickhouseKeeperZoneIdToNodeId; @@ -187,6 +186,42 @@ impl DataStore { }) .collect::, _>>()?; + let clickhouse_tables: Option<(_, _, _)> = if let Some(config) = + &blueprint.clickhouse_cluster_config + { + let mut keepers = vec![]; + for (zone_id, keeper_id) in &config.keepers { + let keeper = BpClickhouseKeeperZoneIdToNodeId::new( + blueprint_id, + *zone_id, + *keeper_id, + ) + .with_context(|| format!("zone {zone_id}, keeper {keeper_id}")) + .map_err(|e| Error::internal_error(&format!("{:#}", e)))?; + keepers.push(keeper) + } + + let mut servers = vec![]; + for (zone_id, server_id) in &config.servers { + let server = BpClickhouseServerZoneIdToNodeId::new( + blueprint_id, + *zone_id, + *server_id, + ) + .with_context(|| format!("zone {zone_id}, server {server_id}")) + .map_err(|e| Error::internal_error(&format!("{:#}", e)))?; + servers.push(server); + } + + let cluster_config = + BpClickhouseClusterConfig::new(blueprint_id, config) + .map_err(|e| Error::internal_error(&format!("{:#}", e)))?; + + Some((cluster_config, keepers, servers)) + } else { + None + }; + // This implementation inserts all records associated with the // blueprint in one transaction. 
This is required: we don't want // any planner or executor to see a half-inserted blueprint, nor do we @@ -265,7 +300,33 @@ impl DataStore { .await?; } + // Insert all clickhouse cluster related tables if necessary + if let Some((clickhouse_cluster_config, keepers, servers)) = clickhouse_tables { + { + use db::schema::bp_clickhouse_cluster_config::dsl; + let _ = diesel::insert_into(dsl::bp_clickhouse_cluster_config) + .values(clickhouse_cluster_config) + .execute_async(&conn) + .await?; + } + { + use db::schema::bp_clickhouse_keeper_zone_id_to_node_id::dsl; + let _ = diesel::insert_into(dsl::bp_clickhouse_keeper_zone_id_to_node_id) + .values(keepers) + .execute_async(&conn) + .await?; + } + { + use db::schema::bp_clickhouse_server_zone_id_to_node_id::dsl; + let _ = diesel::insert_into(dsl::bp_clickhouse_server_zone_id_to_node_id) + .values(servers) + .execute_async(&conn) + .await?; + } + } + Ok(()) + }) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; From 482d650b3497d35a988c0ab7e0d767cf9331ae39 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Mon, 23 Sep 2024 17:09:10 +0000 Subject: [PATCH 03/19] delete clickhouse cluster related tables --- .../db-queries/src/db/datastore/deployment.rs | 37 +++++++++++++++++++ nexus/test-utils/src/lib.rs | 2 + 2 files changed, 39 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 7e70694883..e0b09eda9e 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -886,6 +886,9 @@ impl DataStore { nsled_agent_zones, nzones, nnics, + nclickhouse_cluster_configs, + nclickhouse_keepers, + nclickhouse_servers, ) = conn .transaction_async(|conn| async move { // Ensure that blueprint we're about to delete is not the @@ -986,6 +989,34 @@ impl DataStore { .await? }; + let nclickhouse_cluster_configs = { + use db::schema::bp_clickhouse_cluster_config::dsl; + diesel::delete( + dsl::bp_clickhouse_cluster_config + .filter(dsl::blueprint_id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? + }; + + let nclickhouse_keepers = { + use db::schema::bp_clickhouse_keeper_zone_id_to_node_id::dsl; + diesel::delete(dsl::bp_clickhouse_keeper_zone_id_to_node_id + .filter(dsl::blueprint_id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? + }; + + let nclickhouse_servers = { + use db::schema::bp_clickhouse_server_zone_id_to_node_id::dsl; + diesel::delete(dsl::bp_clickhouse_server_zone_id_to_node_id + .filter(dsl::blueprint_id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? + }; + Ok(( nblueprints, nsled_states, @@ -994,6 +1025,9 @@ impl DataStore { nsled_agent_zones, nzones, nnics, + nclickhouse_cluster_configs, + nclickhouse_keepers, + nclickhouse_servers, )) }) .await @@ -1013,6 +1047,9 @@ impl DataStore { "nsled_agent_zones" => nsled_agent_zones, "nzones" => nzones, "nnics" => nnics, + "nclickhouse_cluster_configs" => nclickhouse_cluster_configs, + "nclickhouse_keepers" => nclickhouse_keepers, + "nclickhouse_servers" => nclickhouse_servers ); Ok(()) diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 0d03d8a57a..c48f85513c 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -808,6 +808,8 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { cockroachdb_fingerprint: String::new(), cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade::DoNotModify, + // TODO(ajs): Should we create this in RSS? 
it relies on policy + clickhouse_cluster_config: None, time_created: Utc::now(), creator: "nexus-test-utils".to_string(), comment: "initial test blueprint".to_string(), From 61abafbb94029307e4f50f614d29ac7a25a36fdc Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Mon, 23 Sep 2024 18:14:09 +0000 Subject: [PATCH 04/19] fix build and clippy --- .../db-queries/src/db/datastore/deployment.rs | 38 +++++++++---------- nexus/db-queries/src/db/datastore/rack.rs | 5 +++ nexus/reconfigurator/execution/src/dns.rs | 1 + .../execution/src/omicron_physical_disks.rs | 1 + .../execution/src/omicron_zones.rs | 1 + .../background/tasks/blueprint_execution.rs | 1 + .../app/background/tasks/blueprint_load.rs | 1 + 7 files changed, 27 insertions(+), 21 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index e0b09eda9e..dbe223a2d3 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -736,13 +736,11 @@ impl DataStore { for k in batch { let keeper_id = KeeperId( - u64::try_from(k.keeper_id).or_else( + u64::try_from(k.keeper_id).map_err( |_| { - Err(Error::internal_error( - &format!( - "keeper id is negative: {}", - k.keeper_id - ), + Error::internal_error(&format!( + "keeper id is negative: {}", + k.keeper_id )) }, )?, @@ -785,13 +783,11 @@ impl DataStore { for s in batch { let server_id = ServerId( - u64::try_from(s.server_id).or_else( + u64::try_from(s.server_id).map_err( |_| { - Err(Error::internal_error( - &format!( - "server id is negative: {}", - s.server_id - ), + Error::internal_error(&format!( + "server id is negative: {}", + s.server_id )) }, )?, @@ -809,20 +805,20 @@ impl DataStore { generation: bp_config.generation.into(), max_used_server_id: ServerId( u64::try_from(bp_config.max_used_server_id) - .or_else(|_| { - Err(Error::internal_error(&format!( + .map_err(|_| { + Error::internal_error(&format!( "max server id is negative: {}", bp_config.max_used_server_id - ))) + )) })?, ), max_used_keeper_id: KeeperId( u64::try_from(bp_config.max_used_keeper_id) - .or_else(|_| { - Err(Error::internal_error(&format!( + .map_err(|_| { + Error::internal_error(&format!( "max keeper id is negative: {}", bp_config.max_used_keeper_id - ))) + )) })?, ), cluster_name: bp_config.cluster_name, @@ -831,11 +827,11 @@ impl DataStore { u64::try_from( bp_config.highest_seen_keeper_leader_committed_log_index, ) - .or_else(|_| { - Err(Error::internal_error(&format!( + .map_err(|_| { + Error::internal_error(&format!( "max server id is negative: {}", bp_config.highest_seen_keeper_leader_committed_log_index - ))) + )) })?, keepers, servers, diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index c069a72955..9eda901d76 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -1066,6 +1066,7 @@ mod test { internal_dns_version: *Generation::new(), external_dns_version: *Generation::new(), cockroachdb_fingerprint: String::new(), + clickhouse_cluster_config: None, time_created: Utc::now(), creator: "test suite".to_string(), comment: "test suite".to_string(), @@ -1552,6 +1553,7 @@ mod test { internal_dns_version: *Generation::new(), external_dns_version: *Generation::new(), cockroachdb_fingerprint: String::new(), + clickhouse_cluster_config: None, time_created: now_db_precision(), creator: "test suite".to_string(), comment: "test blueprint".to_string(), @@ -1813,6 +1815,7 @@ mod test { 
internal_dns_version: *Generation::new(), external_dns_version: *Generation::new(), cockroachdb_fingerprint: String::new(), + clickhouse_cluster_config: None, time_created: now_db_precision(), creator: "test suite".to_string(), comment: "test blueprint".to_string(), @@ -2027,6 +2030,7 @@ mod test { internal_dns_version: *Generation::new(), external_dns_version: *Generation::new(), cockroachdb_fingerprint: String::new(), + clickhouse_cluster_config: None, time_created: now_db_precision(), creator: "test suite".to_string(), comment: "test blueprint".to_string(), @@ -2170,6 +2174,7 @@ mod test { internal_dns_version: *Generation::new(), external_dns_version: *Generation::new(), cockroachdb_fingerprint: String::new(), + clickhouse_cluster_config: None, time_created: now_db_precision(), creator: "test suite".to_string(), comment: "test blueprint".to_string(), diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 4654643576..ae3e1508c5 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -832,6 +832,7 @@ mod test { internal_dns_version: initial_dns_generation, external_dns_version: Generation::new(), cockroachdb_fingerprint: String::new(), + clickhouse_cluster_config: None, time_created: now_db_precision(), creator: "test-suite".to_string(), comment: "test blueprint".to_string(), diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index d94bbe2e27..14af55fee6 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -187,6 +187,7 @@ mod test { internal_dns_version: Generation::new(), external_dns_version: Generation::new(), cockroachdb_fingerprint: String::new(), + clickhouse_cluster_config: None, time_created: chrono::Utc::now(), creator: "test".to_string(), comment: "test blueprint".to_string(), diff --git a/nexus/reconfigurator/execution/src/omicron_zones.rs b/nexus/reconfigurator/execution/src/omicron_zones.rs index b40bbab982..cb8a5a2eed 100644 --- a/nexus/reconfigurator/execution/src/omicron_zones.rs +++ b/nexus/reconfigurator/execution/src/omicron_zones.rs @@ -351,6 +351,7 @@ mod test { internal_dns_version: Generation::new(), external_dns_version: Generation::new(), cockroachdb_fingerprint: String::new(), + clickhouse_cluster_config: None, time_created: chrono::Utc::now(), creator: "test".to_string(), comment: "test blueprint".to_string(), diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index c4fd916d5f..398655e9fb 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -247,6 +247,7 @@ mod test { internal_dns_version: dns_version, external_dns_version: dns_version, cockroachdb_fingerprint: String::new(), + clickhouse_cluster_config: None, time_created: chrono::Utc::now(), creator: "test".to_string(), comment: "test blueprint".to_string(), diff --git a/nexus/src/app/background/tasks/blueprint_load.rs b/nexus/src/app/background/tasks/blueprint_load.rs index 70fcf713bc..8b5c02dd80 100644 --- a/nexus/src/app/background/tasks/blueprint_load.rs +++ b/nexus/src/app/background/tasks/blueprint_load.rs @@ -225,6 +225,7 @@ mod test { internal_dns_version: Generation::new(), external_dns_version: Generation::new(), cockroachdb_fingerprint: String::new(), + clickhouse_cluster_config: 
None, time_created: now_db_precision(), creator: "test".to_string(), comment: "test blueprint".to_string(), From 3ed0110b02477d5c0566c2fe91cf4939c7e7980f Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 24 Sep 2024 01:10:59 +0000 Subject: [PATCH 05/19] wip - safekeeping --- nexus/inventory/src/builder.rs | 22 ++++- .../planning/src/blueprint_builder/builder.rs | 20 ++++- .../src/blueprint_builder/clickhouse.rs | 10 ++- .../planning/src/blueprint_builder/mod.rs | 1 + nexus/reconfigurator/planning/src/planner.rs | 84 +++++++++++++++++++ nexus/reconfigurator/planning/src/system.rs | 11 ++- nexus/types/src/deployment.rs | 1 + nexus/types/src/deployment/zone_type.rs | 15 ++++ 8 files changed, 157 insertions(+), 7 deletions(-) diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 064d0025e2..6801e34fad 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -21,6 +21,7 @@ use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Caboose; use nexus_types::inventory::CabooseFound; use nexus_types::inventory::CabooseWhich; +use nexus_types::inventory::ClickhouseKeeperClusterMembership; use nexus_types::inventory::Collection; use nexus_types::inventory::OmicronZonesFound; use nexus_types::inventory::RotPage; @@ -32,6 +33,7 @@ use nexus_types::inventory::SledAgent; use nexus_types::inventory::Zpool; use omicron_uuid_kinds::CollectionKind; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -93,6 +95,9 @@ pub struct CollectionBuilder { BTreeMap, RotPageFound>>, sleds: BTreeMap, omicron_zones: BTreeMap, + clickhouse_keeper_cluster_membership: + BTreeMap, + // We just generate one UUID for each collection. id_rng: TypedUuidRng, } @@ -120,6 +125,7 @@ impl CollectionBuilder { rot_pages_found: BTreeMap::new(), sleds: BTreeMap::new(), omicron_zones: BTreeMap::new(), + clickhouse_keeper_cluster_membership: BTreeMap::new(), id_rng: TypedUuidRng::from_entropy(), } } @@ -147,9 +153,8 @@ impl CollectionBuilder { rot_pages_found: self.rot_pages_found, sled_agents: self.sleds, omicron_zones: self.omicron_zones, - // Currently unused - // See: https://github.com/oxidecomputer/omicron/issues/6578 - clickhouse_keeper_cluster_membership: BTreeMap::new(), + clickhouse_keeper_cluster_membership: self + .clickhouse_keeper_cluster_membership, } } @@ -562,6 +567,16 @@ impl CollectionBuilder { Ok(()) } } + + /// Record information about Keeper cluster membership learned from the + /// clickhouse-admin service running in the keeper zones. + pub fn found_clickhouse_keeper_cluster_membership( + &mut self, + zone_id: OmicronZoneUuid, + membership: ClickhouseKeeperClusterMembership, + ) { + self.clickhouse_keeper_cluster_membership.insert(zone_id, membership); + } } /// Returns the current time, truncated to the previous microsecond. 
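// Usage sketch (assumed wiring, for illustration only): once a collector has
// fetched membership from a keeper zone's clickhouse-admin endpoint, it
// records it on the builder:
//
//     builder.found_clickhouse_keeper_cluster_membership(keeper_zone_id, membership);
//
// where `keeper_zone_id` identifies the keeper zone that was queried and
// `membership` is the `ClickhouseKeeperClusterMembership` it returned.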
@@ -620,6 +635,7 @@ mod test { assert!(collection.rots.is_empty()); assert!(collection.cabooses_found.is_empty()); assert!(collection.rot_pages_found.is_empty()); + assert!(collection.clickhouse_keeper_cluster_membership.is_empty()); } // Simple test of a single, fairly typical collection that contains just diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 344d1fc970..f757862c73 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -351,6 +351,14 @@ impl<'a> BlueprintBuilder<'a> { .clickhouse_cluster_config .clone() .unwrap_or_else(|| { + info!( + log, + concat!( + "Clickhouse cluster enabled by policy: ", + "generating initial 'ClickhouseClusterConfig' ", + "and 'ClickhouseAllocator'" + ) + ); ClickhouseClusterConfig::new(OXIMETER_CLUSTER.to_string()) }); Some(ClickhouseAllocator::new( @@ -359,6 +367,16 @@ impl<'a> BlueprintBuilder<'a> { inventory.latest_clickhouse_keeper_membership(), )) } else { + if parent_blueprint.clickhouse_cluster_config.is_some() { + info!( + log, + concat!( + "clickhouse cluster disabled via policy ", + "discarding existing 'ClickhouseAllocator' and ", + "the resulting generated 'ClickhouseClusterConfig" + ) + ); + } None }; @@ -415,7 +433,7 @@ impl<'a> BlueprintBuilder<'a> { match a.plan(&(&blueprint_zones).into()) { Ok(config) => config, Err(e) => { - error!(self.log, "{e}"); + error!(self.log, "clickhouse allocator error: {e}"); a.parent_config().clone() } } diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs index ef74ac8928..ad8078f7b6 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs @@ -23,8 +23,8 @@ use thiserror::Error; // See: https://github.com/oxidecomputer/omicron/issues/6577 #[allow(unused)] pub struct ClickhouseZonesThatShouldBeRunning { - keepers: BTreeSet, - servers: BTreeSet, + pub keepers: BTreeSet, + pub servers: BTreeSet, } impl From<&BTreeMap> @@ -140,6 +140,12 @@ impl ClickhouseAllocator { let current_keepers: BTreeSet<_> = self.parent_config.keepers.values().cloned().collect(); let Some(inventory_membership) = &self.inventory else { + XXX + // We don't have any inventory yet. However, we may have just transitioned + // via a policy change to go from 0 to 1 keepers. In that case, there is a chicken/egg + // problem and no way to actually get a keeper config, since it comes from the keepers + // themselves. Therefore, we add a new keeper here to jumpstart our config. 
+ XXX return bump_gen_if_necessary(new_config); }; diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs index b84bef6426..725835f4ae 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs @@ -11,3 +11,4 @@ mod internal_dns; mod zones; pub use builder::*; +pub use clickhouse::{ClickhouseAllocator, ClickhouseZonesThatShouldBeRunning}; diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index f20688ec6c..e30bb85907 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -761,6 +761,7 @@ mod test { use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; + use nexus_types::deployment::ClickhousePolicy; use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::CockroachDbPreserveDowngrade; use nexus_types::deployment::CockroachDbSettings; @@ -2200,4 +2201,87 @@ mod test { logctx.cleanup_successful(); } + + /// Deploy all keeper nodes 1 at a time and all server nodes at once + /// starting from 0 deployed nodes + #[test] + fn test_plan_deploy_all_clickhouse_cluster_nodes() { + static TEST_NAME: &str = "planner_deploy_all_keeper_nodes"; + let logctx = test_setup_log(TEST_NAME); + let log = logctx.log.clone(); + + // Use our example system. + let (mut collection, input, blueprint1) = + example(&log, TEST_NAME, DEFAULT_N_SLEDS); + verify_blueprint(&blueprint1); + + // We shouldn't have a clickhouse cluster config, as we don't have a + // clickhouse policy set yet + assert!(blueprint1.clickhouse_cluster_config.is_none()); + let target_keepers = 3; + let target_servers = 2; + + // Enable clickhouse clusters via policy + let mut input_builder = input.into_builder(); + input_builder.policy_mut().clickhouse_policy = Some(ClickhousePolicy { + deploy_with_standalone: true, + target_servers, + target_keepers, + }); + + // Creating a new blueprint should deploy all the new clickhouse zones + let input = input_builder.build(); + let blueprint2 = Planner::new_based_on( + log.clone(), + &blueprint1, + &input, + "test_blueprint2", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp2")) + .plan() + .expect("plan"); + + println!("{}", blueprint2.display()); + + // We should see zones for 3 clickhouse keepers, and 2 servers created + let active_zones: Vec<_> = blueprint2 + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + .map(|(_, z)| z.clone()) + .collect(); + + let num_keeper_zones = active_zones + .iter() + .filter(|z| z.zone_type.is_clickhouse_keeper()) + .count(); + let num_server_zones = active_zones + .iter() + .filter(|z| z.zone_type.is_clickhouse_server()) + .count(); + + assert_eq!(num_keeper_zones, target_keepers); + assert_eq!(num_server_zones, target_servers); + + // We should be attempting to allocate both servers and 0 keepers + // since we don't have a keeper inventory to work with yet. + // + // TODO: Without allocating keepers ahead of time, how do we get a valid + // inventory in production? Maybe we just bootstrap wiht an empty one in + // this case. Seems reasonable. + + // until the inventory reflects a change in keeper membership. 
+ { + let clickhouse_cluster_config = + blueprint2.clickhouse_cluster_config.as_ref().unwrap(); + assert_eq!(clickhouse_cluster_config.generation, 2.into()); + assert_eq!(clickhouse_cluster_config.max_used_keeper_id, 0.into()); + assert_eq!( + clickhouse_cluster_config.max_used_server_id, + (target_servers as u64).into() + ); + assert_eq!(clickhouse_cluster_config.keepers.len(), 0); + assert_eq!(clickhouse_cluster_config.servers.len(), target_servers); + } + } } diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 538c7bec1b..27587a7523 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -14,6 +14,7 @@ use nexus_sled_agent_shared::inventory::Baseboard; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::SledRole; +use nexus_types::deployment::ClickhousePolicy; use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::CockroachDbSettings; use nexus_types::deployment::PlanningInputBuilder; @@ -88,6 +89,7 @@ pub struct SystemDescription { service_ip_pool_ranges: Vec, internal_dns_version: Generation, external_dns_version: Generation, + clickhouse_policy: Option, } impl SystemDescription { @@ -165,6 +167,7 @@ impl SystemDescription { service_ip_pool_ranges, internal_dns_version: Generation::new(), external_dns_version: Generation::new(), + clickhouse_policy: None, } } @@ -210,6 +213,12 @@ impl SystemDescription { self } + /// Set the clickhouse policy + pub fn clickhouse_policy(&mut self, policy: ClickhousePolicy) -> &mut Self { + self.clickhouse_policy = Some(policy); + self + } + /// Add a sled to the system, as described by a SledBuilder pub fn sled(&mut self, sled: SledBuilder) -> anyhow::Result<&mut Self> { let sled_id = sled.id.unwrap_or_else(SledUuid::new_v4); @@ -333,7 +342,7 @@ impl SystemDescription { target_cockroachdb_zone_count: self.target_cockroachdb_zone_count, target_cockroachdb_cluster_version: self .target_cockroachdb_cluster_version, - clickhouse_policy: None, + clickhouse_policy: self.clickhouse_policy.clone(), }; let mut builder = PlanningInputBuilder::new( policy, diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index d678e19ac7..e341fad6e0 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -59,6 +59,7 @@ pub use network_resources::OmicronZoneExternalSnatIp; pub use network_resources::OmicronZoneNetworkResources; pub use network_resources::OmicronZoneNic; pub use network_resources::OmicronZoneNicEntry; +pub use planning_input::ClickhousePolicy; pub use planning_input::CockroachDbClusterVersion; pub use planning_input::CockroachDbPreserveDowngrade; pub use planning_input::CockroachDbSettings; diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs index e0f389fe2a..d09150b894 100644 --- a/nexus/types/src/deployment/zone_type.rs +++ b/nexus/types/src/deployment/zone_type.rs @@ -95,6 +95,21 @@ impl BlueprintZoneType { matches!(self, BlueprintZoneType::Crucible(_)) } + /// Identifies whether this is a clickhouse keeper zone + pub fn is_clickhouse_keeper(&self) -> bool { + matches!(self, BlueprintZoneType::ClickhouseKeeper(_)) + } + + /// Identifies whether this is a clickhouse server zone + pub fn is_clickhouse_server(&self) -> bool { + matches!(self, BlueprintZoneType::ClickhouseServer(_)) + } + + /// Identifies whether this is a clickhouse zone + 
pub fn is_clickhouse(&self) -> bool { + matches!(self, BlueprintZoneType::Clickhouse(_)) + } + /// Returns the durable dataset associated with this zone, if any exists. pub fn durable_dataset(&self) -> Option> { let (dataset, kind, &address) = match self { From ac591fdf1cc082318aa05e29fecfe8dcf067d655 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 24 Sep 2024 16:47:06 +0000 Subject: [PATCH 06/19] deal with going from 0->1 keeper --- .../src/blueprint_builder/clickhouse.rs | 26 ++++++++++++++----- nexus/reconfigurator/planning/src/planner.rs | 15 ++++------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs index ad8078f7b6..2efc625ec4 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs @@ -137,15 +137,27 @@ impl ClickhouseAllocator { // If we fail to retrieve any inventory for keepers in the current // collection than we must not modify our keeper config, as we don't // know whether a configuration is ongoing or not. + // + // There is an exception to this rule: on *new* clusters that have + // keeper zones deployed buty do not have any keepers running we must + // start at least one keeper unconditionally. This is because we cannot + // retrieve keeper inventory if there are no keepers running. let current_keepers: BTreeSet<_> = self.parent_config.keepers.values().cloned().collect(); let Some(inventory_membership) = &self.inventory else { - XXX - // We don't have any inventory yet. However, we may have just transitioned - // via a policy change to go from 0 to 1 keepers. In that case, there is a chicken/egg - // problem and no way to actually get a keeper config, since it comes from the keepers - // themselves. Therefore, we add a new keeper here to jumpstart our config. - XXX + // Are we a new cluster and do we have any active keeper zones? + if new_config.max_used_keeper_id == 0.into() + && !active_clickhouse_zones.keepers.is_empty() + { + // Unwrap safety: We check that there is at least one keeper in + // the `if` condition above. + let zone_id = active_clickhouse_zones.keepers.first().unwrap(); + // Allocate a new `KeeperId` and map it to the first zone + new_config.max_used_keeper_id += 1.into(); + new_config + .keepers + .insert(*zone_id, new_config.max_used_keeper_id); + } return bump_gen_if_necessary(new_config); }; @@ -256,7 +268,7 @@ impl ClickhouseAllocator { // Do we need to add any nodes to in service zones that don't have them for zone_id in &active_clickhouse_zones.keepers { if !new_config.keepers.contains_key(zone_id) { - // Allocate a new `KeeperId` and map it to the server zone + // Allocate a new `KeeperId` and map it to the keeper zone new_config.max_used_keeper_id += 1.into(); new_config .keepers diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index e30bb85907..00d2af9ed3 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -781,6 +781,7 @@ mod test { use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; + use std::collections::BTreeSet; use std::collections::HashMap; use std::mem; use typed_rng::TypedUuidRng; @@ -2211,7 +2212,7 @@ mod test { let log = logctx.log.clone(); // Use our example system. 
- let (mut collection, input, blueprint1) = + let (collection, input, blueprint1) = example(&log, TEST_NAME, DEFAULT_N_SLEDS); verify_blueprint(&blueprint1); @@ -2263,24 +2264,18 @@ mod test { assert_eq!(num_keeper_zones, target_keepers); assert_eq!(num_server_zones, target_servers); - // We should be attempting to allocate both servers and 0 keepers - // since we don't have a keeper inventory to work with yet. - // - // TODO: Without allocating keepers ahead of time, how do we get a valid - // inventory in production? Maybe we just bootstrap wiht an empty one in - // this case. Seems reasonable. - + // We should be attempting to allocate both servers and a single keeper // until the inventory reflects a change in keeper membership. { let clickhouse_cluster_config = blueprint2.clickhouse_cluster_config.as_ref().unwrap(); assert_eq!(clickhouse_cluster_config.generation, 2.into()); - assert_eq!(clickhouse_cluster_config.max_used_keeper_id, 0.into()); + assert_eq!(clickhouse_cluster_config.max_used_keeper_id, 1.into()); assert_eq!( clickhouse_cluster_config.max_used_server_id, (target_servers as u64).into() ); - assert_eq!(clickhouse_cluster_config.keepers.len(), 0); + assert_eq!(clickhouse_cluster_config.keepers.len(), 1); assert_eq!(clickhouse_cluster_config.servers.len(), target_servers); } } From 47e35bfaa42f5c6907c22b269bb24f5f6a483600 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 24 Sep 2024 17:43:45 +0000 Subject: [PATCH 07/19] wrap up first planner test --- nexus/reconfigurator/planning/src/planner.rs | 152 +++++++++++++++++-- 1 file changed, 142 insertions(+), 10 deletions(-) diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 00d2af9ed3..5e1666a346 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -772,6 +772,7 @@ mod test { use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::external_api::views::SledState; + use nexus_types::inventory::ClickhouseKeeperClusterMembership; use nexus_types::inventory::OmicronZonesFound; use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; @@ -2212,14 +2213,16 @@ mod test { let log = logctx.log.clone(); // Use our example system. - let (collection, input, blueprint1) = + let (mut collection, input, blueprint1) = example(&log, TEST_NAME, DEFAULT_N_SLEDS); verify_blueprint(&blueprint1); // We shouldn't have a clickhouse cluster config, as we don't have a // clickhouse policy set yet assert!(blueprint1.clickhouse_cluster_config.is_none()); - let target_keepers = 3; + // We'd typically use at least 3 servers here, but we're trying to keep + // this test reasonably short. 
+ let target_keepers = 2; let target_servers = 2; // Enable clickhouse clusters via policy @@ -2244,25 +2247,25 @@ mod test { .plan() .expect("plan"); - println!("{}", blueprint2.display()); - // We should see zones for 3 clickhouse keepers, and 2 servers created let active_zones: Vec<_> = blueprint2 .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) .map(|(_, z)| z.clone()) .collect(); - let num_keeper_zones = active_zones + let keeper_zone_ids: BTreeSet<_> = active_zones .iter() .filter(|z| z.zone_type.is_clickhouse_keeper()) - .count(); - let num_server_zones = active_zones + .map(|z| z.id) + .collect(); + let server_zone_ids: BTreeSet<_> = active_zones .iter() .filter(|z| z.zone_type.is_clickhouse_server()) - .count(); + .map(|z| z.id) + .collect(); - assert_eq!(num_keeper_zones, target_keepers); - assert_eq!(num_server_zones, target_servers); + assert_eq!(keeper_zone_ids.len(), target_keepers); + assert_eq!(server_zone_ids.len(), target_servers); // We should be attempting to allocate both servers and a single keeper // until the inventory reflects a change in keeper membership. @@ -2277,6 +2280,135 @@ mod test { ); assert_eq!(clickhouse_cluster_config.keepers.len(), 1); assert_eq!(clickhouse_cluster_config.servers.len(), target_servers); + + // Ensure that the added keeper is in a keeper zone + let zone_id = + &clickhouse_cluster_config.keepers.first_key_value().unwrap().0; + assert!(keeper_zone_ids.contains(zone_id)); + + // Ensure that the added servers are in server zones + for zone_id in clickhouse_cluster_config.servers.keys() { + assert!(server_zone_ids.contains(zone_id)); + } + } + + // Planning again withougt changing inventory should result in the same state + let blueprint3 = Planner::new_based_on( + log.clone(), + &blueprint2, + &input, + "test_blueprint3", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp3")) + .plan() + .expect("plan"); + + assert_eq!( + blueprint2.clickhouse_cluster_config, + blueprint3.clickhouse_cluster_config + ); + + // Updating the inventory to reflect the first keeper should result in a + // new configured keeper. + let (zone_id, keeper_id) = blueprint3 + .clickhouse_cluster_config + .as_ref() + .unwrap() + .keepers + .first_key_value() + .unwrap(); + let membership = ClickhouseKeeperClusterMembership { + queried_keeper: *keeper_id, + leader_committed_log_index: 1, + raft_config: [*keeper_id].into_iter().collect(), + }; + collection + .clickhouse_keeper_cluster_membership + .insert(*zone_id, membership); + + let blueprint4 = Planner::new_based_on( + log.clone(), + &blueprint3, + &input, + "test_blueprint4", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp4")) + .plan() + .expect("plan"); + + // We should be attempting to allocate 2 keepers now that one is running + // and returning inventory. + { + let clickhouse_cluster_config = + blueprint4.clickhouse_cluster_config.as_ref().unwrap(); + assert_eq!(clickhouse_cluster_config.generation, 3.into()); + assert_eq!(clickhouse_cluster_config.max_used_keeper_id, 2.into()); + assert_eq!(clickhouse_cluster_config.keepers.len(), 2); + + // Ensure that the keepers are in keeper zones + for zone_id in clickhouse_cluster_config.keepers.keys() { + assert!(keeper_zone_ids.contains(zone_id)); + } } + + // Fill in the inventory for the second keeper. We should not allocate + // a new keeper since we have reached our target count. 
However, we should update + // the `highests_seen_keeper_committed_log_index`; + for (zone_id, keeper_id) in + &blueprint4.clickhouse_cluster_config.as_ref().unwrap().keepers + { + let membership = ClickhouseKeeperClusterMembership { + queried_keeper: *keeper_id, + leader_committed_log_index: 2, + raft_config: [1.into(), 2.into()].into_iter().collect(), + }; + collection + .clickhouse_keeper_cluster_membership + .insert(*zone_id, membership); + } + let blueprint5 = Planner::new_based_on( + log.clone(), + &blueprint4, + &input, + "test_blueprint5", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp4")) + .plan() + .expect("plan"); + + let clickhouse_cluster_config4 = + blueprint4.clickhouse_cluster_config.as_ref().unwrap(); + let clickhouse_cluster_config5 = + blueprint5.clickhouse_cluster_config.as_ref().unwrap(); + + assert_eq!( + clickhouse_cluster_config4.generation, + clickhouse_cluster_config5.generation + ); + assert_eq!( + clickhouse_cluster_config4.max_used_keeper_id, + clickhouse_cluster_config5.max_used_keeper_id + ); + assert_eq!( + clickhouse_cluster_config4.keepers, + clickhouse_cluster_config5.keepers + ); + assert_ne!( + clickhouse_cluster_config4 + .highest_seen_keeper_leader_committed_log_index, + clickhouse_cluster_config5 + .highest_seen_keeper_leader_committed_log_index + ); + assert_eq!( + clickhouse_cluster_config5 + .highest_seen_keeper_leader_committed_log_index, + 2 + ); } } From c5a2aea0376fd8855f5c4e679d724846c24c8c23 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 24 Sep 2024 23:01:58 +0000 Subject: [PATCH 08/19] expunge test --- nexus/reconfigurator/planning/src/planner.rs | 211 ++++++++++++++++++- 1 file changed, 209 insertions(+), 2 deletions(-) diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 5e1666a346..d28005599d 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -753,6 +753,7 @@ mod test { use chrono::NaiveDateTime; use chrono::TimeZone; use chrono::Utc; + use clickhouse_admin_types::KeeperId; use expectorate::assert_contents; use nexus_inventory::now_db_precision; use nexus_sled_agent_shared::inventory::ZoneKind; @@ -2221,7 +2222,7 @@ mod test { // clickhouse policy set yet assert!(blueprint1.clickhouse_cluster_config.is_none()); // We'd typically use at least 3 servers here, but we're trying to keep - // this test reasonably short. + // this test reasonably short. let target_keepers = 2; let target_servers = 2; @@ -2247,7 +2248,7 @@ mod test { .plan() .expect("plan"); - // We should see zones for 3 clickhouse keepers, and 2 servers created + // We should see zones for 2 clickhouse keepers, and 2 servers created let active_zones: Vec<_> = blueprint2 .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) .map(|(_, z)| z.clone()) @@ -2411,4 +2412,210 @@ mod test { 2 ); } + + // Start with an existing clickhouse cluster and expunge a keeper. This + // models what will happen after an RSS deployment with clickhouse policy + // enabled or an existing system already running a clickhouse cluster. + #[test] + fn test_grow_and_shrink_clickhouse_clusters() { + static TEST_NAME: &str = "planner_grow_and_shrink_clickhouse_clusters"; + let logctx = test_setup_log(TEST_NAME); + let log = logctx.log.clone(); + + // Use our example system. 
+ let (mut collection, input, blueprint1) = + example(&log, TEST_NAME, DEFAULT_N_SLEDS); + + let target_keepers = 3; + let target_servers = 2; + + // Enable clickhouse clusters via policy + let mut input_builder = input.into_builder(); + input_builder.policy_mut().clickhouse_policy = Some(ClickhousePolicy { + deploy_with_standalone: true, + target_servers, + target_keepers, + }); + let input = input_builder.build(); + + // Create a new blueprint to deploy all our clickhouse zones + let mut blueprint2 = Planner::new_based_on( + log.clone(), + &blueprint1, + &input, + "test_blueprint2", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp2")) + .plan() + .expect("plan"); + + // We should see zones for 2 clickhouse keepers, and 2 servers created + let active_zones: Vec<_> = blueprint2 + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + .map(|(_, z)| z.clone()) + .collect(); + + let keeper_zone_ids: BTreeSet<_> = active_zones + .iter() + .filter(|z| z.zone_type.is_clickhouse_keeper()) + .map(|z| z.id) + .collect(); + let server_zone_ids: BTreeSet<_> = active_zones + .iter() + .filter(|z| z.zone_type.is_clickhouse_server()) + .map(|z| z.id) + .collect(); + + assert_eq!(keeper_zone_ids.len(), target_keepers); + assert_eq!(server_zone_ids.len(), target_servers); + + // Directly manipulate the blueprint and inventory so that the + // clickhouse clusters are stable + let config = blueprint2.clickhouse_cluster_config.as_mut().unwrap(); + config.max_used_keeper_id = (target_keepers as u64).into(); + config.keepers = keeper_zone_ids + .iter() + .enumerate() + .map(|(i, zone_id)| (*zone_id, KeeperId(i as u64))) + .collect(); + config.highest_seen_keeper_leader_committed_log_index = 1; + + let raft_config: BTreeSet<_> = + config.keepers.values().cloned().collect(); + + collection.clickhouse_keeper_cluster_membership = config + .keepers + .iter() + .map(|(zone_id, keeper_id)| { + ( + *zone_id, + ClickhouseKeeperClusterMembership { + queried_keeper: *keeper_id, + leader_committed_log_index: 1, + raft_config: raft_config.clone(), + }, + ) + }) + .collect(); + + // Let's run the planner. The blueprint shouldn't change with regards to + // clickhouse as our inventory reflects our desired state. + let blueprint3 = Planner::new_based_on( + log.clone(), + &blueprint2, + &input, + "test_blueprint3", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp3")) + .plan() + .expect("plan"); + + assert_eq!( + blueprint2.clickhouse_cluster_config, + blueprint3.clickhouse_cluster_config + ); + + // Find the sled containing one of the keeper zones and expunge it + let (sled_id, bp_zone_config) = blueprint3 + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + .find(|(_, z)| z.zone_type.is_clickhouse_keeper()) + .clone() + .unwrap(); + + // Expunge a keeper zone + let mut builder = input.into_builder(); + builder.sleds_mut().get_mut(&sled_id).unwrap().policy = + SledPolicy::Expunged; + let input = builder.build(); + + let blueprint4 = Planner::new_based_on( + log.clone(), + &blueprint3, + &input, + "test_blueprint4", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp4")) + .plan() + .expect("plan"); + + // The planner should expunge a zone based on the sled being expunged. Since this + // is a clickhouse keeper zone, the clickhouse keeper configuration should change + // to reflect this. 
+ let old_config = blueprint3.clickhouse_cluster_config.as_ref().unwrap(); + let config = blueprint4.clickhouse_cluster_config.as_ref().unwrap(); + assert_eq!(config.generation, old_config.generation.next()); + assert!(config.keepers.get(&bp_zone_config.id).is_none()); + // We've only removed one keeper from our desired state + assert_eq!(config.keepers.len() + 1, old_config.keepers.len()); + // We haven't allocated any new keepers + assert_eq!(config.max_used_keeper_id, old_config.max_used_keeper_id); + + // Planning again will not change the keeper state because we haven't updated the inventory + let blueprint5 = Planner::new_based_on( + log.clone(), + &blueprint4, + &input, + "test_blueprint5", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp5")) + .plan() + .expect("plan"); + + assert_eq!( + blueprint4.clickhouse_cluster_config, + blueprint5.clickhouse_cluster_config + ); + + // Updating the inventory to reflect the removed keeper results in a new one being added + + // Remove the keeper for the expunged zone + collection + .clickhouse_keeper_cluster_membership + .remove(&bp_zone_config.id); + + // Update the inventory on at least one of the remaining nodes. + let existing = collection + .clickhouse_keeper_cluster_membership + .first_entry() + .unwrap() + .into_mut(); + existing.leader_committed_log_index = 3; + existing.raft_config = config.keepers.values().cloned().collect(); + + let blueprint6 = Planner::new_based_on( + log.clone(), + &blueprint5, + &input, + "test_blueprint6", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp6")) + .plan() + .expect("plan"); + + let old_config = blueprint5.clickhouse_cluster_config.as_ref().unwrap(); + let config = blueprint6.clickhouse_cluster_config.as_ref().unwrap(); + + // Our generation has changed to reflect the added keeper + assert_eq!(config.generation, old_config.generation.next()); + assert!(config.keepers.get(&bp_zone_config.id).is_none()); + // We've only added one keeper from our desired state + // This brings us back up to our target count + assert_eq!(config.keepers.len(), old_config.keepers.len() + 1); + assert_eq!(config.keepers.len(), target_keepers); + // We've allocated one new keeper + assert_eq!( + config.max_used_keeper_id, + old_config.max_used_keeper_id + 1.into() + ); + } } From 2466bdde6b1ab5e7d3b952136726445147ecf511 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 24 Sep 2024 23:04:53 +0000 Subject: [PATCH 09/19] fix test name --- nexus/reconfigurator/planning/src/planner.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index d28005599d..78f720b6d6 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -2417,8 +2417,8 @@ mod test { // models what will happen after an RSS deployment with clickhouse policy // enabled or an existing system already running a clickhouse cluster. #[test] - fn test_grow_and_shrink_clickhouse_clusters() { - static TEST_NAME: &str = "planner_grow_and_shrink_clickhouse_clusters"; + fn test_expunge_clickhouse_clusters() { + static TEST_NAME: &str = "planner_expunge_clickhouse_clusters"; let logctx = test_setup_log(TEST_NAME); let log = logctx.log.clone(); From 29db4b516097a44875e2a153df80a1fd6a158f20 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Wed, 25 Sep 2024 21:38:49 +0000 Subject: [PATCH 10/19] fix clippy and tests --- nexus/reconfigurator/planning/src/planner.rs | 5 +- openapi/nexus-internal.json | 89 ++++++++++++++++++++ 2 files changed, 91 insertions(+), 3 deletions(-) diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 78f720b6d6..f55976a238 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -2523,7 +2523,6 @@ mod test { let (sled_id, bp_zone_config) = blueprint3 .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) .find(|(_, z)| z.zone_type.is_clickhouse_keeper()) - .clone() .unwrap(); // Expunge a keeper zone @@ -2550,7 +2549,7 @@ mod test { let old_config = blueprint3.clickhouse_cluster_config.as_ref().unwrap(); let config = blueprint4.clickhouse_cluster_config.as_ref().unwrap(); assert_eq!(config.generation, old_config.generation.next()); - assert!(config.keepers.get(&bp_zone_config.id).is_none()); + assert!(config.keepers.contains_key(&bp_zone_config.id)); // We've only removed one keeper from our desired state assert_eq!(config.keepers.len() + 1, old_config.keepers.len()); // We haven't allocated any new keepers @@ -2607,7 +2606,7 @@ mod test { // Our generation has changed to reflect the added keeper assert_eq!(config.generation, old_config.generation.next()); - assert!(config.keepers.get(&bp_zone_config.id).is_none()); + assert!(config.keepers.contains_key(&bp_zone_config.id)); // We've only added one keeper from our desired state // This brings us back up to our target count assert_eq!(config.keepers.len(), old_config.keepers.len() + 1); diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 8b91b84f54..e1d5b38fd6 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -1875,6 +1875,15 @@ "$ref": "#/components/schemas/BlueprintZonesConfig" } }, + "clickhouse_cluster_config": { + "nullable": true, + "description": "Allocation of Clickhouse Servers and Keepers for replicated clickhouse setups. This is set to `None` if replicated clickhouse is not in use.", + "allOf": [ + { + "$ref": "#/components/schemas/ClickhouseClusterConfig" + } + ] + }, "cockroachdb_fingerprint": { "description": "CockroachDB state fingerprint when this blueprint was created", "type": "string" @@ -2561,6 +2570,74 @@ "key" ] }, + "ClickhouseClusterConfig": { + "description": "Global configuration for all clickhouse servers (replicas) and keepers", + "type": "object", + "properties": { + "cluster_name": { + "description": "An arbitrary name for the Clickhouse cluster shared by all nodes", + "type": "string" + }, + "cluster_secret": { + "description": "An arbitrary string shared by all nodes used at runtime to determine whether nodes are part of the same cluster.", + "type": "string" + }, + "generation": { + "description": "The last update to the clickhouse cluster configuration\n\nThis is used by `clickhouse-admin` in the clickhouse server and keeper zones to discard old configurations.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, + "highest_seen_keeper_leader_committed_log_index": { + "description": "This is used as a marker to tell if the raft configuration in a new inventory collection is newer than the last collection. 
This serves as a surrogate for the log index of the last committed configuration, which clickhouse keeper doesn't expose.\n\nThis is necesssary because during inventory collection we poll multiple keeper nodes, and each returns their local knowledge of the configuration. But we may reach different nodes in different attempts, and some nodes in a following attempt may reflect stale configuration. Due to timing, we can always query old information. That is just normal polling. However, we never want to use old configuration if we have already seen and acted on newer configuration.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "keepers": { + "description": "The desired state of the clickhouse keeper cluster\n\nWe decouple deployment of zones that should contain clickhouse keeper processes from actually starting or stopping those processes, adding or removing them to/from the keeper cluster, and reconfiguring other keeper and clickhouse server nodes to reflect the new configuration.\n\nAs part of this decoupling, we keep track of the intended zone deployment in the blueprint, but that is not enough to track the desired state of the keeper cluster. We are only allowed to add or remove one keeper node at a time, and therefore we must track the desired state of the keeper cluster which may change multiple times until the keepers in the cluster match the deployed zones. An example may help:\n\n1. We start with 3 keeper nodes in 3 deployed keeper zones and need to add two to reach our desired policy of 5 keepers 2. The planner adds 2 new keeper zones to the blueprint 3. The planner will also add **one** new keeper to the `keepers` field below that matches one of the deployed zones. 4. The executor will start the new keeper process that was added to the `keepers` field, attempt to add it to the keeper cluster by pushing configuration updates to the other keepers, and then updating the clickhouse server configurations to know about the new keeper. 5. If the keeper is successfully added, as reflected in inventory, then steps 3 and 4 above will be repeated for the next keeper process. 6. If the keeper is not successfully added by the executor it will continue to retry indefinitely. 7. If the zone is expunged while the planner has it as part of its desired state in `keepers`, and the executor is trying to add it, the keeper will be removed from `keepers` in the next blueprint. If it has been added to the actual cluster by an executor in the meantime it will be removed on the next iteration of an executor.", + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/KeeperId" + } + }, + "max_used_keeper_id": { + "description": "Clickhouse Keeper IDs must be unique and are handed out monotonically. Keep track of the last used one.", + "allOf": [ + { + "$ref": "#/components/schemas/KeeperId" + } + ] + }, + "max_used_server_id": { + "description": "Clickhouse Server IDs must be unique and are handed out monotonically. 
Keep track of the last used one.", + "allOf": [ + { + "$ref": "#/components/schemas/ServerId" + } + ] + }, + "servers": { + "description": "The desired state of clickhouse server processes on the rack\n\nClickhouse servers do not have the same limitations as keepers and can be deployed all at once.", + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ServerId" + } + } + }, + "required": [ + "cluster_name", + "cluster_secret", + "generation", + "highest_seen_keeper_leader_committed_log_index", + "keepers", + "max_used_keeper_id", + "max_used_server_id", + "servers" + ] + }, "CockroachDbClusterVersion": { "description": "CockroachDB cluster versions we are aware of.\n\nCockroachDB can be upgraded from one major version to the next, e.g. v22.1 -> v22.2. Each major version introduces changes in how it stores data on disk to support new features, and each major version has support for reading the previous version's data so that it can perform an upgrade. The version of the data format is called the \"cluster version\", which is distinct from but related to the software version that's being run.\n\nWhile software version v22.2 is using cluster version v22.1, it's possible to downgrade back to v22.1. Once the cluster version is upgraded, there's no going back.\n\nTo give us some time to evaluate new versions of the software while retaining a downgrade path, we currently deploy new versions of CockroachDB across two releases of the Oxide software, in a \"tick-tock\" model:\n\n- In \"tick\" releases, we upgrade the version of the CockroachDB software to a new major version, and update `CockroachDbClusterVersion::NEWLY_INITIALIZED`. On upgraded racks, the new version is running with the previous cluster version; on newly-initialized racks, the new version is running with the new cluser version. - In \"tock\" releases, we change `CockroachDbClusterVersion::POLICY` to the major version we upgraded to in the last \"tick\" release. This results in a new blueprint that upgrades the cluster version, destroying the downgrade path but allowing us to eventually upgrade to the next release.\n\nThese presently describe major versions of CockroachDB. The order of these must be maintained in the correct order (the first variant must be the earliest version).", "type": "string", @@ -3638,6 +3715,12 @@ "last" ] }, + "KeeperId": { + "description": "A unique ID for a ClickHouse Keeper", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, "LastResult": { "oneOf": [ { @@ -4950,6 +5033,12 @@ } ] }, + "ServerId": { + "description": "A unique ID for a Clickhouse Server", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, "SledAgentInfo": { "description": "Sent by a sled agent to Nexus to inform about resources", "type": "object", From 158a9b5cd5dd965d4e4586e1cda5c3eb3bf8d9a2 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Wed, 25 Sep 2024 22:28:36 +0000 Subject: [PATCH 11/19] minor cleanup --- .../reconfigurator/planning/src/blueprint_builder/builder.rs | 4 ++-- .../planning/src/blueprint_builder/clickhouse.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index f757862c73..b735fc9b79 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -344,8 +344,8 @@ impl<'a> BlueprintBuilder<'a> { }); // If we have the clickhouse cluster setup enabled via policy and we - // don't yet have a `ClickhouseClusterConfiguration`then we must create - // one and feed it to our `ClickhouseAllocator` + // don't yet have a `ClickhouseClusterConfiguration`, then we must create + // one and feed it to our `ClickhouseAllocator`. let clickhouse_allocator = if input.clickhouse_cluster_enabled() { let parent_config = parent_blueprint .clickhouse_cluster_config diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs index 2efc625ec4..868c310cc6 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs @@ -139,7 +139,7 @@ impl ClickhouseAllocator { // know whether a configuration is ongoing or not. // // There is an exception to this rule: on *new* clusters that have - // keeper zones deployed buty do not have any keepers running we must + // keeper zones deployed but do not have any keepers running we must // start at least one keeper unconditionally. This is because we cannot // retrieve keeper inventory if there are no keepers running. let current_keepers: BTreeSet<_> = @@ -153,7 +153,7 @@ impl ClickhouseAllocator { // the `if` condition above. let zone_id = active_clickhouse_zones.keepers.first().unwrap(); // Allocate a new `KeeperId` and map it to the first zone - new_config.max_used_keeper_id += 1.into(); + new_config.max_used_keeper_id = 1.into(); new_config .keepers .insert(*zone_id, new_config.max_used_keeper_id); From ce475769e82a20aa80cdee731f75a8c7f80a1f00 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Wed, 25 Sep 2024 23:13:52 +0000 Subject: [PATCH 12/19] fix clippy fix --- nexus/reconfigurator/planning/src/planner.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index f55976a238..a3bf65becf 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -2549,7 +2549,7 @@ mod test { let old_config = blueprint3.clickhouse_cluster_config.as_ref().unwrap(); let config = blueprint4.clickhouse_cluster_config.as_ref().unwrap(); assert_eq!(config.generation, old_config.generation.next()); - assert!(config.keepers.contains_key(&bp_zone_config.id)); + assert!(!config.keepers.contains_key(&bp_zone_config.id)); // We've only removed one keeper from our desired state assert_eq!(config.keepers.len() + 1, old_config.keepers.len()); // We haven't allocated any new keepers @@ -2606,7 +2606,7 @@ mod test { // Our generation has changed to reflect the added keeper assert_eq!(config.generation, old_config.generation.next()); - assert!(config.keepers.contains_key(&bp_zone_config.id)); + assert!(!config.keepers.contains_key(&bp_zone_config.id)); // We've only added one keeper from our desired state // This brings us back up to our target count assert_eq!(config.keepers.len(), old_config.keepers.len() + 1); From a03052b1745102fbe7951cbf3ee85bf0c4f30ca9 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 26 Sep 2024 00:09:56 +0000 Subject: [PATCH 13/19] cleanup after yourself you slob --- nexus/reconfigurator/planning/src/planner.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index a3bf65becf..fba7c59e48 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -2411,6 +2411,8 @@ mod test { .highest_seen_keeper_leader_committed_log_index, 2 ); + + logctx.cleanup_successful(); } // Start with an existing clickhouse cluster and expunge a keeper. This @@ -2616,5 +2618,7 @@ mod test { config.max_used_keeper_id, old_config.max_used_keeper_id + 1.into() ); + + logctx.cleanup_successful(); } } From ae380dff525a72028dce865bba57e210f47d70f5 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 26 Sep 2024 17:00:45 +0000 Subject: [PATCH 14/19] Expunge relevant zones when clickhouse cluster is disabled via policy Test coming soon --- .../planning/src/blueprint_builder/builder.rs | 33 +++++++++++++++++-- nexus/reconfigurator/planning/src/planner.rs | 12 +++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index b735fc9b79..60c5506557 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -176,6 +176,9 @@ impl fmt::Display for Operation { ZoneExpungeReason::SledExpunged => { "sled policy is expunged" } + ZoneExpungeReason::ClickhouseClusterDisabled => { + "clickhouse cluster disabled via policy" + } }; write!( f, @@ -513,6 +516,13 @@ impl<'a> BlueprintBuilder<'a> { "sled_id" => sled_id.to_string(), )); + // If there are any `ClickhouseServer` or `ClickhouseKeeper` zones that + // are not expunged and we no longer have a `ClickhousePolicy` which + // indicates replicated clickhouse clusters should be running, we need + // to expunge all such zones. + let clickhouse_cluster_enabled = + self.input.clickhouse_cluster_enabled(); + // Do any zones need to be marked expunged? let mut zones_to_expunge = BTreeMap::new(); @@ -524,9 +534,11 @@ impl<'a> BlueprintBuilder<'a> { "zone_id" => zone_id.to_string() )); - let Some(reason) = - zone_needs_expungement(sled_details, zone_config) - else { + let Some(reason) = zone_needs_expungement( + sled_details, + zone_config, + clickhouse_cluster_enabled, + ) else { continue; }; @@ -564,6 +576,13 @@ impl<'a> BlueprintBuilder<'a> { "expunged sled with non-expunged zone found" ); } + ZoneExpungeReason::ClickhouseClusterDisabled => { + info!( + &log, + "clickhouse cluster disabled via policy, \ + expunging related zone" + ); + } } zones_to_expunge.insert(zone_id, reason); @@ -594,6 +613,7 @@ impl<'a> BlueprintBuilder<'a> { let mut count_disk_expunged = 0; let mut count_sled_decommissioned = 0; let mut count_sled_expunged = 0; + let mut count_clickhouse_cluster_disabled = 0; for reason in zones_to_expunge.values() { match reason { ZoneExpungeReason::DiskExpunged => count_disk_expunged += 1, @@ -601,12 +621,19 @@ impl<'a> BlueprintBuilder<'a> { count_sled_decommissioned += 1; } ZoneExpungeReason::SledExpunged => count_sled_expunged += 1, + ZoneExpungeReason::ClickhouseClusterDisabled => { + count_clickhouse_cluster_disabled += 1 + } }; } let count_and_reason = [ (count_disk_expunged, ZoneExpungeReason::DiskExpunged), (count_sled_decommissioned, ZoneExpungeReason::SledDecommissioned), (count_sled_expunged, ZoneExpungeReason::SledExpunged), + ( + count_clickhouse_cluster_disabled, + ZoneExpungeReason::ClickhouseClusterDisabled, + ), ]; for (count, reason) in count_and_reason { if count > 0 { diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index fba7c59e48..26fca1bb73 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -703,6 +703,7 @@ fn sled_needs_all_zones_expunged( pub(crate) fn zone_needs_expungement( sled_details: &SledDetails, zone_config: &BlueprintZoneConfig, + clickhouse_cluster_enabled: bool, ) -> Option { // Should we expunge the zone because the sled is gone? 
if let Some(reason) = @@ -727,6 +728,16 @@ pub(crate) fn zone_needs_expungement( } }; + // Should we expunge the zone because clickhouse clusters are no longer + // enabled via policy? + if !clickhouse_cluster_enabled { + if zone_config.zone_type.is_clickhouse_keeper() + || zone_config.zone_type.is_clickhouse_server() + { + return Some(ZoneExpungeReason::ClickhouseClusterDisabled); + } + } + None } @@ -739,6 +750,7 @@ pub(crate) enum ZoneExpungeReason { DiskExpunged, SledDecommissioned, SledExpunged, + ClickhouseClusterDisabled, } #[cfg(test)] From 592608d2fd1468e1a9cd16153987f0d244d3c7dc Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 26 Sep 2024 18:48:42 +0000 Subject: [PATCH 15/19] add a test for zone expungement on policy disable --- nexus/reconfigurator/planning/src/planner.rs | 97 ++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 26fca1bb73..01aea9c08d 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -2633,4 +2633,101 @@ mod test { logctx.cleanup_successful(); } + + #[test] + fn test_expunge_all_clickhouse_cluster_zones_after_policy_is_disabled() { + static TEST_NAME: &str = "planner_expunge_all_clickhouse_cluster_zones_after_policy_is_disabled"; + let logctx = test_setup_log(TEST_NAME); + let log = logctx.log.clone(); + + // Use our example system. + let (collection, input, blueprint1) = + example(&log, TEST_NAME, DEFAULT_N_SLEDS); + + let target_keepers = 3; + let target_servers = 2; + + // Enable clickhouse clusters via policy + let mut input_builder = input.into_builder(); + input_builder.policy_mut().clickhouse_policy = Some(ClickhousePolicy { + deploy_with_standalone: true, + target_servers, + target_keepers, + }); + let input = input_builder.build(); + + // Create a new blueprint to deploy all our clickhouse zones + let blueprint2 = Planner::new_based_on( + log.clone(), + &blueprint1, + &input, + "test_blueprint2", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp2")) + .plan() + .expect("plan"); + + // We should see zones for 2 clickhouse keepers, and 2 servers created + let active_zones: Vec<_> = blueprint2 + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + .map(|(_, z)| z.clone()) + .collect(); + + let keeper_zone_ids: BTreeSet<_> = active_zones + .iter() + .filter(|z| z.zone_type.is_clickhouse_keeper()) + .map(|z| z.id) + .collect(); + let server_zone_ids: BTreeSet<_> = active_zones + .iter() + .filter(|z| z.zone_type.is_clickhouse_server()) + .map(|z| z.id) + .collect(); + + assert_eq!(keeper_zone_ids.len(), target_keepers); + assert_eq!(server_zone_ids.len(), target_servers); + + // Disable clickhouse clusters via policy + let mut input_builder = input.into_builder(); + input_builder.policy_mut().clickhouse_policy = None; + let input = input_builder.build(); + + // Create a new blueprint with the disabled policy + let blueprint3 = Planner::new_based_on( + log.clone(), + &blueprint2, + &input, + "test_blueprint3", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp3")) + .plan() + .expect("plan"); + + // All our clickhouse keeper and server zones that we created when we + // enabled our clickhouse policy should be expunged when we disable it. 
+ let expunged_zones: Vec<_> = blueprint3 + .all_omicron_zones(BlueprintZoneFilter::Expunged) + .map(|(_, z)| z.clone()) + .collect(); + + let expunged_keeper_zone_ids: BTreeSet<_> = expunged_zones + .iter() + .filter(|z| z.zone_type.is_clickhouse_keeper()) + .map(|z| z.id) + .collect(); + let expunged_server_zone_ids: BTreeSet<_> = expunged_zones + .iter() + .filter(|z| z.zone_type.is_clickhouse_server()) + .map(|z| z.id) + .collect(); + + assert_eq!(keeper_zone_ids, expunged_keeper_zone_ids); + assert_eq!(server_zone_ids, expunged_server_zone_ids); + + logctx.cleanup_successful(); + } } From ad1801bc113ffab526bfca3a073615489028124a Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 26 Sep 2024 18:53:46 +0000 Subject: [PATCH 16/19] fix up comments --- nexus/reconfigurator/planning/src/planner.rs | 7 ++++--- nexus/types/src/deployment/zone_type.rs | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 01aea9c08d..6a1ff6dae4 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -2305,7 +2305,8 @@ mod test { } } - // Planning again withougt changing inventory should result in the same state + // Planning again without changing inventory should result in the same + // state let blueprint3 = Planner::new_based_on( log.clone(), &blueprint2, @@ -2465,7 +2466,7 @@ mod test { .plan() .expect("plan"); - // We should see zones for 2 clickhouse keepers, and 2 servers created + // We should see zones for 3 clickhouse keepers, and 2 servers created let active_zones: Vec<_> = blueprint2 .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) .map(|(_, z)| z.clone()) @@ -2669,7 +2670,7 @@ mod test { .plan() .expect("plan"); - // We should see zones for 2 clickhouse keepers, and 2 servers created + // We should see zones for 3 clickhouse keepers, and 2 servers created let active_zones: Vec<_> = blueprint2 .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) .map(|(_, z)| z.clone()) diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs index d09150b894..e3fa8db769 100644 --- a/nexus/types/src/deployment/zone_type.rs +++ b/nexus/types/src/deployment/zone_type.rs @@ -105,7 +105,7 @@ impl BlueprintZoneType { matches!(self, BlueprintZoneType::ClickhouseServer(_)) } - /// Identifies whether this is a clickhouse zone + /// Identifies whether this is a single-node clickhouse zone pub fn is_clickhouse(&self) -> bool { matches!(self, BlueprintZoneType::Clickhouse(_)) } From f73ea1083f9a3de0014ba903b96bad8492db35a1 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 27 Sep 2024 16:10:16 +0000 Subject: [PATCH 17/19] support adding initial config at once --- .../src/blueprint_builder/clickhouse.rs | 35 +-- nexus/reconfigurator/planning/src/planner.rs | 238 +++++++++++++----- 2 files changed, 194 insertions(+), 79 deletions(-) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs index 868c310cc6..0161843436 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs @@ -130,7 +130,7 @@ impl ClickhouseAllocator { } // Now we need to configure the keepers. We can only add or remove - // one keeper at a time. + // one keeper at a time during a reconfiguration. 
// // We need to see if we have any keeper inventory so we can compare it // with our current configuration and see if any changes are required. @@ -139,24 +139,27 @@ impl ClickhouseAllocator { // know whether a configuration is ongoing or not. // // There is an exception to this rule: on *new* clusters that have - // keeper zones deployed but do not have any keepers running we must - // start at least one keeper unconditionally. This is because we cannot - // retrieve keeper inventory if there are no keepers running. + // keeper zones deployed but do not have any keepers running we can + // create a full cluster configuration unconditionally. We can add + // more than one keeper because this is the initial configuration + // and not a "reconfiguration" that only allows adding or removing + // one node at a time. Furthermore, we have to start at leasat one + // keeper unconditionally in this case because we cannot retrieve + // keeper inventory if there are no keepers running. Without retrieving + // inventory, we cannot make further progress. let current_keepers: BTreeSet<_> = self.parent_config.keepers.values().cloned().collect(); let Some(inventory_membership) = &self.inventory else { - // Are we a new cluster and do we have any active keeper zones? - if new_config.max_used_keeper_id == 0.into() - && !active_clickhouse_zones.keepers.is_empty() - { - // Unwrap safety: We check that there is at least one keeper in - // the `if` condition above. - let zone_id = active_clickhouse_zones.keepers.first().unwrap(); - // Allocate a new `KeeperId` and map it to the first zone - new_config.max_used_keeper_id = 1.into(); - new_config - .keepers - .insert(*zone_id, new_config.max_used_keeper_id); + // Are we a new cluster ? + if new_config.max_used_keeper_id == 0.into() { + // Generate our initial configuration + for zone_id in &active_clickhouse_zones.keepers { + // Allocate a new `KeeperId` and map it to the zone_id + new_config.max_used_keeper_id += 1.into(); + new_config + .keepers + .insert(*zone_id, new_config.max_used_keeper_id); + } } return bump_gen_if_necessary(new_config); }; diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 6a1ff6dae4..6591863e1c 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -2217,8 +2217,8 @@ mod test { logctx.cleanup_successful(); } - /// Deploy all keeper nodes 1 at a time and all server nodes at once - /// starting from 0 deployed nodes + /// Deploy all keeper nodes server nodes at once for a new cluster. + /// Then add keeper nodes 1 at a time. #[test] fn test_plan_deploy_all_clickhouse_cluster_nodes() { static TEST_NAME: &str = "planner_deploy_all_keeper_nodes"; @@ -2233,9 +2233,7 @@ mod test { // We shouldn't have a clickhouse cluster config, as we don't have a // clickhouse policy set yet assert!(blueprint1.clickhouse_cluster_config.is_none()); - // We'd typically use at least 3 servers here, but we're trying to keep - // this test reasonably short. 
- let target_keepers = 2; + let target_keepers = 3; let target_servers = 2; // Enable clickhouse clusters via policy @@ -2260,7 +2258,7 @@ mod test { .plan() .expect("plan"); - // We should see zones for 2 clickhouse keepers, and 2 servers created + // We should see zones for 3 clickhouse keepers, and 2 servers created let active_zones: Vec<_> = blueprint2 .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) .map(|(_, z)| z.clone()) @@ -2280,24 +2278,27 @@ mod test { assert_eq!(keeper_zone_ids.len(), target_keepers); assert_eq!(server_zone_ids.len(), target_servers); - // We should be attempting to allocate both servers and a single keeper + // We should be attempting to allocate all servers and a single keeper // until the inventory reflects a change in keeper membership. { let clickhouse_cluster_config = blueprint2.clickhouse_cluster_config.as_ref().unwrap(); assert_eq!(clickhouse_cluster_config.generation, 2.into()); - assert_eq!(clickhouse_cluster_config.max_used_keeper_id, 1.into()); + assert_eq!( + clickhouse_cluster_config.max_used_keeper_id, + (target_keepers as u64).into() + ); assert_eq!( clickhouse_cluster_config.max_used_server_id, (target_servers as u64).into() ); - assert_eq!(clickhouse_cluster_config.keepers.len(), 1); + assert_eq!(clickhouse_cluster_config.keepers.len(), target_keepers); assert_eq!(clickhouse_cluster_config.servers.len(), target_servers); - // Ensure that the added keeper is in a keeper zone - let zone_id = - &clickhouse_cluster_config.keepers.first_key_value().unwrap().0; - assert!(keeper_zone_ids.contains(zone_id)); + // Ensure that the added keepers are in server zones + for zone_id in clickhouse_cluster_config.keepers.keys() { + assert!(keeper_zone_ids.contains(zone_id)); + } // Ensure that the added servers are in server zones for zone_id in clickhouse_cluster_config.servers.keys() { @@ -2324,8 +2325,9 @@ mod test { blueprint3.clickhouse_cluster_config ); - // Updating the inventory to reflect the first keeper should result in a - // new configured keeper. + // Updating the inventory to reflect the keepers + // should result in the same state, except for the + // `highest_seen_keeper_leader_committed_log_index` let (zone_id, keeper_id) = blueprint3 .clickhouse_cluster_config .as_ref() @@ -2336,7 +2338,14 @@ mod test { let membership = ClickhouseKeeperClusterMembership { queried_keeper: *keeper_id, leader_committed_log_index: 1, - raft_config: [*keeper_id].into_iter().collect(), + raft_config: blueprint3 + .clickhouse_cluster_config + .as_ref() + .unwrap() + .keepers + .values() + .cloned() + .collect(), }; collection .clickhouse_keeper_cluster_membership @@ -2354,36 +2363,36 @@ mod test { .plan() .expect("plan"); - // We should be attempting to allocate 2 keepers now that one is running - // and returning inventory. 
- { - let clickhouse_cluster_config = - blueprint4.clickhouse_cluster_config.as_ref().unwrap(); - assert_eq!(clickhouse_cluster_config.generation, 3.into()); - assert_eq!(clickhouse_cluster_config.max_used_keeper_id, 2.into()); - assert_eq!(clickhouse_cluster_config.keepers.len(), 2); - - // Ensure that the keepers are in keeper zones - for zone_id in clickhouse_cluster_config.keepers.keys() { - assert!(keeper_zone_ids.contains(zone_id)); - } - } + let bp3_config = blueprint3.clickhouse_cluster_config.as_ref().unwrap(); + let bp4_config = blueprint4.clickhouse_cluster_config.as_ref().unwrap(); + assert_eq!(bp4_config.generation, bp3_config.generation); + assert_eq!( + bp4_config.max_used_keeper_id, + bp3_config.max_used_keeper_id + ); + assert_eq!( + bp4_config.max_used_server_id, + bp3_config.max_used_server_id + ); + assert_eq!(bp4_config.keepers, bp3_config.keepers); + assert_eq!(bp4_config.servers, bp3_config.servers); + assert_eq!( + bp4_config.highest_seen_keeper_leader_committed_log_index, + 1 + ); - // Fill in the inventory for the second keeper. We should not allocate - // a new keeper since we have reached our target count. However, we should update - // the `highests_seen_keeper_committed_log_index`; - for (zone_id, keeper_id) in - &blueprint4.clickhouse_cluster_config.as_ref().unwrap().keepers - { - let membership = ClickhouseKeeperClusterMembership { - queried_keeper: *keeper_id, - leader_committed_log_index: 2, - raft_config: [1.into(), 2.into()].into_iter().collect(), - }; - collection - .clickhouse_keeper_cluster_membership - .insert(*zone_id, membership); - } + // Let's bump the clickhouse target to 5 via policy so that we can add + // more nodes one at a time. Initial configuration deploys all nodes, + // but reconfigurations may only add or remove one node at a time. 
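As a rough illustration of the rule just described (a hypothetical helper, not part of the planner's API): a brand-new cluster configuration may allocate a keeper for every deployed keeper zone at once, while an existing cluster may only change keeper membership by one node per planning pass.

/// Sketch of the add-rate rule: how many keepers a single planning pass may
/// allocate, given whether this is the initial configuration.
fn keepers_to_add(
    is_initial_config: bool,
    current_keepers: usize,
    target_keepers: usize,
) -> usize {
    let missing = target_keepers.saturating_sub(current_keepers);
    if is_initial_config {
        // Initial configuration: bring up the whole keeper set at once.
        missing
    } else {
        // Reconfiguration: at most one membership change at a time.
        missing.min(1)
    }
}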
+ // Enable clickhouse clusters via policy + let target_keepers = 5; + let mut input_builder = input.into_builder(); + input_builder.policy_mut().clickhouse_policy = Some(ClickhousePolicy { + deploy_with_standalone: true, + target_servers, + target_keepers, + }); + let input = input_builder.build(); let blueprint5 = Planner::new_based_on( log.clone(), &blueprint4, @@ -2392,39 +2401,142 @@ mod test { &collection, ) .expect("created planner") - .with_rng_seed((TEST_NAME, "bp4")) + .with_rng_seed((TEST_NAME, "bp5")) .plan() .expect("plan"); - let clickhouse_cluster_config4 = - blueprint4.clickhouse_cluster_config.as_ref().unwrap(); - let clickhouse_cluster_config5 = - blueprint5.clickhouse_cluster_config.as_ref().unwrap(); + let active_zones: Vec<_> = blueprint5 + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + .map(|(_, z)| z.clone()) + .collect(); + let new_keeper_zone_ids: BTreeSet<_> = active_zones + .iter() + .filter(|z| z.zone_type.is_clickhouse_keeper()) + .map(|z| z.id) + .collect(); + + // We should have allocated 2 new keeper zones + assert_eq!(new_keeper_zone_ids.len(), target_keepers); + assert!(keeper_zone_ids.is_subset(&new_keeper_zone_ids)); + + // We should be trying to provision one new keeper for a keeper zone + let bp4_config = blueprint4.clickhouse_cluster_config.as_ref().unwrap(); + let bp5_config = blueprint5.clickhouse_cluster_config.as_ref().unwrap(); + assert_eq!(bp5_config.generation, bp4_config.generation.next()); assert_eq!( - clickhouse_cluster_config4.generation, - clickhouse_cluster_config5.generation + bp5_config.max_used_keeper_id, + bp4_config.max_used_keeper_id + 1.into() ); assert_eq!( - clickhouse_cluster_config4.max_used_keeper_id, - clickhouse_cluster_config5.max_used_keeper_id + bp5_config.keepers.len(), + bp5_config.max_used_keeper_id.0 as usize ); + + // Planning again without updating inventory results in the same `ClickhouseClusterConfig` + let blueprint6 = Planner::new_based_on( + log.clone(), + &blueprint5, + &input, + "test_blueprint6", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp6")) + .plan() + .expect("plan"); + + let bp6_config = blueprint6.clickhouse_cluster_config.as_ref().unwrap(); + assert_eq!(bp5_config, bp6_config); + + // Updating the inventory to include the 4th node should add another + // keeper node + let membership = ClickhouseKeeperClusterMembership { + queried_keeper: *keeper_id, + leader_committed_log_index: 2, + raft_config: blueprint6 + .clickhouse_cluster_config + .as_ref() + .unwrap() + .keepers + .values() + .cloned() + .collect(), + }; + collection + .clickhouse_keeper_cluster_membership + .insert(*zone_id, membership); + + let blueprint7 = Planner::new_based_on( + log.clone(), + &blueprint6, + &input, + "test_blueprint7", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp7")) + .plan() + .expect("plan"); + + let bp7_config = blueprint7.clickhouse_cluster_config.as_ref().unwrap(); + assert_eq!(bp7_config.generation, bp6_config.generation.next()); assert_eq!( - clickhouse_cluster_config4.keepers, - clickhouse_cluster_config5.keepers + bp7_config.max_used_keeper_id, + bp6_config.max_used_keeper_id + 1.into() ); - assert_ne!( - clickhouse_cluster_config4 - .highest_seen_keeper_leader_committed_log_index, - clickhouse_cluster_config5 - .highest_seen_keeper_leader_committed_log_index + assert_eq!( + bp7_config.keepers.len(), + bp7_config.max_used_keeper_id.0 as usize ); + assert_eq!(bp7_config.keepers.len(), target_keepers); assert_eq!( - 
clickhouse_cluster_config5
-                .highest_seen_keeper_leader_committed_log_index,
+            bp7_config.highest_seen_keeper_leader_committed_log_index,
             2
         );

+        // Updating the inventory to reflect the newest keeper node should not
+        // increase the cluster size since we have reached the target.
+        let membership = ClickhouseKeeperClusterMembership {
+            queried_keeper: *keeper_id,
+            leader_committed_log_index: 3,
+            raft_config: blueprint7
+                .clickhouse_cluster_config
+                .as_ref()
+                .unwrap()
+                .keepers
+                .values()
+                .cloned()
+                .collect(),
+        };
+        collection
+            .clickhouse_keeper_cluster_membership
+            .insert(*zone_id, membership);
+        let blueprint8 = Planner::new_based_on(
+            log.clone(),
+            &blueprint7,
+            &input,
+            "test_blueprint8",
+            &collection,
+        )
+        .expect("created planner")
+        .with_rng_seed((TEST_NAME, "bp8"))
+        .plan()
+        .expect("plan");
+
+        let bp8_config = blueprint8.clickhouse_cluster_config.as_ref().unwrap();
+        assert_eq!(bp8_config.generation, bp7_config.generation);
+        assert_eq!(
+            bp8_config.max_used_keeper_id,
+            bp7_config.max_used_keeper_id
+        );
+        assert_eq!(bp8_config.keepers, bp7_config.keepers);
+        assert_eq!(bp8_config.keepers.len(), target_keepers);
+        assert_eq!(
+            bp8_config.highest_seen_keeper_leader_committed_log_index,
+            3
+        );
+
         logctx.cleanup_successful();
     }


From 072097ffa963cc77d0d6caf2876205162f547e1c Mon Sep 17 00:00:00 2001
From: "Andrew J. Stone"
Date: Fri, 27 Sep 2024 16:14:24 +0000
Subject: [PATCH 18/19] fix comments

---
 .../planning/src/blueprint_builder/clickhouse.rs | 10 +++++-----
 nexus/reconfigurator/planning/src/planner.rs     |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs
index 0161843436..0941806fc3 100644
--- a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs
+++ b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs
@@ -141,11 +141,11 @@ impl ClickhouseAllocator {
         // There is an exception to this rule: on *new* clusters that have
         // keeper zones deployed but do not have any keepers running we can
         // create a full cluster configuration unconditionally. We can add
-        // more than one keeper because this is the initial configuration
-        // and not a "reconfiguration" that only allows adding or removing
-        // one node at a time. Furthermore, we have to start at leasat one
-        // keeper unconditionally in this case because we cannot retrieve
-        // keeper inventory if there are no keepers running. Without retrieving
+        // more than one keeper because this is the initial configuration and
+        // not a "reconfiguration" that only allows adding or removing one
+        // node at a time. Furthermore, we have to start at least one keeper
+        // unconditionally in this case because we cannot retrieve keeper
+        // inventory if there are no keepers running. Without retrieving
         // inventory, we cannot make further progress.
         let current_keepers: BTreeSet<_> =
             self.parent_config.keepers.values().cloned().collect();
diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs
index 6591863e1c..5cd681cfa5 100644
--- a/nexus/reconfigurator/planning/src/planner.rs
+++ b/nexus/reconfigurator/planning/src/planner.rs
@@ -2278,8 +2278,8 @@ mod test {
         assert_eq!(keeper_zone_ids.len(), target_keepers);
         assert_eq!(server_zone_ids.len(), target_servers);

-        // We should be attempting to allocate all servers and a single keeper
-        // until the inventory reflects a change in keeper membership.
+        // We should be attempting to allocate all servers and keepers since
+        // this is the initial configuration.
         {
             let clickhouse_cluster_config =
                 blueprint2.clickhouse_cluster_config.as_ref().unwrap();

From 86c1fc0398c22ef741611232742e66060e2628fd Mon Sep 17 00:00:00 2001
From: "Andrew J. Stone"
Date: Tue, 1 Oct 2024 22:45:13 +0000
Subject: [PATCH 19/19] Comments related to RSS

---
 nexus/test-utils/src/lib.rs          | 3 ++-
 sled-agent/src/rack_setup/service.rs | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs
index a629a520c2..e80f8c0ae0 100644
--- a/nexus/test-utils/src/lib.rs
+++ b/nexus/test-utils/src/lib.rs
@@ -834,7 +834,8 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
             cockroachdb_fingerprint: String::new(),
             cockroachdb_setting_preserve_downgrade:
                 CockroachDbPreserveDowngrade::DoNotModify,
-            // TODO(ajs): Should we create this in RSS? it relies on policy
+            // Clickhouse clusters are not generated by RSS. One must run
+            // reconfigurator for that.
             clickhouse_cluster_config: None,
             time_created: Utc::now(),
             creator: "nexus-test-utils".to_string(),
diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs
index fb4ffa1968..fa64a5c06c 100644
--- a/sled-agent/src/rack_setup/service.rs
+++ b/sled-agent/src/rack_setup/service.rs
@@ -1486,8 +1486,8 @@ pub(crate) fn build_initial_blueprint_from_sled_configs(
             cockroachdb_fingerprint: String::new(),
             cockroachdb_setting_preserve_downgrade:
                 CockroachDbPreserveDowngrade::DoNotModify,
-        // TODO: Allocate keepers and servers from the plan if there is a policy,
-        // or just do it all in reconfigurator after RSS?
+        // We do not create clickhouse clusters in RSS. We create them via
+        // reconfigurator only.
             clickhouse_cluster_config: None,
             time_created: Utc::now(),
             creator: "RSS".to_string(),
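
The patch series above encodes a specific allocation rule for ClickHouse keepers: the initial configuration may deploy every keeper at once (there is no keeper inventory to consult yet), but each later reconfiguration adds at most one keeper per planning pass, and only after inventory shows that the keepers' raft configuration matches the membership the previous blueprint asked for. The standalone Rust sketch below illustrates that rule. It is not code from these patches; names such as `plan_keepers`, `KeeperMembership`, and the `max_used` counter are illustrative assumptions, not the real `ClickhouseAllocator` API.

use std::collections::BTreeSet;

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct KeeperId(u64);

/// Inventory reported by a running keeper: the raft membership it sees.
struct KeeperMembership {
    raft_config: BTreeSet<KeeperId>,
}

/// Decide which keepers the next blueprint should contain.
fn plan_keepers(
    current: &BTreeSet<KeeperId>,
    target_count: usize,
    inventory: Option<&KeeperMembership>,
    max_used: &mut u64,
) -> BTreeSet<KeeperId> {
    let mut next = current.clone();

    // Initial configuration: no keepers exist yet, so there is no keeper
    // inventory to consult. Deploy the full target count unconditionally.
    if current.is_empty() {
        for _ in 0..target_count {
            *max_used += 1;
            next.insert(KeeperId(*max_used));
        }
        return next;
    }

    // Reconfiguration: only proceed once inventory confirms that the raft
    // configuration matches the keepers we already asked for.
    let Some(inv) = inventory else {
        return next; // no inventory yet; keep the current membership
    };
    if inv.raft_config != *current {
        return next; // previous change not committed yet; wait
    }

    // Add at most one keeper per planning pass until the target is reached.
    if current.len() < target_count {
        *max_used += 1;
        next.insert(KeeperId(*max_used));
    }
    next
}

fn main() {
    let mut max_used = 0;

    // Initial configuration deploys all keepers at once (target of 3 here).
    let gen1 = plan_keepers(&BTreeSet::new(), 3, None, &mut max_used);
    assert_eq!(gen1.len(), 3);

    // After bumping the target to 5, only one keeper is added per pass, and
    // only because inventory confirms the current membership.
    let inv = KeeperMembership { raft_config: gen1.clone() };
    let gen2 = plan_keepers(&gen1, 5, Some(&inv), &mut max_used);
    assert_eq!(gen2.len(), 4);

    // Planning again with stale inventory (still reflecting gen1) is a no-op.
    let gen3 = plan_keepers(&gen2, 5, Some(&inv), &mut max_used);
    assert_eq!(gen3, gen2);
    println!("final membership: {gen3:?}");
}

The sketch mirrors what the planner test above exercises: planning again without fresh inventory is a no-op, and membership only grows after the collection is updated with a `ClickhouseKeeperClusterMembership` whose raft config matches the current set of keepers.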