From 7b54128a3cbe1ee53bf7d2fa6b78cf77c373eb62 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 28 Apr 2023 09:54:56 -0400 Subject: [PATCH 01/15] [sled-agent] Make service_manager responsible for storage services too --- illumos-utils/src/zpool.rs | 1 + openapi/sled-agent.json | 109 ++++- sled-agent/src/bootstrap/hardware.rs | 5 +- sled-agent/src/http_entrypoints.rs | 1 + sled-agent/src/lib.rs | 1 + sled-agent/src/params.rs | 49 +- sled-agent/src/rack_setup/plan/service.rs | 10 +- sled-agent/src/services.rs | 291 +++++++++-- sled-agent/src/sled_agent.rs | 37 +- sled-agent/src/storage/dataset.rs | 72 +++ sled-agent/src/storage/mod.rs | 7 + sled-agent/src/storage_manager.rs | 560 ++-------------------- smf/cockroachdb/method_script.sh | 6 + 13 files changed, 534 insertions(+), 615 deletions(-) create mode 100644 sled-agent/src/storage/dataset.rs create mode 100644 sled-agent/src/storage/mod.rs diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index cd0fa847c7..dc4507f0ea 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -249,6 +249,7 @@ impl Zpool { } #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, JsonSchema)] +#[serde(rename_all = "snake_case")] pub enum ZpoolKind { // This zpool is used for external storage (u.2) External, diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index fd93c6cc34..b77c318ead 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -640,13 +640,6 @@ { "type": "object", "properties": { - "all_addresses": { - "description": "The addresses of all nodes within the cluster.", - "type": "array", - "items": { - "type": "string" - } - }, "type": { "type": "string", "enum": [ @@ -655,7 +648,6 @@ } }, "required": [ - "all_addresses", "type" ] }, @@ -689,6 +681,21 @@ } ] }, + "DatasetName": { + "type": "object", + "properties": { + "kind": { + "$ref": "#/components/schemas/DatasetKind" + }, + "pool_name": { + "$ref": "#/components/schemas/ZpoolName" + } + }, + "required": [ + 
"kind", + "pool_name" + ] + }, "DendriteAsic": { "type": "string", "enum": [ @@ -1738,7 +1745,7 @@ "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" }, "ServiceEnsureBody": { - "description": "Used to request that the Sled initialize certain services on initialization.\n\nThis may be used to record that certain sleds are responsible for launching services which may not be associated with a dataset, such as Nexus.", + "description": "Used to request that the Sled initialize certain services.\n\nThis may be used to record that certain sleds are responsible for launching services which may not be associated with a dataset, such as Nexus.", "type": "object", "properties": { "services": { @@ -2036,6 +2043,48 @@ "mode", "type" ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "clickhouse" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "cockroach_db" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "crucible" + ] + } + }, + "required": [ + "type" + ] } ] }, @@ -2050,6 +2099,15 @@ "format": "ipv6" } }, + "dataset": { + "nullable": true, + "default": null, + "allOf": [ + { + "$ref": "#/components/schemas/DatasetName" + } + ] + }, "gz_addresses": { "default": [], "type": "array", @@ -2465,13 +2523,16 @@ "description": "The type of zone which may be requested from Sled Agent", "type": "string", "enum": [ + "clickhouse", + "cockroach_db", + "crucible_pantry", + "crucible", "external_dns", "internal_dns", "nexus", + "ntp", "oximeter", - "switch", - "crucible_pantry", - "ntp" + "switch" ] }, "Zpool": { @@ -2489,6 +2550,30 @@ "disk_type", "id" ] + }, + "ZpoolKind": { + "type": "string", + "enum": [ + "external", 
+ "internal" + ] + }, + "ZpoolName": { + "description": "A wrapper around a zpool name.\n\nThis expects that the format will be: `ox{i,p}_` - we parse the prefix when reading the structure, and validate that the UUID can be utilized.", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "kind": { + "$ref": "#/components/schemas/ZpoolKind" + } + }, + "required": [ + "id", + "kind" + ] } } } diff --git a/sled-agent/src/bootstrap/hardware.rs b/sled-agent/src/bootstrap/hardware.rs index 491d341617..2bb068438b 100644 --- a/sled-agent/src/bootstrap/hardware.rs +++ b/sled-agent/src/bootstrap/hardware.rs @@ -177,10 +177,7 @@ impl HardwareMonitor { let hardware = HardwareManager::new(log, sled_mode) .map_err(|e| Error::Hardware(e))?; - // TODO: The coupling between the storage and service manager is growing - // pretty tight; we should consider merging them together. - let storage_manager = - StorageManager::new(&log, underlay_etherstub.clone()).await; + let storage_manager = StorageManager::new(&log).await; let service_manager = ServiceManager::new( log.clone(), diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 027cf7dc88..4a0db86ed5 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -103,6 +103,7 @@ async fn filesystem_put( let sa = rqctx.context(); let body_args = body.into_inner(); sa.filesystem_ensure( + body_args.id, body_args.zpool_id, body_args.dataset_kind, body_args.address, diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 0944aa62bb..9682fa3cc8 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -31,6 +31,7 @@ mod services; mod sled_agent; mod smf_helper; pub mod sp; +pub(crate) mod storage; mod storage_manager; mod updates; diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 104c1922eb..3276e1d46e 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -206,13 +206,12 @@ pub 
struct Zpool { /// The type of a dataset, and an auxiliary information necessary /// to successfully launch a zone managing the associated data. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] #[serde(tag = "type", rename_all = "snake_case")] pub enum DatasetKind { - CockroachDb { - /// The addresses of all nodes within the cluster. - all_addresses: Vec, - }, + CockroachDb, Crucible, Clickhouse, } @@ -221,9 +220,7 @@ impl From for sled_agent_client::types::DatasetKind { fn from(k: DatasetKind) -> Self { use DatasetKind::*; match k { - CockroachDb { all_addresses } => Self::CockroachDb( - all_addresses.iter().map(|a| a.to_string()).collect(), - ), + CockroachDb => Self::CockroachDb, Crucible => Self::Crucible, Clickhouse => Self::Clickhouse, } @@ -333,6 +330,9 @@ pub enum ServiceType { Maghemite { mode: String, }, + Clickhouse, + CockroachDb, + Crucible, } impl std::fmt::Display for ServiceType { @@ -350,6 +350,9 @@ impl std::fmt::Display for ServiceType { ServiceType::BoundaryNtp { .. } | ServiceType::InternalNtp { .. } => write!(f, "ntp"), ServiceType::Maghemite { .. 
} => write!(f, "mg-ddm"), + ServiceType::Clickhouse => write!(f, "clickhouse"), + ServiceType::CockroachDb => write!(f, "cockroachdb"), + ServiceType::Crucible => write!(f, "crucible"), } } } @@ -427,6 +430,9 @@ impl From for sled_agent_client::types::ServiceType { AutoSt::InternalNtp { ntp_servers, dns_servers, domain } } St::Maghemite { mode } => AutoSt::Maghemite { mode }, + St::Clickhouse => AutoSt::Clickhouse, + St::CockroachDb => AutoSt::CockroachDb, + St::Crucible => AutoSt::Crucible, } } } @@ -437,25 +443,31 @@ impl From for sled_agent_client::types::ServiceType { )] #[serde(rename_all = "snake_case")] pub enum ZoneType { + Clickhouse, + CockroachDb, + CruciblePantry, + Crucible, ExternalDns, InternalDns, Nexus, + Ntp, Oximeter, Switch, - CruciblePantry, - Ntp, } impl From for sled_agent_client::types::ZoneType { fn from(zt: ZoneType) -> Self { match zt { + ZoneType::Clickhouse => Self::Clickhouse, + ZoneType::CockroachDb => Self::CockroachDb, + ZoneType::Crucible => Self::Crucible, + ZoneType::CruciblePantry => Self::CruciblePantry, ZoneType::InternalDns => Self::InternalDns, ZoneType::ExternalDns => Self::ExternalDns, ZoneType::Nexus => Self::Nexus, + ZoneType::Ntp => Self::Ntp, ZoneType::Oximeter => Self::Oximeter, ZoneType::Switch => Self::Switch, - ZoneType::CruciblePantry => Self::CruciblePantry, - ZoneType::Ntp => Self::Ntp, } } } @@ -464,13 +476,16 @@ impl std::fmt::Display for ZoneType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use ZoneType::*; let name = match self { + Clickhouse => "clickhouse", + CockroachDb => "cockroachdb", + Crucible => "crucible", + CruciblePantry => "crucible_pantry", ExternalDns => "external_dns", InternalDns => "internal_dns", Nexus => "nexus", + Ntp => "ntp", Oximeter => "oximeter", Switch => "switch", - CruciblePantry => "crucible_pantry", - Ntp => "ntp", }; write!(f, "{name}") } @@ -487,6 +502,9 @@ pub struct ServiceZoneRequest { pub zone_type: ZoneType, // The addresses on which the service 
should listen for requests. pub addresses: Vec, + // Datasets which should be managed by this service. + #[serde(default)] + pub dataset: Option, // The addresses in the global zone which should be created, if necessary // to route to the service. // @@ -511,13 +529,14 @@ impl From for sled_agent_client::types::ServiceZoneRequest { id: s.id, zone_type: s.zone_type.into(), addresses: s.addresses, + dataset: s.dataset.map(|d| d.into()), gz_addresses: s.gz_addresses, services, } } } -/// Used to request that the Sled initialize certain services on initialization. +/// Used to request that the Sled initialize certain services. /// /// This may be used to record that certain sleds are responsible for /// launching services which may not be associated with a dataset, such diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 3f2f4af54e..9bc73ce22b 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -296,6 +296,7 @@ impl Plan { id, zone_type: ZoneType::ExternalDns, addresses: vec![internal_ip], + dataset: None, gz_addresses: vec![], services: vec![ServiceType::ExternalDns { http_address: SocketAddrV6::new( @@ -329,6 +330,7 @@ impl Plan { id, zone_type: ZoneType::Nexus, addresses: vec![address], + dataset: None, gz_addresses: vec![], services: vec![ServiceType::Nexus { internal_ip: address, @@ -354,6 +356,7 @@ impl Plan { id, zone_type: ZoneType::Oximeter, addresses: vec![address], + dataset: None, gz_addresses: vec![], services: vec![ServiceType::Oximeter], }) @@ -373,9 +376,7 @@ impl Plan { request.datasets.push(DatasetEnsureBody { id, zpool_id: u2_zpools[0], - dataset_kind: crate::params::DatasetKind::CockroachDb { - all_addresses: vec![address], - }, + dataset_kind: crate::params::DatasetKind::CockroachDb, address, }); } @@ -444,6 +445,7 @@ impl Plan { id, zone_type: ZoneType::InternalDns, addresses: vec![dns_addr], + dataset: None, gz_addresses: 
vec![dns_subnet.gz_address().ip()], services: vec![ServiceType::InternalDns { http_address: SocketAddrV6::new( @@ -476,6 +478,7 @@ impl Plan { id, zone_type: ZoneType::CruciblePantry, addresses: vec![address], + dataset: None, gz_addresses: vec![], services: vec![ServiceType::CruciblePantry], }) @@ -523,6 +526,7 @@ impl Plan { id, zone_type: ZoneType::Ntp, addresses: vec![address], + dataset: None, gz_addresses: vec![], services, }); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 0b3912e3ba..4dbfc163be 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -20,7 +20,7 @@ //! of what other services Nexus wants to have executing on the sled. //! //! To accomplish this, the following interfaces are exposed: -//! - [ServiceManager::ensure_persistent] exposes an API to request a set of +//! - [ServiceManager::ensure_all_services] exposes an API to request a set of //! services that should persist beyond reboot. //! - [ServiceManager::activate_switch] exposes an API to specifically enable //! or disable (via [ServiceManager::deactivate_switch]) the switch zone. @@ -50,7 +50,10 @@ use itertools::Itertools; use omicron_common::address::Ipv6Subnet; use omicron_common::address::AZ_PREFIX; use omicron_common::address::BOOTSTRAP_ARTIFACT_PORT; +use omicron_common::address::CLICKHOUSE_PORT; +use omicron_common::address::COCKROACH_PORT; use omicron_common::address::CRUCIBLE_PANTRY_PORT; +use omicron_common::address::CRUCIBLE_PORT; use omicron_common::address::DENDRITE_PORT; use omicron_common::address::MGS_PORT; use omicron_common::address::NEXUS_INTERNAL_PORT; @@ -86,7 +89,9 @@ use tokio::task::JoinHandle; use uuid::Uuid; // The filename of ServiceManager's internal storage. 
-const SERVICE_CONFIG_FILENAME: &str = "service.toml"; +const SERVICES_CONFIG_FILENAME: &str = "services.toml"; +const STORAGE_SERVICES_CONFIG_FILENAME: &str = "storage-services.toml"; + // The filename of a half-completed config, in need of parameters supplied at // runtime. const PARTIAL_CONFIG_FILENAME: &str = "config-partial.toml"; @@ -194,10 +199,16 @@ impl From for omicron_common::api::external::Error { } } -/// The default path to service configuration, if one is not -/// explicitly provided. -pub fn default_services_config_path() -> PathBuf { - Path::new(omicron_common::OMICRON_CONFIG_PATH).join(SERVICE_CONFIG_FILENAME) +// The default path to service configuration +fn default_services_config_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH) + .join(SERVICES_CONFIG_FILENAME) +} + +// The default path to storage service configuration +fn default_storage_services_config_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH) + .join(STORAGE_SERVICES_CONFIG_FILENAME) } /// Configuration parameters which modify the [`ServiceManager`]'s behavior. @@ -211,9 +222,10 @@ pub struct Config { /// An optional internet gateway address for external services. pub gateway_address: Option, - /// The path for the ServiceManager to store information about - /// all running services. - pub all_svcs_config_path: PathBuf, + // The path for the ServiceManager to store information about + // all running services. 
+ all_svcs_config_path: PathBuf, + storage_svcs_config_path: PathBuf, } impl Config { @@ -227,6 +239,7 @@ impl Config { sidecar_revision, gateway_address, all_svcs_config_path: default_services_config_path(), + storage_svcs_config_path: default_storage_services_config_path(), } } } @@ -249,6 +262,7 @@ impl AllZoneRequests { #[derive(Clone, serde::Serialize, serde::Deserialize)] struct ZoneRequest { zone: ServiceZoneRequest, + // TODO: Consider collapsing "root" into ServiceZoneRequest root: PathBuf, } @@ -303,7 +317,10 @@ pub struct ServiceManagerInner { time_synced: AtomicBool, sidecar_revision: String, switch_zone_maghemite_links: Vec, + // Zones representing running services zones: Mutex>, + // Zones representing services which own datasets + dataset_zones: Mutex>, underlay_vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, bootstrap_vnic_allocator: VnicAllocator, @@ -373,6 +390,7 @@ impl ServiceManager { sidecar_revision, switch_zone_maghemite_links, zones: Mutex::new(vec![]), + dataset_zones: Mutex::new(vec![]), underlay_vnic_allocator: VnicAllocator::new( "Service", underlay_etherstub, @@ -521,6 +539,16 @@ impl ServiceManager { } } + // Returns either the path to the explicitly provided config path, or + // chooses the default one. + fn storage_services_config_path(&self) -> Result { + if let Some(info) = self.inner.sled_info.get() { + Ok(info.config.storage_svcs_config_path.clone()) + } else { + Err(Error::SledAgentNotReady) + } + } + // Advertise the /64 prefix of `address`, unless we already have. // // This method only blocks long enough to check our HashSet of @@ -864,6 +892,18 @@ impl ServiceManager { let opte_ports = self.opte_ports_needed(&request.zone).await?; let limit_priv = Self::privs_needed(&request.zone); + // If the zone is managing a particular dataset, plumb that + // dataset into the zone. Additionally, construct a "unique enough" name + // so we can create multiple zones of this type without collision. 
+ let (unique_name, datasets) = + if let Some(dataset) = &request.zone.dataset { + ( + Some(dataset.pool().to_string()), + vec![zone::Dataset { name: dataset.full() }], + ) + } else { + (None, vec![]) + }; let devices: Vec = device_names .iter() .map(|d| zone::Device { name: d.to_string() }) @@ -874,11 +914,8 @@ impl ServiceManager { &self.inner.underlay_vnic_allocator, &request.root, &request.zone.zone_type.to_string(), - // unique_name= - None, - // dataset= - &[], - // filesystems= + unique_name.as_deref(), + &datasets, &filesystems, &devices, opte_ports, @@ -893,6 +930,107 @@ impl ServiceManager { // These zones are self-assembling -- after they boot, there should // be no "zlogin" necessary to initialize. match request.zone.zone_type { + ZoneType::Clickhouse => { + let Some(info) = self.inner.sled_info.get() else { + return Err(Error::SledAgentNotReady); + }; + let datalink = installed_zone.get_control_vnic_name(); + let gateway = &info.underlay_address.to_string(); + assert_eq!(request.zone.addresses.len(), 1); + let listen_addr = &request.zone.addresses[0].to_string(); + let listen_port = &CLICKHOUSE_PORT.to_string(); + + let config = PropertyGroupBuilder::new("config") + .add_property("datalink", "astring", datalink) + .add_property("gateway", "astring", gateway) + .add_property("listen_addr", "astring", listen_addr) + .add_property("listen_port", "astring", listen_port) + .add_property("store", "astring", "/data"); + + let profile = ProfileBuilder::new("omicron").add_service( + ServiceBuilder::new("oxide/clickhouse").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(config), + ), + ); + profile + .add_to_zone(&self.inner.log, &installed_zone) + .await + .map_err(|err| { + Error::io("Failed to setup clickhouse profile", err) + })?; + return Ok(RunningZone::boot(installed_zone).await?); + } + ZoneType::CockroachDb => { + let Some(info) = self.inner.sled_info.get() else { + return Err(Error::SledAgentNotReady); + }; + let datalink = 
installed_zone.get_control_vnic_name(); + let gateway = &info.underlay_address.to_string(); + assert_eq!(request.zone.addresses.len(), 1); + let listen_addr = &request.zone.addresses[0].to_string(); + let listen_port = &COCKROACH_PORT.to_string(); + + let config = PropertyGroupBuilder::new("config") + .add_property("datalink", "astring", datalink) + .add_property("gateway", "astring", gateway) + .add_property("listen_addr", "astring", listen_addr) + .add_property("listen_port", "astring", listen_port) + .add_property("store", "astring", "/data"); + + let profile = ProfileBuilder::new("omicron").add_service( + ServiceBuilder::new("oxide/cockroachdb").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(config), + ), + ); + profile + .add_to_zone(&self.inner.log, &installed_zone) + .await + .map_err(|err| { + Error::io("Failed to setup CRDB profile", err) + })?; + return Ok(RunningZone::boot(installed_zone).await?); + } + ZoneType::Crucible => { + let Some(info) = self.inner.sled_info.get() else { + return Err(Error::SledAgentNotReady); + }; + let datalink = installed_zone.get_control_vnic_name(); + let gateway = &info.underlay_address.to_string(); + assert_eq!(request.zone.addresses.len(), 1); + let listen_addr = &request.zone.addresses[0].to_string(); + let listen_port = &CRUCIBLE_PORT.to_string(); + + let dataset = request + .zone + .dataset + .as_ref() + .expect("Crucible requires dataset"); + let uuid = &Uuid::new_v4().to_string(); + let config = PropertyGroupBuilder::new("config") + .add_property("datalink", "astring", datalink) + .add_property("gateway", "astring", gateway) + .add_property("dataset", "astring", &dataset.full()) + .add_property("listen_addr", "astring", listen_addr) + .add_property("listen_port", "astring", listen_port) + .add_property("uuid", "astring", uuid) + .add_property("store", "astring", "/data"); + + let profile = ProfileBuilder::new("omicron").add_service( + 
ServiceBuilder::new("oxide/crucible/agent").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(config), + ), + ); + profile + .add_to_zone(&self.inner.log, &installed_zone) + .await + .map_err(|err| { + Error::io("Failed to setup crucible profile", err) + })?; + return Ok(RunningZone::boot(installed_zone).await?); + } ZoneType::CruciblePantry => { let Some(info) = self.inner.sled_info.get() else { return Err(Error::SledAgentNotReady); @@ -1324,9 +1462,6 @@ impl ServiceManager { smfh.setprop("config/port", &format!("{}", DENDRITE_PORT))?; smfh.refresh()?; } - ServiceType::CruciblePantry => { - panic!("CruciblePantry is self-assembling now") - } ServiceType::BoundaryNtp { ntp_servers, dns_servers, @@ -1450,6 +1585,12 @@ impl ServiceManager { smfh.refresh()?; } + ServiceType::Crucible + | ServiceType::CruciblePantry + | ServiceType::CockroachDb + | ServiceType::Clickhouse => { + panic!("{service} is a service which exists as part of a self-assembling zone") + } } debug!(self.inner.log, "enabling service"); @@ -1514,7 +1655,7 @@ impl ServiceManager { /// These services will be instantiated by this function, and will be /// recorded to a local file to ensure they start automatically on next /// boot. - pub async fn ensure_persistent( + pub async fn ensure_all_services( &self, request: ServiceEnsureBody, ) -> Result<(), Error> { @@ -1590,6 +1731,72 @@ impl ServiceManager { Ok(()) } + /// Ensures that a storage zone be initialized. + /// + /// These services will be instantiated by this function, and will be + /// recorded to a local file to ensure they start automatically on next + /// boot. 
+ pub async fn ensure_storage_service( + &self, + request: ServiceZoneRequest, + ) -> Result<(), Error> { + let mut existing_zones = self.inner.dataset_zones.lock().await; + let config_path = self.storage_services_config_path()?; + + let mut zone_requests: AllZoneRequests = { + if config_path.exists() { + debug!(self.inner.log, "Reading old storage service requests"); + toml::from_str( + &tokio::fs::read_to_string(&config_path) + .await + .map_err(|err| Error::io_path(&config_path, err))?, + ) + .map_err(|err| Error::TomlDeserialize { + path: config_path.clone(), + err, + })? + } else { + debug!(self.inner.log, "No old storage service requests"); + AllZoneRequests::new() + } + }; + + if !zone_requests + .requests + .iter() + .any(|zone_request| zone_request.zone.id == request.id) + { + // If this is a new request, provision a zone filesystem on the same + // disk as the dataset. + let dataset = request + .dataset + .as_ref() + .expect("Storage services should have a dataset"); + let root = dataset + .pool() + .dataset_mountpoint(sled_hardware::disk::ZONE_DATASET); + zone_requests.requests.push(ZoneRequest { zone: request, root }); + } + + self.initialize_services_locked( + &mut existing_zones, + &zone_requests.requests, + ) + .await?; + + let serialized_services = toml::Value::try_from(&zone_requests) + .expect("Cannot serialize service list"); + let services_str = + toml::to_string(&serialized_services).map_err(|err| { + Error::TomlSerialize { path: config_path.clone(), err } + })?; + tokio::fs::write(&config_path, services_str) + .await + .map_err(|err| Error::io_path(&config_path, err))?; + + Ok(()) + } + pub fn boottime_rewrite(&self, zones: &Vec) { if self .inner @@ -1745,6 +1952,7 @@ impl ServiceManager { id: Uuid::new_v4(), zone_type: ZoneType::Switch, addresses, + dataset: None, gz_addresses: vec![], services, }; @@ -2099,11 +2307,12 @@ mod test { async fn ensure_new_service(mgr: &ServiceManager, id: Uuid) { let _expectations = expect_new_service(); - 
mgr.ensure_persistent(ServiceEnsureBody { + mgr.ensure_all_services(ServiceEnsureBody { services: vec![ServiceZoneRequest { id, zone_type: ZoneType::Oximeter, addresses: vec![Ipv6Addr::LOCALHOST], + dataset: None, gz_addresses: vec![], services: vec![ServiceType::Oximeter], }], @@ -2115,11 +2324,12 @@ mod test { // Prepare to call "ensure" for a service which already exists. We should // return the service without actually installing a new zone. async fn ensure_existing_service(mgr: &ServiceManager, id: Uuid) { - mgr.ensure_persistent(ServiceEnsureBody { + mgr.ensure_all_services(ServiceEnsureBody { services: vec![ServiceZoneRequest { id, zone_type: ZoneType::Oximeter, addresses: vec![Ipv6Addr::LOCALHOST], + dataset: None, gz_addresses: vec![], services: vec![ServiceType::Oximeter], }], @@ -2162,12 +2372,15 @@ mod test { fn make_config(&self) -> Config { let all_svcs_config_path = - self.config_dir.path().join(SERVICE_CONFIG_FILENAME); + self.config_dir.path().join(SERVICES_CONFIG_FILENAME); + let storage_svcs_config_path = + self.config_dir.path().join(STORAGE_SERVICES_CONFIG_FILENAME); Config { sled_id: Uuid::new_v4(), sidecar_revision: "rev_whatever_its_a_test".to_string(), gateway_address: None, all_svcs_config_path, + storage_svcs_config_path, } } } @@ -2190,11 +2403,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new( - &log, - Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), - ) - .await, + StorageManager::new(&log).await, ) .await .unwrap(); @@ -2239,11 +2448,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new( - &log, - Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), - ) - .await, + StorageManager::new(&log).await, ) .await .unwrap(); @@ -2291,11 +2496,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new( - &log, - Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), - ) - .await, + StorageManager::new(&log).await, ) .await 
.unwrap(); @@ -2331,11 +2532,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new( - &log, - Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), - ) - .await, + StorageManager::new(&log).await, ) .await .unwrap(); @@ -2380,11 +2577,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new( - &log, - Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), - ) - .await, + StorageManager::new(&log).await, ) .await .unwrap(); @@ -2422,11 +2615,7 @@ mod test { "rev-test".to_string(), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new( - &log, - Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), - ) - .await, + StorageManager::new(&log).await, ) .await .unwrap(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 1831ac2594..1f73162218 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -12,7 +12,7 @@ use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, InstanceUnregisterResponse, ServiceEnsureBody, - SledRole, TimeSync, VpcFirewallRule, Zpool, + ServiceType, SledRole, TimeSync, VpcFirewallRule, ZoneType, Zpool, }; use crate::services::{self, ServiceManager}; use crate::storage_manager::{self, StorageManager}; @@ -254,7 +254,6 @@ impl SledAgent { storage .setup_underlay_access(storage_manager::UnderlayAccess { lazy_nexus_client: lazy_nexus_client.clone(), - underlay_address: *sled_address.ip(), sled_id: request.id, }) .await?; @@ -518,7 +517,7 @@ impl SledAgent { &self, requested_services: ServiceEnsureBody, ) -> Result<(), Error> { - self.inner.services.ensure_persistent(requested_services).await?; + self.inner.services.ensure_all_services(requested_services).await?; Ok(()) } @@ -540,14 +539,40 @@ impl SledAgent { /// Ensures that a filesystem type exists within the zpool. 
pub async fn filesystem_ensure( &self, - zpool_uuid: Uuid, + dataset_id: Uuid, + zpool_id: Uuid, dataset_kind: DatasetKind, address: SocketAddrV6, ) -> Result<(), Error> { - self.inner + // First, ensure the dataset exists + let dataset = self + .inner .storage - .upsert_filesystem(zpool_uuid, dataset_kind, address) + .upsert_filesystem(dataset_id, zpool_id, dataset_kind.clone()) .await?; + let (zone_type, services) = match dataset_kind { + DatasetKind::Clickhouse => { + (ZoneType::Clickhouse, vec![ServiceType::Clickhouse]) + } + DatasetKind::CockroachDb => { + (ZoneType::CockroachDb, vec![ServiceType::CockroachDb]) + } + DatasetKind::Crucible => { + (ZoneType::Crucible, vec![ServiceType::Crucible]) + } + }; + + // Next, ensure a zone exists to manage storage for that dataset + let request = crate::params::ServiceZoneRequest { + id: dataset_id, + zone_type, + addresses: vec![*address.ip()], + dataset: Some(dataset), + gz_addresses: vec![], + services, + }; + self.inner.services.ensure_storage_service(request).await?; + Ok(()) } diff --git a/sled-agent/src/storage/dataset.rs b/sled-agent/src/storage/dataset.rs new file mode 100644 index 0000000000..e36bc89e4e --- /dev/null +++ b/sled-agent/src/storage/dataset.rs @@ -0,0 +1,72 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::params::DatasetKind; +use illumos_utils::zpool::ZpoolKind; +use illumos_utils::zpool::ZpoolName; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive( + Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, +)] +pub struct DatasetName { + // A unique identifier for the Zpool on which the dataset is stored. + pool_name: ZpoolName, + // A name for the dataset within the Zpool. 
+ kind: DatasetKind, +} + +impl DatasetName { + pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self { + Self { pool_name, kind } + } + + pub fn pool(&self) -> &ZpoolName { + &self.pool_name + } + + pub fn dataset(&self) -> &DatasetKind { + &self.kind + } + + pub fn full(&self) -> String { + format!("{}/{}", self.pool_name, self.kind.to_string()) + } +} + +impl From for sled_agent_client::types::DatasetName { + fn from(n: DatasetName) -> Self { + let id = n.pool().id(); + + // NOTE: Ideally, this translation would live alongside the definitions + // of ZpoolKind and ZpoolName, but they're currently in illumos-utils, + // which has no dependency on sled_agent_client. + let kind = match n.pool().kind() { + ZpoolKind::External => { + sled_agent_client::types::ZpoolKind::External + } + ZpoolKind::Internal => { + sled_agent_client::types::ZpoolKind::Internal + } + }; + let pool_name = sled_agent_client::types::ZpoolName { id, kind }; + + Self { pool_name, kind: n.dataset().clone().into() } + } +} + +#[cfg(test)] +mod test { + use super::*; + use uuid::Uuid; + + #[test] + fn serialize_dataset_name() { + let pool = ZpoolName::new_internal(Uuid::new_v4()); + let kind = DatasetKind::Crucible; + let name = DatasetName::new(pool, kind); + toml::to_string(&name).unwrap(); + } +} diff --git a/sled-agent/src/storage/mod.rs b/sled-agent/src/storage/mod.rs new file mode 100644 index 0000000000..8444ecace4 --- /dev/null +++ b/sled-agent/src/storage/mod.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Management of local storage + +pub(crate) mod dataset; diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 142fa9f568..8e23fc8929 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -6,35 +6,26 @@ use crate::nexus::LazyNexusClient; use crate::params::DatasetKind; -use crate::profile::*; +use crate::storage::dataset::DatasetName; use futures::stream::FuturesOrdered; use futures::FutureExt; use futures::StreamExt; -use illumos_utils::dladm::Etherstub; -use illumos_utils::link::VnicAllocator; -use illumos_utils::running_zone::{InstalledZone, RunningZone}; -use illumos_utils::zone::AddressRequest; use illumos_utils::zpool::{ZpoolKind, ZpoolName}; -use illumos_utils::{zfs::Mountpoint, zone::ZONE_PREFIX, zpool::ZpoolInfo}; +use illumos_utils::{zfs::Mountpoint, zpool::ZpoolInfo}; use nexus_client::types::PhysicalDiskDeleteRequest; use nexus_client::types::PhysicalDiskKind; use nexus_client::types::PhysicalDiskPutRequest; use nexus_client::types::ZpoolPutRequest; use omicron_common::api::external::{ByteCount, ByteCountRangeError}; use omicron_common::backoff; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; use sled_hardware::{Disk, DiskIdentity, DiskVariant, UnparsedDisk}; use slog::Logger; use std::collections::hash_map; use std::collections::HashMap; use std::convert::TryFrom; -use std::net::{IpAddr, Ipv6Addr, SocketAddrV6}; use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; -use tokio::fs::{create_dir_all, File}; -use tokio::io::AsyncWriteExt; use tokio::sync::{mpsc, oneshot, Mutex}; use tokio::task::JoinHandle; use uuid::Uuid; @@ -90,6 +81,9 @@ pub enum Error { err: uuid::Error, }, + #[error("Dataset {name:?} exists with a different uuid (has {old}, requested {new})")] + UuidMismatch { name: DatasetName, old: Uuid, new: Uuid }, + #[error("Error parsing pool {name}'s size: {err}")] BadPoolSize { name: String, @@ -132,18 +126,10 @@ pub enum Error { 
UnderlayNotInitialized, } -impl Error { - fn io(message: &str, err: std::io::Error) -> Self { - Self::Io { message: message.to_string(), err } - } -} - /// A ZFS storage pool. struct Pool { name: ZpoolName, info: ZpoolInfo, - // ZFS filesytem UUID -> Zone. - zones: HashMap, parent: DiskIdentity, } @@ -153,312 +139,12 @@ impl Pool { /// Returns Ok if the pool exists. fn new(name: ZpoolName, parent: DiskIdentity) -> Result { let info = Zpool::get_info(&name.to_string())?; - Ok(Pool { name, info, zones: HashMap::new(), parent }) - } - - /// Associate an already running zone with this pool object. - /// - /// Typically this is used when a dataset within the zone (identified - /// by ID) has a running zone (e.g. Crucible, Cockroach) operating on - /// behalf of that data. - fn add_zone(&mut self, id: Uuid, zone: RunningZone) { - self.zones.insert(id, zone); - } - - /// Access a zone managing data within this pool. - fn get_zone(&self, id: Uuid) -> Option<&RunningZone> { - self.zones.get(&id) - } - - /// Returns the ID of the pool itself. - fn id(&self) -> Uuid { - self.name.id() + Ok(Pool { name, info, parent }) } fn parent(&self) -> &DiskIdentity { &self.parent } - - /// Returns the path for the configuration of a particular - /// dataset within the pool. This configuration file provides - /// the necessary information for zones to "launch themselves" - /// after a reboot. - async fn dataset_config_path( - &self, - dataset_id: Uuid, - ) -> Result { - let path = std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join(self.id().to_string()); - create_dir_all(&path).await.map_err(|err| Error::Io { - message: format!("creating config dir {path:?}, which would contain config for {dataset_id}"), - err, - })?; - let mut path = path.join(dataset_id.to_string()); - path.set_extension("toml"); - Ok(path) - } -} - -#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)] -struct DatasetName { - // A unique identifier for the Zpool on which the dataset is stored. 
- pool_name: ZpoolName, - // A name for the dataset within the Zpool. - dataset_name: String, -} - -impl DatasetName { - fn new(pool_name: ZpoolName, dataset_name: &str) -> Self { - Self { pool_name, dataset_name: dataset_name.to_string() } - } - - fn full(&self) -> String { - format!("{}/{}", self.pool_name, self.dataset_name) - } -} - -// Description of a dataset within a ZFS pool, which should be created -// by the Sled Agent. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -struct DatasetInfo { - address: SocketAddrV6, - kind: DatasetKind, - name: DatasetName, -} - -impl DatasetInfo { - fn new( - pool: ZpoolName, - kind: DatasetKind, - address: SocketAddrV6, - ) -> DatasetInfo { - match kind { - DatasetKind::CockroachDb { .. } => DatasetInfo { - name: DatasetName::new(pool, "cockroachdb"), - address, - kind, - }, - DatasetKind::Clickhouse { .. } => DatasetInfo { - name: DatasetName::new(pool, "clickhouse"), - address, - kind, - }, - DatasetKind::Crucible { .. } => DatasetInfo { - name: DatasetName::new(pool, "crucible"), - address, - kind, - }, - } - } - - fn zone_prefix(&self) -> String { - format!("{}{}_", ZONE_PREFIX, self.name.full()) - } -} - -// Ensures that a zone backing a particular dataset is running. -async fn ensure_running_zone( - log: &Logger, - vnic_allocator: &VnicAllocator, - dataset_info: &DatasetInfo, - dataset_name: &DatasetName, - do_format: bool, - underlay_address: Ipv6Addr, -) -> Result { - let address_request = AddressRequest::new_static( - IpAddr::V6(*dataset_info.address.ip()), - None, - ); - - let err = RunningZone::get( - log, - &vnic_allocator, - &dataset_info.zone_prefix(), - address_request, - ) - .await; - match err { - Ok(zone) => { - info!(log, "Zone for {} is already running", dataset_name.full()); - return Ok(zone); - } - Err(illumos_utils::running_zone::GetZoneError::NotFound { .. 
}) => { - info!(log, "Zone for {} was not found", dataset_name.full()); - - let zone_root_path = dataset_name - .pool_name - .dataset_mountpoint(sled_hardware::disk::ZONE_DATASET); - info!( - log, - "Installing zone {} to {}", - dataset_name.full(), - zone_root_path.display() - ); - let installed_zone = InstalledZone::install( - log, - vnic_allocator, - &zone_root_path, - &dataset_info.name.dataset_name, - Some(&dataset_name.pool_name.to_string()), - &[zone::Dataset { name: dataset_name.full() }], - &[], - &[], - vec![], - None, - vec![], - vec![], - ) - .await?; - - let datalink = installed_zone.get_control_vnic_name(); - let gateway = &underlay_address.to_string(); - let listen_addr = &dataset_info.address.ip().to_string(); - let listen_port = &dataset_info.address.port().to_string(); - - let zone = match dataset_info.kind { - DatasetKind::CockroachDb { .. } => { - let config = PropertyGroupBuilder::new("config") - .add_property("datalink", "astring", datalink) - .add_property("gateway", "astring", gateway) - .add_property("listen_addr", "astring", listen_addr) - .add_property("listen_port", "astring", listen_port) - .add_property("store", "astring", "/data"); - - let profile = ProfileBuilder::new("omicron").add_service( - ServiceBuilder::new("oxide/cockroachdb").add_instance( - ServiceInstanceBuilder::new("default") - .add_property_group(config), - ), - ); - profile.add_to_zone(log, &installed_zone).await.map_err( - |err| Error::io("Failed to setup CRDB profile", err), - )?; - let zone = RunningZone::boot(installed_zone).await?; - - // Await liveness of the cluster. 
- info!(log, "start_zone: awaiting liveness of CRDB"); - let check_health = || async { - let http_addr = SocketAddrV6::new( - *dataset_info.address.ip(), - 8080, - 0, - 0, - ); - reqwest::get(format!( - "http://{}/health?ready=1", - http_addr - )) - .await - .map_err(backoff::BackoffError::transient) - }; - let log_failure = |_, call_count, total_duration| { - if call_count == 0 { - info!(log, "cockroachdb not yet alive"); - } else if total_duration - > std::time::Duration::from_secs(5) - { - warn!(log, "cockroachdb not yet alive"; "total duration" => ?total_duration); - } - }; - backoff::retry_notify_ext( - backoff::retry_policy_internal_service(), - check_health, - log_failure, - ) - .await - .expect("expected an infinite retry loop waiting for crdb"); - - info!(log, "CRDB is online"); - // If requested, format the cluster with the initial tables. - if do_format { - info!(log, "Formatting CRDB"); - zone.run_cmd(&[ - "/opt/oxide/cockroachdb/bin/cockroach", - "sql", - "--insecure", - "--host", - &dataset_info.address.to_string(), - "--file", - "/opt/oxide/cockroachdb/sql/dbwipe.sql", - ])?; - zone.run_cmd(&[ - "/opt/oxide/cockroachdb/bin/cockroach", - "sql", - "--insecure", - "--host", - &dataset_info.address.to_string(), - "--file", - "/opt/oxide/cockroachdb/sql/dbinit.sql", - ])?; - info!(log, "Formatting CRDB - Completed"); - } - - zone - } - DatasetKind::Clickhouse { .. 
} => { - let config = PropertyGroupBuilder::new("config") - .add_property("datalink", "astring", datalink) - .add_property("gateway", "astring", gateway) - .add_property("listen_addr", "astring", listen_addr) - .add_property("listen_port", "astring", listen_port) - .add_property("store", "astring", "/data"); - - let profile = ProfileBuilder::new("omicron").add_service( - ServiceBuilder::new("oxide/clickhouse").add_instance( - ServiceInstanceBuilder::new("default") - .add_property_group(config), - ), - ); - profile.add_to_zone(log, &installed_zone).await.map_err( - |err| { - Error::io("Failed to setup clickhouse profile", err) - }, - )?; - RunningZone::boot(installed_zone).await? - } - DatasetKind::Crucible => { - let dataset = &dataset_info.name.full(); - let uuid = &Uuid::new_v4().to_string(); - let config = PropertyGroupBuilder::new("config") - .add_property("datalink", "astring", datalink) - .add_property("gateway", "astring", gateway) - .add_property("dataset", "astring", dataset) - .add_property("listen_addr", "astring", listen_addr) - .add_property("listen_port", "astring", listen_port) - .add_property("uuid", "astring", uuid) - .add_property("store", "astring", "/data"); - - let profile = ProfileBuilder::new("omicron").add_service( - ServiceBuilder::new("oxide/crucible/agent") - .add_instance( - ServiceInstanceBuilder::new("default") - .add_property_group(config), - ), - ); - profile.add_to_zone(log, &installed_zone).await.map_err( - |err| { - Error::io("Failed to setup crucible profile", err) - }, - )?; - RunningZone::boot(installed_zone).await? 
- } - }; - Ok(zone) - } - Err(illumos_utils::running_zone::GetZoneError::NotRunning { - name, - state, - }) => { - // TODO(https://github.com/oxidecomputer/omicron/issues/725): - unimplemented!("Handle a zone which exists, but is not running: {name}, in {state:?}"); - } - Err(err) => { - // TODO(https://github.com/oxidecomputer/omicron/issues/725): - unimplemented!( - "Handle a zone which exists, has some other problem: {err}" - ); - } - } } // The type of a future which is used to send a notification to Nexus. @@ -467,10 +153,10 @@ type NotifyFut = #[derive(Debug)] struct NewFilesystemRequest { + dataset_id: Uuid, zpool_id: Uuid, dataset_kind: DatasetKind, - address: SocketAddrV6, - responder: oneshot::Sender>, + responder: oneshot::Sender>, } struct UnderlayRequest { @@ -558,7 +244,6 @@ impl StorageResources { /// Describes the access to the underlay used by the StorageManager. pub struct UnderlayAccess { pub lazy_nexus_client: LazyNexusClient, - pub underlay_address: Ipv6Addr, pub sled_id: Uuid, } @@ -567,7 +252,6 @@ struct StorageWorker { log: Logger, nexus_notifications: FuturesOrdered, rx: mpsc::Receiver, - vnic_allocator: VnicAllocator, underlay: Arc>>, } @@ -582,17 +266,16 @@ impl StorageWorker { // creating it if `do_format` is true. // // Returns the UUID attached to the ZFS filesystem. - fn ensure_dataset_with_id( + fn ensure_dataset( + &mut self, + dataset_id: Uuid, dataset_name: &DatasetName, - do_format: bool, - ) -> Result { + ) -> Result<(), Error> { let zoned = true; let fs_name = &dataset_name.full(); + let do_format = true; Zfs::ensure_filesystem( - &format!( - "{}/{}", - dataset_name.pool_name, dataset_name.dataset_name - ), + &dataset_name.full(), Mountpoint::Path(PathBuf::from("/data")), zoned, do_format, @@ -600,70 +283,18 @@ impl StorageWorker { // Ensure the dataset has a usable UUID. 
if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { if let Ok(id) = id_str.parse::() { - return Ok(id); + if id != dataset_id { + return Err(Error::UuidMismatch { + name: dataset_name.clone(), + old: id, + new: dataset_id, + }); + } + return Ok(()); } } - let id = Uuid::new_v4(); - Zfs::set_oxide_value(&fs_name, "uuid", &id.to_string())?; - Ok(id) - } - - // Starts the zone for a dataset within a particular zpool. - // - // If requested via the `do_format` parameter, may also initialize - // these resources. - // - // Returns the UUID attached to the underlying ZFS dataset. - // Returns (was_inserted, Uuid). - async fn initialize_dataset_and_zone( - &mut self, - pool: &mut Pool, - dataset_info: &DatasetInfo, - do_format: bool, - ) -> Result<(bool, Uuid), Error> { - // Ensure the underlying dataset exists before trying to poke at zones. - let dataset_name = &dataset_info.name; - info!(&self.log, "Ensuring dataset {} exists", dataset_name.full()); - let id = - StorageWorker::ensure_dataset_with_id(&dataset_name, do_format)?; - - // If this zone has already been processed by us, return immediately. - if let Some(_) = pool.get_zone(id) { - return Ok((false, id)); - } - // Otherwise, the zone may or may not exist. - // We need to either look up or create the zone. 
- info!( - &self.log, - "Ensuring zone for {} is running", - dataset_name.full() - ); - - let underlay_guard = self.underlay.lock().await; - let Some(underlay) = underlay_guard.as_ref() else { - return Err(Error::UnderlayNotInitialized); - }; - let underlay_address = underlay.underlay_address; - drop(underlay_guard); - - let zone = ensure_running_zone( - &self.log, - &self.vnic_allocator, - dataset_info, - &dataset_name, - do_format, - underlay_address, - ) - .await?; - - info!( - &self.log, - "Zone {} with address {} is running", - zone.name(), - dataset_info.address, - ); - pool.add_zone(id, zone); - Ok((true, id)) + Zfs::set_oxide_value(&fs_name, "uuid", &dataset_id.to_string())?; + Ok(()) } // Adds a "notification to nexus" to `nexus_notifications`, @@ -1012,34 +643,6 @@ impl StorageWorker { })?; // Notify Nexus of the zpool. self.add_zpool_notify(&pool, size); - - // If we find filesystems within our datasets, ensure their - // zones are up-and-running. - let mut datasets = vec![]; - let existing_filesystems = Zfs::list_datasets(&pool_name.to_string())?; - for fs_name in existing_filesystems { - info!( - &self.log, - "StorageWorker loading fs {} on zpool {}", - fs_name, - pool_name.to_string() - ); - // We intentionally do not exit on error here - - // otherwise, the failure of a single dataset would - // stop the storage manager from processing all storage. - // - // Instead, we opt to log the failure. 
- let dataset_name = DatasetName::new(pool_name.clone(), &fs_name); - let result = self.load_dataset(pool, &dataset_name).await; - match result { - Ok(dataset) => datasets.push(dataset), - Err(e) => warn!( - &self.log, - "StorageWorker Failed to load dataset: {}", e - ), - } - } - Ok(()) } @@ -1048,7 +651,7 @@ impl StorageWorker { &mut self, resources: &StorageResources, request: &NewFilesystemRequest, - ) -> Result<(), Error> { + ) -> Result { info!(self.log, "add_dataset: {:?}", request); let mut pools = resources.pools.lock().await; let pool = pools.get_mut(&request.zpool_id).ok_or_else(|| { @@ -1057,80 +660,10 @@ impl StorageWorker { request.zpool_id )) })?; - - let dataset_info = DatasetInfo::new( - pool.name.clone(), - request.dataset_kind.clone(), - request.address, - ); - let (is_new_dataset, id) = self - .initialize_dataset_and_zone( - pool, - &dataset_info, - // do_format= - true, - ) - .await?; - - if !is_new_dataset { - return Ok(()); - } - - // Now that the dataset has been initialized, record the configuration - // so it can re-initialize itself after a reboot. - let path = pool.dataset_config_path(id).await?; - let info_str = toml::to_string(&dataset_info) - .map_err(|err| Error::Serialize { path: path.clone(), err })?; - let pool_name = &pool.name; - let mut file = File::create(&path).await.map_err(|err| Error::Io { - message: format!("Failed creating config file at {path:?} for pool {pool_name}, dataset: {id}"), - err, - })?; - file.write_all(info_str.as_bytes()).await.map_err(|err| Error::Io { - message: format!("Failed writing config to {path:?} for pool {pool_name}, dataset: {id}"), - err, - })?; - - Ok(()) - } - - async fn load_dataset( - &mut self, - pool: &mut Pool, - dataset_name: &DatasetName, - ) -> Result<(Uuid, SocketAddrV6, DatasetKind), Error> { - let name = dataset_name.full(); - let id = Zfs::get_oxide_value(&name, "uuid")? 
- .parse::() - .map_err(|err| Error::ParseDatasetUuid { name, err })?; - let config_path = pool.dataset_config_path(id).await?; - info!( - self.log, - "Loading Dataset from {}", - config_path.to_string_lossy() - ); - let pool_name = pool.info.name(); - let dataset_info: DatasetInfo = - toml::from_str( - &tokio::fs::read_to_string(&config_path).await.map_err(|err| Error::Io { - message: format!("read config for pool {pool_name}, dataset {dataset_name:?} from {config_path:?}"), - err, - })? - ).map_err(|err| { - Error::Deserialize { - path: config_path, - err, - } - })?; - self.initialize_dataset_and_zone( - pool, - &dataset_info, - // do_format= - false, - ) - .await?; - - Ok((id, dataset_info.address, dataset_info.kind)) + let dataset_name = + DatasetName::new(pool.name.clone(), request.dataset_kind.clone()); + self.ensure_dataset(request.dataset_id, &dataset_name)?; + Ok(dataset_name) } // Small wrapper around `Self::do_work_internal` that ensures we always @@ -1221,14 +754,13 @@ pub struct StorageManager { impl StorageManager { /// Creates a new [`StorageManager`] which should manage local storage. 
- pub async fn new(log: &Logger, etherstub: Etherstub) -> Self { + pub async fn new(log: &Logger) -> Self { let log = log.new(o!("component" => "StorageManager")); let resources = StorageResources { disks: Arc::new(Mutex::new(HashMap::new())), pools: Arc::new(Mutex::new(HashMap::new())), }; let (tx, rx) = mpsc::channel(30); - let vnic_allocator = VnicAllocator::new("Storage", etherstub); StorageManager { inner: Arc::new(StorageManagerInner { @@ -1240,7 +772,6 @@ impl StorageManager { log, nexus_notifications: FuturesOrdered::new(), rx, - vnic_allocator, underlay: Arc::new(Mutex::new(None)), }; @@ -1351,15 +882,15 @@ impl StorageManager { pub async fn upsert_filesystem( &self, + dataset_id: Uuid, zpool_id: Uuid, dataset_kind: DatasetKind, - address: SocketAddrV6, - ) -> Result<(), Error> { + ) -> Result { let (tx, rx) = oneshot::channel(); let request = NewFilesystemRequest { + dataset_id, zpool_id, dataset_kind, - address, responder: tx, }; @@ -1367,13 +898,13 @@ impl StorageManager { .tx .send(StorageWorkerRequest::NewFilesystem(request)) .await - .map_err(|_| ()) + .map_err(|e| e.to_string()) .expect("Storage worker bug (not alive)"); - rx.await.expect( + let dataset_name = rx.await.expect( "Storage worker bug (dropped responder without responding)", )?; - Ok(()) + Ok(dataset_name) } pub fn resources(&self) -> &StorageResources { @@ -1392,22 +923,3 @@ impl Drop for StorageManagerInner { self.task.abort(); } } - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn serialize_dataset_info() { - let dataset_info = DatasetInfo { - address: "[::1]:8080".parse().unwrap(), - kind: DatasetKind::Crucible, - name: DatasetName::new( - ZpoolName::new_internal(Uuid::new_v4()), - "dataset", - ), - }; - - toml::to_string(&dataset_info).unwrap(); - } -} diff --git a/smf/cockroachdb/method_script.sh b/smf/cockroachdb/method_script.sh index fb9bacf854..de01742829 100755 --- a/smf/cockroachdb/method_script.sh +++ b/smf/cockroachdb/method_script.sh @@ -28,3 +28,9 @@ args=( ) 
exec /opt/oxide/cockroachdb/bin/cockroach start-single-node "${args[@]}" & + +# TODO: Do this from RSS; I don't think we can make this call locally +# since there may be multiple instances running... + +# /opt/oxide/cockroachdb/bin/cockroach sql --insecure --host "$FULL_ADDRESS" --file /opt/oxide/cockroachdb/sql/dbwipe.sql +# /opt/oxide/cockroachdb/bin/cockroach sql --insecure --host "$FULL_ADDRESS" --file /opt/oxide/cockroachdb/sql/dbinit.sql From 0b4b04037634004d251193802719d65ae290a24b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 28 Apr 2023 12:19:16 -0400 Subject: [PATCH 02/15] CRDB auto-format on boot --- sled-agent/src/services.rs | 67 +++++++++++++++++++++++++++++-- sled-agent/src/storage/dataset.rs | 2 +- smf/cockroachdb/method_script.sh | 6 --- 3 files changed, 65 insertions(+), 10 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 4dbfc163be..f9c2251e55 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -968,8 +968,12 @@ impl ServiceManager { let datalink = installed_zone.get_control_vnic_name(); let gateway = &info.underlay_address.to_string(); assert_eq!(request.zone.addresses.len(), 1); - let listen_addr = &request.zone.addresses[0].to_string(); - let listen_port = &COCKROACH_PORT.to_string(); + let address = SocketAddr::new( + IpAddr::V6(request.zone.addresses[0]), + COCKROACH_PORT, + ); + let listen_addr = &address.ip().to_string(); + let listen_port = &address.port().to_string(); let config = PropertyGroupBuilder::new("config") .add_property("datalink", "astring", datalink) @@ -990,7 +994,64 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup CRDB profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + let running_zone = RunningZone::boot(installed_zone).await?; + + // TODO: The following lines are necessary to initialize CRDB + // in a single-node environment. They're bad! They're wrong! 
+ // We definitely shouldn't be wiping the database every time + // we want to boot this zone. + // + // But they're also necessary to prevent the build from + // regressing. + // + // NOTE: In the (very short-term) future, this will be + // replaced by the following: + // 1. CRDB will simply "start", rather than "start-single-node". + // 2. The Sled Agent will expose an explicit API to "init" the + // Cockroach cluster, and populate it with the expected + // contents. + let format_crdb = || async { + info!(self.inner.log, "Formatting CRDB"); + running_zone + .run_cmd(&[ + "/opt/oxide/cockroachdb/bin/cockroach", + "sql", + "--insecure", + "--host", + &address.to_string(), + "--file", + "/opt/oxide/cockroachdb/sql/dbwipe.sql", + ]) + .map_err(BackoffError::transient)?; + running_zone + .run_cmd(&[ + "/opt/oxide/cockroachdb/bin/cockroach", + "sql", + "--insecure", + "--host", + &address.to_string(), + "--file", + "/opt/oxide/cockroachdb/sql/dbinit.sql", + ]) + .map_err(BackoffError::transient)?; + info!(self.inner.log, "Formatting CRDB - Completed"); + Ok::< + (), + BackoffError< + illumos_utils::running_zone::RunCommandError, + >, + >(()) + }; + let log_failure = |error, _| { + warn!( + self.inner.log, "failed to format CRDB"; + "error" => ?error, + ); + }; + retry_notify(retry_policy_local(), format_crdb, log_failure) + .await + .expect("expected an infinite retry loop waiting for crdb"); + return Ok(running_zone); } ZoneType::Crucible => { let Some(info) = self.inner.sled_info.get() else { diff --git a/sled-agent/src/storage/dataset.rs b/sled-agent/src/storage/dataset.rs index e36bc89e4e..d4f0d63474 100644 --- a/sled-agent/src/storage/dataset.rs +++ b/sled-agent/src/storage/dataset.rs @@ -32,7 +32,7 @@ impl DatasetName { } pub fn full(&self) -> String { - format!("{}/{}", self.pool_name, self.kind.to_string()) + format!("{}/{}", self.pool_name, self.kind) } } diff --git a/smf/cockroachdb/method_script.sh b/smf/cockroachdb/method_script.sh index 
de01742829..fb9bacf854 100755 --- a/smf/cockroachdb/method_script.sh +++ b/smf/cockroachdb/method_script.sh @@ -28,9 +28,3 @@ args=( ) exec /opt/oxide/cockroachdb/bin/cockroach start-single-node "${args[@]}" & - -# TODO: Do this from RSS; I don't think we can make this call locally -# since there may be multiple instances running... - -# /opt/oxide/cockroachdb/bin/cockroach sql --insecure --host "$FULL_ADDRESS" --file /opt/oxide/cockroachdb/sql/dbwipe.sql -# /opt/oxide/cockroachdb/bin/cockroach sql --insecure --host "$FULL_ADDRESS" --file /opt/oxide/cockroachdb/sql/dbinit.sql From ef9517c127f23e7535a6f9194623fbcf9733dae5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 29 Apr 2023 22:28:51 -0400 Subject: [PATCH 03/15] better use of 'unique_name' (for storage zones), auto-launch storage zones, zone_name -> zone_type, config -> ledger --- illumos-utils/src/running_zone.rs | 12 +- sled-agent/src/params.rs | 15 ++ sled-agent/src/services.rs | 396 +++++++++++++++--------------- 3 files changed, 224 insertions(+), 199 deletions(-) diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 7fb8d67be3..b2f3c5a76f 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -574,8 +574,8 @@ impl InstalledZone { /// /// This results in a zone name which is distinct across different zpools, /// but stable and predictable across reboots. 
- pub fn get_zone_name(zone_name: &str, unique_name: Option<&str>) -> String { - let mut zone_name = format!("{}{}", ZONE_PREFIX, zone_name); + pub fn get_zone_name(zone_type: &str, unique_name: Option<&str>) -> String { + let mut zone_name = format!("{}{}", ZONE_PREFIX, zone_type); if let Some(suffix) = unique_name { zone_name.push_str(&format!("_{}", suffix)); } @@ -600,7 +600,7 @@ impl InstalledZone { log: &Logger, underlay_vnic_allocator: &VnicAllocator, zone_root_path: &Path, - zone_name: &str, + zone_type: &str, unique_name: Option<&str>, datasets: &[zone::Dataset], filesystems: &[zone::Fs], @@ -613,14 +613,14 @@ impl InstalledZone { let control_vnic = underlay_vnic_allocator.new_control(None).map_err(|err| { InstallZoneError::CreateVnic { - zone: zone_name.to_string(), + zone: zone_type.to_string(), err, } })?; - let full_zone_name = Self::get_zone_name(zone_name, unique_name); + let full_zone_name = Self::get_zone_name(zone_type, unique_name); let zone_image_path = - PathBuf::from(&format!("/opt/oxide/{}.tar.gz", zone_name)); + PathBuf::from(&format!("/opt/oxide/{}.tar.gz", zone_type)); let net_device_names: Vec = opte_ports .iter() diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 3276e1d46e..8418cd3888 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -518,6 +518,21 @@ pub struct ServiceZoneRequest { pub services: Vec, } +impl ServiceZoneRequest { + // The full name of the zone, if it was to be created as a zone. + pub fn zone_name(&self) -> String { + illumos_utils::running_zone::InstalledZone::get_zone_name( + &self.zone_type.to_string(), + self.zone_name_unique_identifier().as_deref(), + ) + } + + // The name of a unique identifier for the zone, if one is necessary. 
+ pub fn zone_name_unique_identifier(&self) -> Option { + self.dataset.as_ref().map(|d| d.pool().to_string()) + } +} + impl From for sled_agent_client::types::ServiceZoneRequest { fn from(s: ServiceZoneRequest) -> Self { let mut services = Vec::new(); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index f9c2251e55..9b9171de72 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -88,17 +88,6 @@ use tokio::sync::Mutex; use tokio::task::JoinHandle; use uuid::Uuid; -// The filename of ServiceManager's internal storage. -const SERVICES_CONFIG_FILENAME: &str = "services.toml"; -const STORAGE_SERVICES_CONFIG_FILENAME: &str = "storage-services.toml"; - -// The filename of a half-completed config, in need of parameters supplied at -// runtime. -const PARTIAL_CONFIG_FILENAME: &str = "config-partial.toml"; -// The filename of a completed config, merging the partial config with -// additional appended parameters known at runtime. -const COMPLETE_CONFIG_FILENAME: &str = "config.toml"; - #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Cannot serialize TOML to file {path}: {err}")] @@ -199,18 +188,6 @@ impl From for omicron_common::api::external::Error { } } -// The default path to service configuration -fn default_services_config_path() -> PathBuf { - Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join(SERVICES_CONFIG_FILENAME) -} - -// The default path to storage service configuration -fn default_storage_services_config_path() -> PathBuf { - Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join(STORAGE_SERVICES_CONFIG_FILENAME) -} - /// Configuration parameters which modify the [`ServiceManager`]'s behavior. pub struct Config { /// Identifies the sled being configured @@ -224,8 +201,8 @@ pub struct Config { // The path for the ServiceManager to store information about // all running services. 
- all_svcs_config_path: PathBuf, - storage_svcs_config_path: PathBuf, + all_svcs_ledger_path: PathBuf, + storage_svcs_ledger_path: PathBuf, } impl Config { @@ -238,12 +215,42 @@ impl Config { sled_id, sidecar_revision, gateway_address, - all_svcs_config_path: default_services_config_path(), - storage_svcs_config_path: default_storage_services_config_path(), + all_svcs_ledger_path: default_services_ledger_path(), + storage_svcs_ledger_path: default_storage_services_ledger_path(), } } } +// The filename of ServiceManager's internal storage. +const SERVICES_CONFIG_FILENAME: &str = "services.toml"; +const STORAGE_SERVICES_CONFIG_FILENAME: &str = "storage-services.toml"; + +// The default path to service configuration +fn default_services_ledger_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH) + .join(SERVICES_CONFIG_FILENAME) +} + +// The default path to storage service configuration +fn default_storage_services_ledger_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH) + .join(STORAGE_SERVICES_CONFIG_FILENAME) +} + +// TODO(ideas): +// - "ServiceLedger" +// - Manages the serializable "AllZoneRequests" object +// - Constructor which reads from config location (kinda like +// "read_from") +// - ... Writer which *knows the type* to be serialized, so can direct it to the +// appropriate output path. +// +// - TODO: later: Can also make the path writing safer, by... +// - ... TODO: Writing to both M.2s, basically using multiple output paths +// - ... TODO: Using a temporary file and renaming it to make the update atomic +// - ... TODO: Add a .json EXPECTORATE test for the format of "AllZoneRequests" +// - we need to be careful not to break compatibility in the future. + // A wrapper around `ZoneRequest`, which allows it to be serialized // to a toml file. 
#[derive(Clone, serde::Serialize, serde::Deserialize)] @@ -255,6 +262,44 @@ impl AllZoneRequests { fn new() -> Self { Self { requests: vec![] } } + + // Reads from `path` as a toml-serialized version of `Self`. + async fn read_from(log: &Logger, path: &Path) -> Result { + if path.exists() { + debug!( + log, + "Reading old storage service requests from {}", + path.display() + ); + toml::from_str( + &tokio::fs::read_to_string(&path) + .await + .map_err(|err| Error::io_path(&path, err))?, + ) + .map_err(|err| Error::TomlDeserialize { + path: path.to_path_buf(), + err, + }) + } else { + debug!(log, "No old storage service requests"); + Ok(AllZoneRequests::new()) + } + } + + // Writes to `path` as a toml-serialized version of `Self`. + async fn write_to(&self, log: &Logger, path: &Path) -> Result<(), Error> { + debug!(log, "Writing zone request configuration to {}", path.display()); + let serialized_services = toml::Value::try_from(&self) + .expect("Cannot serialize service list"); + let services_str = + toml::to_string(&serialized_services).map_err(|err| { + Error::TomlSerialize { path: path.to_path_buf(), err } + })?; + tokio::fs::write(&path, services_str) + .await + .map_err(|err| Error::io_path(&path, err))?; + Ok(()) + } } // This struct represents the combo of "what zone did you ask for" + "where did @@ -414,6 +459,93 @@ impl ServiceManager { self.inner.switch_zone_bootstrap_address } + pub async fn load_non_storage_services(&self) -> Result<(), Error> { + let log = &self.inner.log; + let services = + AllZoneRequests::read_from(log, &self.services_ledger_path()?) + .await?; + let mut existing_zones = self.inner.zones.lock().await; + + // Initialize and DNS and NTP services first as they are required + // for time synchronization, which is a pre-requisite for the other + // services. 
+ self.initialize_services_locked( + &mut existing_zones, + &services + .requests + .clone() + .into_iter() + .filter(|svc| { + matches!( + svc.zone.zone_type, + ZoneType::InternalDns | ZoneType::Ntp + ) + }) + .collect(), + ) + .await?; + + drop(existing_zones); + + info!(&self.inner.log, "Waiting for sled time synchronization"); + + retry_notify( + retry_policy_local(), + || async { + match self.timesync_get().await { + Ok(TimeSync { sync: true, .. }) => { + info!(&self.inner.log, "Time is synchronized"); + Ok(()) + } + Ok(ts) => Err(BackoffError::transient(format!( + "No sync {:?}", + ts + ))), + Err(e) => Err(BackoffError::transient(format!( + "Error checking for time synchronization: {}", + e + ))), + } + }, + |error, delay| { + warn!( + self.inner.log, + "Time not yet synchronised (retrying in {:?})", + delay; + "error" => ?error + ); + }, + ) + .await + .expect("Expected an infinite retry loop syncing time"); + + let mut existing_zones = self.inner.zones.lock().await; + + // Initialize all remaining services + self.initialize_services_locked( + &mut existing_zones, + &services.requests, + ) + .await?; + Ok(()) + } + + pub async fn load_storage_services(&self) -> Result<(), Error> { + let log = &self.inner.log; + let services = AllZoneRequests::read_from( + log, + &self.storage_services_ledger_path()?, + ) + .await?; + let mut existing_zones = self.inner.dataset_zones.lock().await; + self.initialize_services_locked( + &mut existing_zones, + &services.requests, + ) + .await?; + Ok(()) + } + /// Loads services from the services manager, and returns once all requested /// services have been started.
pub async fn sled_agent_started( @@ -438,88 +570,14 @@ impl ServiceManager { .map_err(|_| "already set".to_string()) .expect("Sled Agent should only start once"); - let config_path = self.services_config_path()?; - if config_path.exists() { - info!( - &self.inner.log, - "Sled services found at {}; loading", - config_path.to_string_lossy() - ); - let cfg: AllZoneRequests = toml::from_str( - &tokio::fs::read_to_string(&config_path) - .await - .map_err(|err| Error::io_path(&config_path, err))?, - ) - .map_err(|err| Error::TomlDeserialize { - path: config_path.clone(), - err, - })?; - let mut existing_zones = self.inner.zones.lock().await; - - // Initialize and DNS and NTP services first as they are required - // for time synchronization, which is a pre-requisite for the other - // services. - self.initialize_services_locked( - &mut existing_zones, - &cfg.requests - .clone() - .into_iter() - .filter(|svc| { - matches!( - svc.zone.zone_type, - ZoneType::InternalDns | ZoneType::Ntp - ) - }) - .collect(), - ) - .await?; - - drop(existing_zones); - - info!(&self.inner.log, "Waiting for sled time synchronization"); - - retry_notify( - retry_policy_local(), - || async { - match self.timesync_get().await { - Ok(TimeSync { sync: true, .. 
}) => { - info!(&self.inner.log, "Time is synchronized"); - Ok(()) - } - Ok(ts) => Err(BackoffError::transient(format!( - "No sync {:?}", - ts - ))), - Err(e) => Err(BackoffError::transient(format!( - "Error checking for time synchronization: {}", - e - ))), - } - }, - |error, delay| { - warn!( - self.inner.log, - "Time not yet synchronised (retrying in {:?})", - delay; - "error" => ?error - ); - }, - ) - .await - .expect("Expected an infinite retry loop syncing time"); - - let mut existing_zones = self.inner.zones.lock().await; - - // Initialize all remaining serivces - self.initialize_services_locked(&mut existing_zones, &cfg.requests) - .await?; - } else { - info!( - &self.inner.log, - "No sled services found at {}", - config_path.to_string_lossy() - ); - } + self.load_non_storage_services().await?; + // TODO: These will fail if the disks aren't attached. + // Should we have a retry loop here? Kinda like we have with the switch + // / NTP zone? + // + // NOTE: We could totally do the same thing with + // "load_non_storage_services". + self.load_storage_services().await?; Ok(()) } @@ -529,21 +587,21 @@ impl ServiceManager { self.inner.sled_mode } - // Returns either the path to the explicitly provided config path, or + // Returns either the path to the explicitly provided ledger path, or // chooses the default one. - fn services_config_path(&self) -> Result { + fn services_ledger_path(&self) -> Result { if let Some(info) = self.inner.sled_info.get() { - Ok(info.config.all_svcs_config_path.clone()) + Ok(info.config.all_svcs_ledger_path.clone()) } else { Err(Error::SledAgentNotReady) } } - // Returns either the path to the explicitly provided config path, or + // Returns either the path to the explicitly provided ledger path, or // chooses the default one. 
- fn storage_services_config_path(&self) -> Result { + fn storage_services_ledger_path(&self) -> Result { if let Some(info) = self.inner.sled_info.get() { - Ok(info.config.storage_svcs_config_path.clone()) + Ok(info.config.storage_svcs_ledger_path.clone()) } else { Err(Error::SledAgentNotReady) } @@ -895,15 +953,14 @@ impl ServiceManager { // If the zone is managing a particular dataset, plumb that // dataset into the zone. Additionally, construct a "unique enough" name // so we can create multiple zones of this type without collision. - let (unique_name, datasets) = - if let Some(dataset) = &request.zone.dataset { - ( - Some(dataset.pool().to_string()), - vec![zone::Dataset { name: dataset.full() }], - ) - } else { - (None, vec![]) - }; + let unique_name = request.zone.zone_name_unique_identifier(); + let datasets = request + .zone + .dataset + .iter() + .map(|d| zone::Dataset { name: d.full() }) + .collect::>(); + let devices: Vec = device_names .iter() .map(|d| zone::Device { name: d.to_string() }) @@ -915,7 +972,7 @@ impl ServiceManager { &request.root, &request.zone.zone_type.to_string(), unique_name.as_deref(), - &datasets, + datasets.as_slice(), &filesystems, &devices, opte_ports, @@ -1286,6 +1343,12 @@ impl ServiceManager { "{}/var/svc/manifest/site/nexus", running_zone.root() )); + // The filename of a half-completed config, in need of parameters supplied at + // runtime. + const PARTIAL_CONFIG_FILENAME: &str = "config-partial.toml"; + // The filename of a completed config, merging the partial config with + // additional appended parameters known at runtime. + const COMPLETE_CONFIG_FILENAME: &str = "config.toml"; let partial_config_path = config_dir.join(PARTIAL_CONFIG_FILENAME); let config_path = config_dir.join(COMPLETE_CONFIG_FILENAME); @@ -1682,10 +1745,7 @@ impl ServiceManager { ); // Before we bother allocating anything for this request, check if // this service has already been created. 
- let expected_zone_name = InstalledZone::get_zone_name( - &req.zone.zone_type.to_string(), - None, - ); + let expected_zone_name = req.zone.zone_name(); if existing_zones.iter().any(|z| z.name() == expected_zone_name) { info!( self.inner.log, @@ -1721,25 +1781,10 @@ impl ServiceManager { request: ServiceEnsureBody, ) -> Result<(), Error> { let mut existing_zones = self.inner.zones.lock().await; - let config_path = self.services_config_path()?; + let ledger_path = self.services_ledger_path()?; - let old_zone_requests: AllZoneRequests = { - if config_path.exists() { - debug!(self.inner.log, "Reading old service requests"); - toml::from_str( - &tokio::fs::read_to_string(&config_path) - .await - .map_err(|err| Error::io_path(&config_path, err))?, - ) - .map_err(|err| Error::TomlDeserialize { - path: config_path.clone(), - err, - })? - } else { - debug!(self.inner.log, "No old service requests"); - AllZoneRequests::new() - } - }; + let old_zone_requests = + AllZoneRequests::read_from(&self.inner.log, &ledger_path).await?; let new_zone_requests: Vec = { let known_set: HashSet<&ServiceZoneRequest> = HashSet::from_iter( @@ -1747,6 +1792,7 @@ impl ServiceManager { ); let requested_set = HashSet::from_iter(request.services.iter()); + // TODO: We probably want to handle this case. if !requested_set.is_superset(&known_set) { // The caller may only request services additively. 
// @@ -1779,15 +1825,7 @@ impl ServiceManager { .await?; zone_requests.requests.append(&mut old_zone_requests.requests.clone()); - let serialized_services = toml::Value::try_from(&zone_requests) - .expect("Cannot serialize service list"); - let services_str = - toml::to_string(&serialized_services).map_err(|err| { - Error::TomlSerialize { path: config_path.clone(), err } - })?; - tokio::fs::write(&config_path, services_str) - .await - .map_err(|err| Error::io_path(&config_path, err))?; + zone_requests.write_to(&self.inner.log, &ledger_path).await?; Ok(()) } @@ -1802,25 +1840,10 @@ impl ServiceManager { request: ServiceZoneRequest, ) -> Result<(), Error> { let mut existing_zones = self.inner.dataset_zones.lock().await; - let config_path = self.storage_services_config_path()?; + let ledger_path = self.storage_services_ledger_path()?; - let mut zone_requests: AllZoneRequests = { - if config_path.exists() { - debug!(self.inner.log, "Reading old storage service requests"); - toml::from_str( - &tokio::fs::read_to_string(&config_path) - .await - .map_err(|err| Error::io_path(&config_path, err))?, - ) - .map_err(|err| Error::TomlDeserialize { - path: config_path.clone(), - err, - })? 
- } else { - debug!(self.inner.log, "No old storage service requests"); - AllZoneRequests::new() - } - }; + let mut zone_requests = + AllZoneRequests::read_from(&self.inner.log, &ledger_path).await?; if !zone_requests .requests @@ -1845,15 +1868,7 @@ impl ServiceManager { ) .await?; - let serialized_services = toml::Value::try_from(&zone_requests) - .expect("Cannot serialize service list"); - let services_str = - toml::to_string(&serialized_services).map_err(|err| { - Error::TomlSerialize { path: config_path.clone(), err } - })?; - tokio::fs::write(&config_path, services_str) - .await - .map_err(|err| Error::io_path(&config_path, err))?; + zone_requests.write_to(&self.inner.log, &ledger_path).await?; Ok(()) } @@ -2423,25 +2438,20 @@ mod test { impl TestConfig { async fn new() -> Self { let config_dir = tempfile::TempDir::new().unwrap(); - tokio::fs::File::create( - config_dir.path().join(PARTIAL_CONFIG_FILENAME), - ) - .await - .unwrap(); Self { config_dir } } fn make_config(&self) -> Config { - let all_svcs_config_path = + let all_svcs_ledger_path = self.config_dir.path().join(SERVICES_CONFIG_FILENAME); - let storage_svcs_config_path = + let storage_svcs_ledger_path = self.config_dir.path().join(STORAGE_SERVICES_CONFIG_FILENAME); Config { sled_id: Uuid::new_v4(), sidecar_revision: "rev_whatever_its_a_test".to_string(), gateway_address: None, - all_svcs_config_path, - storage_svcs_config_path, + all_svcs_ledger_path, + storage_svcs_ledger_path, } } } @@ -2663,7 +2673,7 @@ mod test { // Next, delete the config. This means the service we just created will // not be remembered on the next initialization. let config = test_config.make_config(); - std::fs::remove_file(&config.all_svcs_config_path).unwrap(); + std::fs::remove_file(&config.all_svcs_ledger_path).unwrap(); // Observe that the old service is not re-initialized. 
let mgr = ServiceManager::new( From 5752a83b920ff5aae70b9282079cae0bf740a337 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 30 Apr 2023 01:29:03 -0400 Subject: [PATCH 04/15] [RSS] Explicit set of Bootstrap Agents --- sled-agent/src/bootstrap/params.rs | 13 +++ sled-agent/src/rack_setup/config.rs | 2 + sled-agent/src/rack_setup/service.rs | 80 ++++++------------- .../gimlet-standalone/config-rss.toml | 3 + smf/sled-agent/non-gimlet/config-rss.toml | 3 + 5 files changed, 46 insertions(+), 55 deletions(-) diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 0fc17cb8e0..146f0dac48 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -11,9 +11,19 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use serde_with::serde_as; use std::borrow::Cow; +use std::collections::HashSet; use std::net::{Ipv4Addr, Ipv6Addr, SocketAddrV6}; use uuid::Uuid; +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum BootstrapAddressDiscovery { + /// Ignore all bootstrap addresses except our own. + OnlyOurs, + /// Ignore all bootstrap addresses except the following. + OnlyThese(HashSet), +} + /// Configuration for the "rack setup service". /// /// The Rack Setup Service should be responsible for one-time setup actions, @@ -24,6 +34,9 @@ use uuid::Uuid; pub struct RackInitializeRequest { pub rack_subnet: Ipv6Addr, + /// Describes how bootstrap addresses should be collected during RSS. + pub bootstrap_discovery: BootstrapAddressDiscovery, + /// The minimum number of sleds required to unlock the rack secret. 
/// /// If this value is less than 2, no rack secret will be created on startup; diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index cba9a9f44f..9957d9257f 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -39,6 +39,7 @@ impl SetupServiceConfig { #[cfg(test)] mod test { use super::*; + use crate::bootstrap::params::BootstrapAddressDiscovery; use crate::bootstrap::params::Gateway; use omicron_common::address::IpRange; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; @@ -47,6 +48,7 @@ mod test { fn test_subnets() { let cfg = SetupServiceConfig { rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), + bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, rack_secret_threshold: 0, gateway: Some(Gateway { address: None, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1586cfb5de..3024805a33 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -56,6 +56,7 @@ use super::config::SetupServiceConfig as Config; use crate::bootstrap::config::BOOTSTRAP_AGENT_HTTP_PORT; +use crate::bootstrap::params::BootstrapAddressDiscovery; use crate::bootstrap::params::SledAgentRequest; use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::nexus::d2n_params; @@ -94,9 +95,6 @@ use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; -// The minimum number of sleds to initialize the rack. -const MINIMUM_SLED_COUNT: usize = 1; - /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] pub enum SetupServiceError { @@ -217,24 +215,6 @@ fn rss_completed_marker_path() -> PathBuf { .join("rss-plan-completed.marker") } -// Describes the options when awaiting for peers. -enum PeerExpectation { - // Await a set of peers that matches this group of IPv6 addresses exactly. 
- // - // TODO: We currently don't deal with the case where: - // - // - RSS boots, sees some sleds, comes up with a plan. - // - RSS reboots, sees a *different* set of sleds, and needs - // to adjust the plan. - // - // This case is fairly tricky because some sleds may have - // already received requests to initialize - modifying the - // allocated subnets would be non-trivial. - LoadOldPlan(HashSet), - // Await any peers, as long as there are at least enough to make a new plan. - CreateNewPlan(usize), -} - /// The implementation of the Rack Setup Service. struct ServiceInner { log: Logger, @@ -462,7 +442,7 @@ impl ServiceInner { /// can be sent out. async fn wait_for_peers( &self, - expectation: PeerExpectation, + expectation: &HashSet, our_bootstrap_address: Ipv6Addr, ) -> Result, DdmError> { let ddm_admin_client = DdmAdminClient::localhost(&self.log)?; @@ -484,31 +464,12 @@ impl ServiceInner { .chain(iter::once(our_bootstrap_address)) .collect::>(); - match expectation { - PeerExpectation::LoadOldPlan(ref expected) => { - if all_addrs.is_superset(expected) { - Ok(all_addrs.into_iter().collect()) - } else { - Err(BackoffError::transient( - concat!( - "Waiting for a LoadOldPlan set ", - "of peers not found yet." 
- ) - .to_string(), - )) - } - } - PeerExpectation::CreateNewPlan(wanted_peer_count) => { - if all_addrs.len() >= wanted_peer_count { - Ok(all_addrs.into_iter().collect()) - } else { - Err(BackoffError::transient(format!( - "Waiting for {} peers (currently have {})", - wanted_peer_count, - all_addrs.len() - ))) - } - } + if all_addrs.is_superset(expectation) { + Ok(all_addrs.into_iter().collect()) + } else { + Err(BackoffError::transient( + "Waiting for a set of peers not found yet".to_string(), + )) } }, |message, duration| { @@ -936,17 +897,26 @@ impl ServiceInner { // Wait for either: // - All the peers to re-load an old plan (if one exists) // - Enough peers to create a new plan (if one does not exist) - let maybe_sled_plan = SledPlan::load(&self.log).await?; - let expectation = if let Some(plan) = &maybe_sled_plan { - PeerExpectation::LoadOldPlan( - plan.sleds.keys().map(|a| *a.ip()).collect(), - ) - } else { - PeerExpectation::CreateNewPlan(MINIMUM_SLED_COUNT) + let expected_peers = match &config.bootstrap_discovery { + BootstrapAddressDiscovery::OnlyOurs => { + HashSet::from([local_bootstrap_agent.our_address()]) + } + BootstrapAddressDiscovery::OnlyThese(peers) => peers.clone(), }; + let maybe_sled_plan = SledPlan::load(&self.log).await?; + if let Some(plan) = &maybe_sled_plan { + let stored_peers: HashSet = + plan.sleds.keys().map(|a| *a.ip()).collect(); + if stored_peers != expected_peers { + return Err(SetupServiceError::BadConfig("Set of sleds requested does not match those in existing sled plan".to_string())); + } + } let addrs = self - .wait_for_peers(expectation, local_bootstrap_agent.our_address()) + .wait_for_peers( + &expected_peers, + local_bootstrap_agent.our_address(), + ) .await?; info!(self.log, "Enough peers exist to enact RSS plan"); diff --git a/smf/sled-agent/gimlet-standalone/config-rss.toml b/smf/sled-agent/gimlet-standalone/config-rss.toml index 05f2860502..246f57848d 100644 --- a/smf/sled-agent/gimlet-standalone/config-rss.toml +++ 
b/smf/sled-agent/gimlet-standalone/config-rss.toml @@ -6,6 +6,9 @@ # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" +# Only include "our own sled" in the bootstrap network +bootstrap_discovery = "only_ours" + # The number of sleds required to unlock the rack secret. # # For values less than 2, no rack secret will be generated. diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index 05f2860502..246f57848d 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -6,6 +6,9 @@ # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" +# Only include "our own sled" in the bootstrap network +bootstrap_discovery = "only_ours" + # The number of sleds required to unlock the rack secret. # # For values less than 2, no rack secret will be generated. From 31b52a732f5480386bcfcd42a79298ae15ca887f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 30 Apr 2023 01:37:39 -0400 Subject: [PATCH 05/15] Refuse to enact a sled plan unless it's on the explicit set --- sled-agent/src/rack_setup/plan/sled.rs | 4 +- sled-agent/src/rack_setup/service.rs | 69 +++----------------------- 2 files changed, 10 insertions(+), 63 deletions(-) diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index a754aac8e5..f78a4c2484 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -13,7 +13,7 @@ use crate::rack_setup::config::SetupServiceConfig as Config; use serde::{Deserialize, Serialize}; use slog::Logger; use sprockets_host::Ed25519Certificate; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddrV6}; use std::path::{Path, PathBuf}; use thiserror::Error; @@ -119,7 +119,7 @@ impl Plan { pub async fn create( log: &Logger, config: &Config, - bootstrap_addrs: Vec, + bootstrap_addrs: HashSet, ) -> 
Result { let rack_id = Uuid::new_v4(); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 3024805a33..ba9310afe9 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -438,56 +438,6 @@ impl ServiceInner { Ok(()) } - /// Waits for sufficient neighbors to exist so the initial set of requests - /// can be sent out. - async fn wait_for_peers( - &self, - expectation: &HashSet, - our_bootstrap_address: Ipv6Addr, - ) -> Result, DdmError> { - let ddm_admin_client = DdmAdminClient::localhost(&self.log)?; - let addrs = retry_notify( - retry_policy_internal_service_aggressive(), - || async { - let peer_addrs = ddm_admin_client - .derive_bootstrap_addrs_from_prefixes(&[ - BootstrapInterface::GlobalZone, - ]) - .await - .map_err(|err| { - BackoffError::transient(format!( - "Failed getting peers from mg-ddm: {err}" - )) - })?; - - let all_addrs = peer_addrs - .chain(iter::once(our_bootstrap_address)) - .collect::>(); - - if all_addrs.is_superset(expectation) { - Ok(all_addrs.into_iter().collect()) - } else { - Err(BackoffError::transient( - "Waiting for a set of peers not found yet".to_string(), - )) - } - }, - |message, duration| { - info!( - self.log, - "{} (will retry after {:?})", message, duration - ); - }, - ) - // `retry_policy_internal_service_aggressive()` retries indefinitely on - // transient errors (the only kind we produce), allowing us to - // `.unwrap()` without panicking - .await - .unwrap(); - - Ok(addrs) - } - async fn sled_timesync( &self, sled_address: &SocketAddrV6, @@ -897,7 +847,7 @@ impl ServiceInner { // Wait for either: // - All the peers to re-load an old plan (if one exists) // - Enough peers to create a new plan (if one does not exist) - let expected_peers = match &config.bootstrap_discovery { + let bootstrap_addrs = match &config.bootstrap_discovery { BootstrapAddressDiscovery::OnlyOurs => { HashSet::from([local_bootstrap_agent.our_address()]) } @@ -907,18 +857,15 
@@ impl ServiceInner { if let Some(plan) = &maybe_sled_plan { let stored_peers: HashSet = plan.sleds.keys().map(|a| *a.ip()).collect(); - if stored_peers != expected_peers { + if stored_peers != bootstrap_addrs { return Err(SetupServiceError::BadConfig("Set of sleds requested does not match those in existing sled plan".to_string())); } } - - let addrs = self - .wait_for_peers( - &expected_peers, - local_bootstrap_agent.our_address(), - ) - .await?; - info!(self.log, "Enough peers exist to enact RSS plan"); + if bootstrap_addrs.is_empty() { + return Err(SetupServiceError::BadConfig( + "Must request at least one peer".to_string(), + )); + } // If we created a plan, reuse it. Otherwise, create a new plan. // @@ -931,7 +878,7 @@ impl ServiceInner { plan } else { info!(self.log, "Creating new allocation plan"); - SledPlan::create(&self.log, config, addrs).await? + SledPlan::create(&self.log, config, bootstrap_addrs).await? }; let config = &plan.config; From f77f2357cd283125f700d4279bbc46dad1f439d6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 28 Apr 2023 10:51:59 -0400 Subject: [PATCH 06/15] wip --- sled-agent/src/http_entrypoints.rs | 21 +++++++++++++++++++++ sled-agent/src/rack_setup/service.rs | 17 ++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 4a0db86ed5..3fd625df1f 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -32,6 +32,7 @@ pub fn api() -> SledApiDescription { fn register_endpoints(api: &mut SledApiDescription) -> Result<(), String> { api.register(disk_put)?; api.register(filesystem_put)?; + api.register(cockroachdb_init)?; api.register(instance_issue_disk_snapshot_request)?; api.register(instance_put_migration_ids)?; api.register(instance_put_state)?; @@ -113,6 +114,26 @@ async fn filesystem_put( Ok(HttpResponseUpdatedNoContent()) } +#[endpoint { + method = POST, + path = "/cockroachdb", +}] +async fn 
cockroachdb_init( + rqctx: RequestContext, +) -> Result { + let sa = rqctx.context(); + + // TODO: In RSS, launch three CRDB nodes across the U.2s. + // + // TODO: In the SMF file for cockroach, call "start" instead of + // "start-single-node". + // + // TODO: Access the CRDB zone, run "init", then run "dbinit.sql". + + Ok(HttpResponseUpdatedNoContent()) +} + + /// Path parameters for Instance requests (sled agent API) #[derive(Deserialize, JsonSchema)] struct InstancePathParam { diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index ba9310afe9..3ec36de2a9 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -995,13 +995,28 @@ impl ServiceInner { info!(self.log, "Finished setting up agents and datasets"); + // TODO: Initialize Cockroachdb? +// let initialize_db = || async { +// +// }; +// let log_failure = |error, _| { +// warn!(self.log, "Time is not yet synchronized"; "error" => ?error); +// }; +// retry_notify( +// retry_policy_internal_service_aggressive(), +// initialize_db, +// log_failure, +// ) +// .await +// .unwrap(); + // Issue service initialization requests. // // NOTE: This must happen *after* the dataset initialization, // to ensure that CockroachDB has been initialized before Nexus // starts. // - // If Nexus was more resilient to concurrent initialization + // TODO: If Nexus was more resilient to concurrent initialization // of CRDB, this requirement could be relaxed. 
futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { From 2810f93869b09290117d9390447c1a91d7c5e0ea Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 29 Apr 2023 20:27:53 -0400 Subject: [PATCH 07/15] RSS explicitly calling to initialize CRDB --- internal-dns/src/resolver.rs | 39 +++++--- openapi/sled-agent.json | 18 ++++ sled-agent/src/http_entrypoints.rs | 16 ++-- sled-agent/src/rack_setup/service.rs | 66 +++++++++++--- sled-agent/src/services.rs | 131 +++++++++++++++------------ sled-agent/src/sled_agent.rs | 5 + smf/cockroachdb/method_script.sh | 9 +- 7 files changed, 185 insertions(+), 99 deletions(-) diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index c6e0ea1e19..484f31c7fa 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -121,6 +121,31 @@ impl Resolver { Ok(*address) } + pub async fn lookup_all_ipv6( + &self, + srv: crate::ServiceName, + ) -> Result, ResolveError> { + let name = format!("{}.{}", srv.dns_name(), DNS_ZONE); + debug!(self.log, "lookup_ipv6 srv"; "dns_name" => &name); + let response = self.inner.ipv6_lookup(&name).await?; + let addresses = response.iter().map(|a| *a).collect::>(); + Ok(addresses) + } + + pub async fn lookup_ip( + &self, + srv: crate::ServiceName, + ) -> Result { + let name = format!("{}.{}", srv.dns_name(), DNS_ZONE); + debug!(self.log, "lookup srv"; "dns_name" => &name); + let response = self.inner.lookup_ip(&name).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(address) + } + /// Looks up a single [`SocketAddrV6`] based on the SRV name /// Returns an error if the record does not exist. 
pub async fn lookup_socket_v6( @@ -156,20 +181,6 @@ impl Resolver { } }) } - - pub async fn lookup_ip( - &self, - srv: crate::ServiceName, - ) -> Result { - let name = format!("{}.{}", srv.dns_name(), DNS_ZONE); - debug!(self.log, "lookup srv"; "dns_name" => &name); - let response = self.inner.lookup_ip(&name).await?; - let address = response - .iter() - .next() - .ok_or_else(|| ResolveError::NotFound(srv))?; - Ok(address) - } } #[cfg(test)] diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index c41da8ab8b..1e09ce82a2 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -10,6 +10,24 @@ "version": "0.0.1" }, "paths": { + "/cockroachdb": { + "post": { + "summary": "Initializes a CockroachDB cluster, calling:", + "description": "\nand also populating the contents of the filesystem with preliminary tables.", + "operationId": "cockroachdb_init", + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/disks/{disk_id}": { "put": { "operationId": "disk_put", diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 3fd625df1f..06757e2e1a 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -114,6 +114,12 @@ async fn filesystem_put( Ok(HttpResponseUpdatedNoContent()) } +/// Initializes a CockroachDB cluster, calling: +/// +/// +/// +/// and also populating the contents of the filesystem with preliminary +/// tables. #[endpoint { method = POST, path = "/cockroachdb", @@ -122,18 +128,10 @@ async fn cockroachdb_init( rqctx: RequestContext, ) -> Result { let sa = rqctx.context(); - - // TODO: In RSS, launch three CRDB nodes across the U.2s. - // - // TODO: In the SMF file for cockroach, call "start" instead of - // "start-single-node". - // - // TODO: Access the CRDB zone, run "init", then run "dbinit.sql". 
- + sa.cockroachdb_initialize().await?; Ok(HttpResponseUpdatedNoContent()) } - /// Path parameters for Instance requests (sled agent API) #[derive(Deserialize, JsonSchema)] struct InstancePathParam { diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 3ec36de2a9..3c9dde29a1 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -231,7 +231,6 @@ impl ServiceInner { datasets: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); - let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) @@ -786,6 +785,54 @@ impl ServiceInner { Ok(()) } + async fn initialize_cockroach( + &self, + service_plan: &ServicePlan, + ) -> Result<(), SetupServiceError> { + // Now that datasets and zones have started for CockroachDB, + // perform one-time initialization of the cluster. + let sled_address = service_plan + .services + .iter() + .find_map(|(sled_address, sled_request)| { + if sled_request.datasets.iter().any(|dataset| { + dataset.dataset_kind + == crate::params::DatasetKind::CockroachDb + }) { + Some(sled_address) + } else { + None + } + }) + .expect("Should not create service plans without CockroachDb"); + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .map_err(SetupServiceError::HttpClient)?; + let client = SledAgentClient::new_with_client( + &format!("http://{}", sled_address), + client, + self.log.new(o!("SledAgentClient" => sled_address.to_string())), + ); + let initialize_db = || async { + client.cockroachdb_init().await.map_err(BackoffError::transient)?; + Ok::<(), BackoffError>>(()) + }; + let log_failure = |error, _| { + warn!(self.log, "Failed to initialize CockroachDB"; "error" => ?error); + }; + retry_notify( + retry_policy_internal_service_aggressive(), + initialize_db, + log_failure, + ) + .await + .unwrap(); + Ok(()) + } + // This 
method has a few distinct phases, identified by files in durable // storage: // @@ -995,20 +1042,9 @@ impl ServiceInner { info!(self.log, "Finished setting up agents and datasets"); - // TODO: Initialize Cockroachdb? -// let initialize_db = || async { -// -// }; -// let log_failure = |error, _| { -// warn!(self.log, "Time is not yet synchronized"; "error" => ?error); -// }; -// retry_notify( -// retry_policy_internal_service_aggressive(), -// initialize_db, -// log_failure, -// ) -// .await -// .unwrap(); + // Now that datasets and zones have started for CockroachDB, + // perform one-time initialization of the cluster. + self.initialize_cockroach(&service_plan).await?; // Issue service initialization requests. // diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 77976bf33b..da20294c4d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -40,7 +40,9 @@ use illumos_utils::addrobj::IPV6_LINK_LOCAL_NAME; use illumos_utils::dladm::{Dladm, Etherstub, EtherstubVnic, PhysicalLink}; use illumos_utils::link::{Link, VnicAllocator}; use illumos_utils::opte::{Port, PortManager, PortTicket}; -use illumos_utils::running_zone::{InstalledZone, RunningZone}; +use illumos_utils::running_zone::{ + InstalledZone, RunCommandError, RunningZone, +}; use illumos_utils::zfs::ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT; use illumos_utils::zone::AddressRequest; use illumos_utils::zone::Zones; @@ -90,6 +92,12 @@ use uuid::Uuid; #[derive(thiserror::Error, Debug)] pub enum Error { + #[error("Failed to initilize CockroachDb: {err}")] + CockroachInit { + #[source] + err: RunCommandError, + }, + #[error("Cannot serialize TOML to file {path}: {err}")] TomlSerialize { path: PathBuf, err: toml::ser::Error }, @@ -1038,12 +1046,26 @@ impl ServiceManager { let listen_addr = &address.ip().to_string(); let listen_port = &address.port().to_string(); + // Look up all cockroachdb addresses stored in internal DNS. 
+ let all_crdb_addresses = info + .resolver + .lookup_all_ipv6(internal_dns::ServiceName::Cockroach) + .await? + .into_iter() + .map(|addr| { + SocketAddr::new(IpAddr::V6(addr), COCKROACH_PORT) + .to_string() + }) + .collect::>() + .join(","); + let config = PropertyGroupBuilder::new("config") .add_property("datalink", "astring", datalink) .add_property("gateway", "astring", gateway) .add_property("listen_addr", "astring", listen_addr) .add_property("listen_port", "astring", listen_port) - .add_property("store", "astring", "/data"); + .add_property("store", "astring", "/data") + .add_property("join_addrs", "astring", &all_crdb_addresses); let profile = ProfileBuilder::new("omicron").add_service( ServiceBuilder::new("oxide/cockroachdb").add_instance( @@ -1057,64 +1079,7 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup CRDB profile", err) })?; - let running_zone = RunningZone::boot(installed_zone).await?; - - // TODO: The following lines are necessary to initialize CRDB - // in a single-node environment. They're bad! They're wrong! - // We definitely shouldn't be wiping the database every time - // we want to boot this zone. - // - // But they're also necessary to prevent the build from - // regressing. - // - // NOTE: In the (very short-term) future, this will be - // replaced by the following: - // 1. CRDB will simply "start", rather than "start-single-node". - // 2. The Sled Agent will expose an explicit API to "init" the - // Cockroach cluster, and populate it with the expected - // contents. 
- let format_crdb = || async { - info!(self.inner.log, "Formatting CRDB"); - running_zone - .run_cmd(&[ - "/opt/oxide/cockroachdb/bin/cockroach", - "sql", - "--insecure", - "--host", - &address.to_string(), - "--file", - "/opt/oxide/cockroachdb/sql/dbwipe.sql", - ]) - .map_err(BackoffError::transient)?; - running_zone - .run_cmd(&[ - "/opt/oxide/cockroachdb/bin/cockroach", - "sql", - "--insecure", - "--host", - &address.to_string(), - "--file", - "/opt/oxide/cockroachdb/sql/dbinit.sql", - ]) - .map_err(BackoffError::transient)?; - info!(self.inner.log, "Formatting CRDB - Completed"); - Ok::< - (), - BackoffError< - illumos_utils::running_zone::RunCommandError, - >, - >(()) - }; - let log_failure = |error, _| { - warn!( - self.inner.log, "failed to format CRDB"; - "error" => ?error, - ); - }; - retry_notify(retry_policy_local(), format_crdb, log_failure) - .await - .expect("expected an infinite retry loop waiting for crdb"); - return Ok(running_zone); + return Ok(RunningZone::boot(installed_zone).await?); } ZoneType::Crucible => { let Some(info) = self.inner.sled_info.get() else { @@ -1904,6 +1869,52 @@ impl ServiceManager { Ok(()) } + pub async fn cockroachdb_initialize(&self) -> Result<(), Error> { + let log = &self.inner.log; + // TODO: Can we confirm this address is okay? + let host = &format!("[::1]:{COCKROACH_PORT}"); + let dataset_zones = self.inner.dataset_zones.lock().await; + for zone in dataset_zones.iter() { + // TODO: We could probably store the ZoneKind in the running zone to + // make this a bit safer. 
+ if zone.name().contains("cockroach") { + info!(log, "Initializing CRDB Cluster"); + zone.run_cmd(&[ + "/opt/oxide/cockroachdb/bin/cockroach", + "init", + "--insecure", + "--host", + host, + ]) + .map_err(|err| Error::CockroachInit { err })?; + info!(log, "Formatting CRDB"); + zone.run_cmd(&[ + "/opt/oxide/cockroachdb/bin/cockroach", + "sql", + "--insecure", + "--host", + host, + "--file", + "/opt/oxide/cockroachdb/sql/dbwipe.sql", + ]) + .map_err(|err| Error::CockroachInit { err })?; + zone.run_cmd(&[ + "/opt/oxide/cockroachdb/bin/cockroach", + "sql", + "--insecure", + "--host", + host, + "--file", + "/opt/oxide/cockroachdb/sql/dbinit.sql", + ]) + .map_err(|err| Error::CockroachInit { err })?; + info!(log, "Formatting CRDB - Completed"); + } + } + + Ok(()) + } + pub fn boottime_rewrite(&self, zones: &Vec) { if self .inner diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index fa87b4b9fc..73e56d4827 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -504,6 +504,11 @@ impl SledAgent { Ok(()) } + pub async fn cockroachdb_initialize(&self) -> Result<(), Error> { + self.inner.services.cockroachdb_initialize().await?; + Ok(()) + } + /// Gets the sled's current list of all zpools. 
pub async fn zpools_get(&self) -> Result<Vec<Zpool>, Error> { let zpools = self.inner.storage.get_zpools().await?; diff --git a/smf/cockroachdb/method_script.sh b/smf/cockroachdb/method_script.sh index fb9bacf854..5027aeb42b 100755 --- a/smf/cockroachdb/method_script.sh +++ b/smf/cockroachdb/method_script.sh @@ -11,12 +11,18 @@ LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")" DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")" DATALINK="$(svcprop -c -p config/datalink "${SMF_FMRI}")" GATEWAY="$(svcprop -c -p config/gateway "${SMF_FMRI}")" +JOIN_ADDRS="$(svcprop -c -p config/join_addrs "${SMF_FMRI}")" if [[ $DATALINK == unknown ]] || [[ $GATEWAY == unknown ]]; then printf 'ERROR: missing datalink or gateway\n' >&2 exit "$SMF_EXIT_ERR_CONFIG" fi +if [[ $JOIN_ADDRS == unknown ]]; then + printf 'ERROR: missing join_addrs\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + ipadm show-addr "$DATALINK/ll" || ipadm create-addr -t -T addrconf "$DATALINK/ll" ipadm show-addr "$DATALINK/omicron6" || ipadm create-addr -t -T static -a "$LISTEN_ADDR" "$DATALINK/omicron6" route get -inet6 default -inet6 "$GATEWAY" || route add -inet6 default -inet6 "$GATEWAY" @@ -25,6 +31,7 @@ args=( '--insecure' '--listen-addr' "[$LISTEN_ADDR]:$LISTEN_PORT" '--store' "$DATASTORE" + '--join' "$JOIN_ADDRS" ) -exec /opt/oxide/cockroachdb/bin/cockroach start-single-node "${args[@]}" & +exec /opt/oxide/cockroachdb/bin/cockroach start "${args[@]}" & From 5297950e909bb12f85f3ecb206fbd37b45fe12b7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 30 Apr 2023 00:21:40 -0400 Subject: [PATCH 08/15] Read the CRDB address from the running zone --- illumos-utils/src/running_zone.rs | 4 ++++ sled-agent/src/params.rs | 2 ++ sled-agent/src/services.rs | 21 +++++++++++++++++---- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index f26aa82721..eda6b00ef9 100644 --- a/illumos-utils/src/running_zone.rs +++ 
b/illumos-utils/src/running_zone.rs @@ -147,6 +147,10 @@ impl RunningZone { format!("{}/root", self.inner.zonepath.display()) } + pub fn control_interface(&self) -> AddrObject { + AddrObject::new(self.inner.get_control_vnic_name(), "omicron6").unwrap() + } + /// Runs a command within the Zone, return the output. pub fn run_cmd(&self, args: I) -> Result where diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 0b80c434b4..84ba8a083a 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -521,6 +521,8 @@ impl std::fmt::Display for ZoneType { )] pub struct ServiceZoneRequest { // The UUID of the zone to be initialized. + // TODO: Should this be removed? If we have UUIDs on the services, what's + // the point of this? pub id: Uuid, // The type of the zone to be created. pub zone_type: ZoneType, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index da20294c4d..c7d9c59a92 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -169,6 +169,9 @@ pub enum Error { #[error("Services already configured for this Sled Agent")] ServicesAlreadyConfigured, + #[error("Failed to get address: {0}")] + GetAddressFailure(#[from] illumos_utils::zone::GetAddressError), + #[error("NTP zone not ready")] NtpZoneNotReady, @@ -1871,13 +1874,18 @@ impl ServiceManager { pub async fn cockroachdb_initialize(&self) -> Result<(), Error> { let log = &self.inner.log; - // TODO: Can we confirm this address is okay? - let host = &format!("[::1]:{COCKROACH_PORT}"); let dataset_zones = self.inner.dataset_zones.lock().await; for zone in dataset_zones.iter() { // TODO: We could probably store the ZoneKind in the running zone to - // make this a bit safer. - if zone.name().contains("cockroach") { + // make this "comparison to existing zones by name" mechanism a bit + // safer. + if zone.name().contains(&ZoneType::CockroachDb.to_string()) { + let address = Zones::get_address( + Some(zone.name()), + &zone.control_interface(), + )? 
+ .network(); + let host = &format!("[{address}]:{COCKROACH_PORT}"); info!(log, "Initializing CRDB Cluster"); zone.run_cmd(&[ "/opt/oxide/cockroachdb/bin/cockroach", @@ -1909,6 +1917,11 @@ impl ServiceManager { ]) .map_err(|err| Error::CockroachInit { err })?; info!(log, "Formatting CRDB - Completed"); + + // In the single-sled case, if there are multiple CRDB nodes on + // a single device, we'd still only want to send the + // initialization requests to a single dataset. + return Ok(()); } } From c21be48ac12ebfd3f6acb9619f8f9757b9b2aa9d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 30 Apr 2023 02:03:37 -0400 Subject: [PATCH 09/15] Send requests to the right address --- sled-agent/src/services.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index c7d9c59a92..0e9bdd8772 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1884,9 +1884,9 @@ impl ServiceManager { Some(zone.name()), &zone.control_interface(), )? - .network(); + .ip(); let host = &format!("[{address}]:{COCKROACH_PORT}"); - info!(log, "Initializing CRDB Cluster"); + info!(log, "Initializing CRDB Cluster - sending request to {host}"); zone.run_cmd(&[ "/opt/oxide/cockroachdb/bin/cockroach", "init", From f11c153a9d3a0330ccc15c57f569644ad4037942 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 30 Apr 2023 02:06:25 -0400 Subject: [PATCH 10/15] fmt --- sled-agent/src/services.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 0e9bdd8772..eacfe146a9 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1886,7 +1886,10 @@ impl ServiceManager { )? 
.ip(); let host = &format!("[{address}]:{COCKROACH_PORT}"); - info!(log, "Initializing CRDB Cluster - sending request to {host}"); + info!( + log, + "Initializing CRDB Cluster - sending request to {host}" + ); zone.run_cmd(&[ "/opt/oxide/cockroachdb/bin/cockroach", "init", From 4377f1d7fde458a1ef7c6260ad9178042eba532d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 30 Apr 2023 01:45:37 -0400 Subject: [PATCH 11/15] Stop deleting chelsio addresses during uninstall (#2953) ## Before this PR Running on rack2 and calling `omicron-package uninstall` would involve a fatal termination of the connection, as it would delete the `cxgbe0/ll` and `cxgbe1/ll` IP addresses necessary for contacting the sled. ## After this PR Those addresses are left alone. This is pretty useful for development, as it allows us to run `uninstall` to cleanly wipe a Gimlet, preparing it for future "clean installs". --- sled-hardware/src/cleanup.rs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/sled-hardware/src/cleanup.rs b/sled-hardware/src/cleanup.rs index 0c14aad7df..1a7f8be2f7 100644 --- a/sled-hardware/src/cleanup.rs +++ b/sled-hardware/src/cleanup.rs @@ -6,11 +6,11 @@ use anyhow::Error; use futures::stream::{self, StreamExt, TryStreamExt}; +use illumos_utils::dladm::Dladm; use illumos_utils::dladm::BOOTSTRAP_ETHERSTUB_NAME; use illumos_utils::dladm::BOOTSTRAP_ETHERSTUB_VNIC_NAME; use illumos_utils::dladm::UNDERLAY_ETHERSTUB_NAME; use illumos_utils::dladm::UNDERLAY_ETHERSTUB_VNIC_NAME; -use illumos_utils::dladm::{Dladm, VnicSource}; use illumos_utils::link::LinkKind; use illumos_utils::opte; use illumos_utils::zone::IPADM; @@ -30,14 +30,6 @@ pub fn delete_bootstrap_addresses(log: &Logger) -> Result<(), Error> { delete_addresses_matching_prefixes(log, &[bootstrap_prefix]) } -fn delete_chelsio_addresses(log: &Logger) -> Result<(), Error> { - let prefixes = crate::underlay::find_chelsio_links()? 
- .into_iter() - .map(|link| format!("{}/", link.name())) - .collect::>(); - delete_addresses_matching_prefixes(log, &prefixes) -} - fn delete_addresses_matching_prefixes( log: &Logger, prefixes: &[String], @@ -112,7 +104,6 @@ pub async fn delete_omicron_vnics(log: &Logger) -> Result<(), Error> { pub async fn cleanup_networking_resources(log: &Logger) -> Result<(), Error> { delete_underlay_addresses(log)?; delete_bootstrap_addresses(log)?; - delete_chelsio_addresses(log)?; delete_omicron_vnics(log).await?; delete_etherstub(log)?; opte::delete_all_xde_devices(log)?; From ec3b1e4b1a38e695555d6bc1d28cddb3b5e1d483 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 30 Apr 2023 01:29:03 -0400 Subject: [PATCH 12/15] [RSS] Explicit set of Bootstrap Agents --- openapi/sled-agent.json | 3 +- sled-agent/src/bootstrap/params.rs | 13 ++ sled-agent/src/rack_setup/config.rs | 2 + sled-agent/src/rack_setup/plan/sled.rs | 4 +- sled-agent/src/rack_setup/service.rs | 123 +++--------------- .../gimlet-standalone/config-rss.toml | 3 + smf/sled-agent/non-gimlet/config-rss.toml | 3 + 7 files changed, 45 insertions(+), 106 deletions(-) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index c41da8ab8b..ad26e7a609 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -1745,7 +1745,7 @@ "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" }, "ServiceEnsureBody": { - "description": "Used to request that the Sled initialize certain services.\n\nThis may be used to record that certain sleds are responsible for launching services which may not be associated with a dataset, such as Nexus.", + "description": "Used to request that the Sled initialize certain services on initialization.\n\nThis may be used to record that certain sleds are responsible for launching services which may not be associated with a dataset, 
such as Nexus.", "type": "object", "properties": { "services": { @@ -2138,6 +2138,7 @@ ] }, "ServiceZoneService": { + "description": "Used to request that the Sled initialize certain services.", "type": "object", "properties": { "details": { diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 0fc17cb8e0..146f0dac48 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -11,9 +11,19 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use serde_with::serde_as; use std::borrow::Cow; +use std::collections::HashSet; use std::net::{Ipv4Addr, Ipv6Addr, SocketAddrV6}; use uuid::Uuid; +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum BootstrapAddressDiscovery { + /// Ignore all bootstrap addresses except our own. + OnlyOurs, + /// Ignore all bootstrap addresses except the following. + OnlyThese(HashSet<Ipv6Addr>), +} + /// Configuration for the "rack setup service". /// /// The Rack Setup Service should be responsible for one-time setup actions, @@ -24,6 +34,9 @@ use uuid::Uuid; pub struct RackInitializeRequest { pub rack_subnet: Ipv6Addr, + /// Describes how bootstrap addresses should be collected during RSS. + pub bootstrap_discovery: BootstrapAddressDiscovery, + /// The minimum number of sleds required to unlock the rack secret. 
/// /// If this value is less than 2, no rack secret will be created on startup; diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index cba9a9f44f..9957d9257f 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -39,6 +39,7 @@ impl SetupServiceConfig { #[cfg(test)] mod test { use super::*; + use crate::bootstrap::params::BootstrapAddressDiscovery; use crate::bootstrap::params::Gateway; use omicron_common::address::IpRange; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; @@ -47,6 +48,7 @@ mod test { fn test_subnets() { let cfg = SetupServiceConfig { rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), + bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, rack_secret_threshold: 0, gateway: Some(Gateway { address: None, diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index a754aac8e5..f78a4c2484 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -13,7 +13,7 @@ use crate::rack_setup::config::SetupServiceConfig as Config; use serde::{Deserialize, Serialize}; use slog::Logger; use sprockets_host::Ed25519Certificate; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddrV6}; use std::path::{Path, PathBuf}; use thiserror::Error; @@ -119,7 +119,7 @@ impl Plan { pub async fn create( log: &Logger, config: &Config, - bootstrap_addrs: Vec, + bootstrap_addrs: HashSet, ) -> Result { let rack_id = Uuid::new_v4(); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1586cfb5de..ba9310afe9 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -56,6 +56,7 @@ use super::config::SetupServiceConfig as Config; use crate::bootstrap::config::BOOTSTRAP_AGENT_HTTP_PORT; +use crate::bootstrap::params::BootstrapAddressDiscovery; use crate::bootstrap::params::SledAgentRequest; use 
crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::nexus::d2n_params; @@ -94,9 +95,6 @@ use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; -// The minimum number of sleds to initialize the rack. -const MINIMUM_SLED_COUNT: usize = 1; - /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] pub enum SetupServiceError { @@ -217,24 +215,6 @@ fn rss_completed_marker_path() -> PathBuf { .join("rss-plan-completed.marker") } -// Describes the options when awaiting for peers. -enum PeerExpectation { - // Await a set of peers that matches this group of IPv6 addresses exactly. - // - // TODO: We currently don't deal with the case where: - // - // - RSS boots, sees some sleds, comes up with a plan. - // - RSS reboots, sees a *different* set of sleds, and needs - // to adjust the plan. - // - // This case is fairly tricky because some sleds may have - // already received requests to initialize - modifying the - // allocated subnets would be non-trivial. - LoadOldPlan(HashSet), - // Await any peers, as long as there are at least enough to make a new plan. - CreateNewPlan(usize), -} - /// The implementation of the Rack Setup Service. struct ServiceInner { log: Logger, @@ -458,75 +438,6 @@ impl ServiceInner { Ok(()) } - /// Waits for sufficient neighbors to exist so the initial set of requests - /// can be sent out. 
- async fn wait_for_peers( - &self, - expectation: PeerExpectation, - our_bootstrap_address: Ipv6Addr, - ) -> Result, DdmError> { - let ddm_admin_client = DdmAdminClient::localhost(&self.log)?; - let addrs = retry_notify( - retry_policy_internal_service_aggressive(), - || async { - let peer_addrs = ddm_admin_client - .derive_bootstrap_addrs_from_prefixes(&[ - BootstrapInterface::GlobalZone, - ]) - .await - .map_err(|err| { - BackoffError::transient(format!( - "Failed getting peers from mg-ddm: {err}" - )) - })?; - - let all_addrs = peer_addrs - .chain(iter::once(our_bootstrap_address)) - .collect::>(); - - match expectation { - PeerExpectation::LoadOldPlan(ref expected) => { - if all_addrs.is_superset(expected) { - Ok(all_addrs.into_iter().collect()) - } else { - Err(BackoffError::transient( - concat!( - "Waiting for a LoadOldPlan set ", - "of peers not found yet." - ) - .to_string(), - )) - } - } - PeerExpectation::CreateNewPlan(wanted_peer_count) => { - if all_addrs.len() >= wanted_peer_count { - Ok(all_addrs.into_iter().collect()) - } else { - Err(BackoffError::transient(format!( - "Waiting for {} peers (currently have {})", - wanted_peer_count, - all_addrs.len() - ))) - } - } - } - }, - |message, duration| { - info!( - self.log, - "{} (will retry after {:?})", message, duration - ); - }, - ) - // `retry_policy_internal_service_aggressive()` retries indefinitely on - // transient errors (the only kind we produce), allowing us to - // `.unwrap()` without panicking - .await - .unwrap(); - - Ok(addrs) - } - async fn sled_timesync( &self, sled_address: &SocketAddrV6, @@ -936,19 +847,25 @@ impl ServiceInner { // Wait for either: // - All the peers to re-load an old plan (if one exists) // - Enough peers to create a new plan (if one does not exist) - let maybe_sled_plan = SledPlan::load(&self.log).await?; - let expectation = if let Some(plan) = &maybe_sled_plan { - PeerExpectation::LoadOldPlan( - plan.sleds.keys().map(|a| *a.ip()).collect(), - ) - } else { - 
PeerExpectation::CreateNewPlan(MINIMUM_SLED_COUNT) + let bootstrap_addrs = match &config.bootstrap_discovery { + BootstrapAddressDiscovery::OnlyOurs => { + HashSet::from([local_bootstrap_agent.our_address()]) + } + BootstrapAddressDiscovery::OnlyThese(peers) => peers.clone(), }; - - let addrs = self - .wait_for_peers(expectation, local_bootstrap_agent.our_address()) - .await?; - info!(self.log, "Enough peers exist to enact RSS plan"); + let maybe_sled_plan = SledPlan::load(&self.log).await?; + if let Some(plan) = &maybe_sled_plan { + let stored_peers: HashSet = + plan.sleds.keys().map(|a| *a.ip()).collect(); + if stored_peers != bootstrap_addrs { + return Err(SetupServiceError::BadConfig("Set of sleds requested does not match those in existing sled plan".to_string())); + } + } + if bootstrap_addrs.is_empty() { + return Err(SetupServiceError::BadConfig( + "Must request at least one peer".to_string(), + )); + } // If we created a plan, reuse it. Otherwise, create a new plan. // @@ -961,7 +878,7 @@ impl ServiceInner { plan } else { info!(self.log, "Creating new allocation plan"); - SledPlan::create(&self.log, config, addrs).await? + SledPlan::create(&self.log, config, bootstrap_addrs).await? }; let config = &plan.config; diff --git a/smf/sled-agent/gimlet-standalone/config-rss.toml b/smf/sled-agent/gimlet-standalone/config-rss.toml index 05f2860502..246f57848d 100644 --- a/smf/sled-agent/gimlet-standalone/config-rss.toml +++ b/smf/sled-agent/gimlet-standalone/config-rss.toml @@ -6,6 +6,9 @@ # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" +# Only include "our own sled" in the bootstrap network +bootstrap_discovery = "only_ours" + # The number of sleds required to unlock the rack secret. # # For values less than 2, no rack secret will be generated. 
diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index 05f2860502..246f57848d 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -6,6 +6,9 @@ # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" +# Only include "our own sled" in the bootstrap network +bootstrap_discovery = "only_ours" + # The number of sleds required to unlock the rack secret. # # For values less than 2, no rack secret will be generated. From e2c3dc8d6b6e9c8af638619f50221fcea6565b1a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 30 Apr 2023 02:55:59 -0400 Subject: [PATCH 13/15] Provision multiple CRDB nodes, if multiple sleds exist --- sled-agent/src/rack_setup/plan/service.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index d79409adbc..4c09b87d35 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -43,7 +43,7 @@ const BOUNDARY_NTP_COUNT: usize = 2; const NEXUS_COUNT: usize = 1; // The number of CRDB instances to create from RSS. -const CRDB_COUNT: usize = 1; +const CRDB_COUNT: usize = 3; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // when Nexus provisions Oximeter. 
From 9d00c936dfdea3090ccc9fefd9be2085749414de Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 30 Apr 2023 09:19:07 -0400 Subject: [PATCH 14/15] Fix tests --- openapi/sled-agent.json | 3 ++- sled-agent/src/params.rs | 4 ++-- sled-agent/src/services.rs | 19 +++++++++++-------- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index c41da8ab8b..a138ba1537 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -1745,7 +1745,7 @@ "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" }, "ServiceEnsureBody": { - "description": "Used to request that the Sled initialize certain services.\n\nThis may be used to record that certain sleds are responsible for launching services which may not be associated with a dataset, such as Nexus.", + "description": "Used to request that the Sled initialize multiple services.\n\nThis may be used to record that certain sleds are responsible for launching services which may not be associated with a dataset, such as Nexus.", "type": "object", "properties": { "services": { @@ -2138,6 +2138,7 @@ ] }, "ServiceZoneService": { + "description": "Used to request that the Sled initialize a single service.", "type": "object", "properties": { "details": { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 0b80c434b4..d8a9cd5337 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -575,7 +575,7 @@ impl From for sled_agent_client::types::ServiceZoneRequest { } } -/// Used to request that the Sled initialize certain services. +/// Used to request that the Sled initialize a single service. 
#[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, )] @@ -590,7 +590,7 @@ impl From for sled_agent_client::types::ServiceZoneService { } } -/// Used to request that the Sled initialize certain services on initialization. +/// Used to request that the Sled initialize multiple services. /// /// This may be used to record that certain sleds are responsible for /// launching services which may not be associated with a dataset, such diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 77976bf33b..d8366c85f6 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -464,9 +464,11 @@ impl ServiceManager { pub async fn load_non_storage_services(&self) -> Result<(), Error> { let log = &self.inner.log; - let services = - AllZoneRequests::read_from(log, &self.services_ledger_path()?) - .await?; + let ledger = self.services_ledger_path()?; + if !ledger.exists() { + return Ok(()); + } + let services = AllZoneRequests::read_from(log, &ledger).await?; let mut existing_zones = self.inner.zones.lock().await; // Initialize and DNS and NTP services first as they are required @@ -535,11 +537,11 @@ impl ServiceManager { pub async fn load_storage_services(&self) -> Result<(), Error> { let log = &self.inner.log; - let services = AllZoneRequests::read_from( - log, - &self.storage_services_ledger_path()?, - ) - .await?; + let ledger = self.storage_services_ledger_path()?; + if !ledger.exists() { + return Ok(()); + } + let services = AllZoneRequests::read_from(log, &ledger).await?; let mut existing_zones = self.inner.dataset_zones.lock().await; self.initialize_services_locked( &mut existing_zones, @@ -558,6 +560,7 @@ impl ServiceManager { underlay_address: Ipv6Addr, rack_id: Uuid, ) -> Result<(), Error> { + debug!(&self.inner.log, "sled agent started"; "underlay_address" => underlay_address.to_string()); self.inner .sled_info .set(SledAgentInfo { From cfb7cbc61ae370390c8bc1c6d41f1f220f928371 Mon Sep 17 00:00:00 2001 From: 
Sean Klein Date: Sun, 30 Apr 2023 15:59:42 -0400 Subject: [PATCH 15/15] make serialization happier --- sled-agent/src/bootstrap/params.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 146f0dac48..9c01c75af9 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -16,7 +16,7 @@ use std::net::{Ipv4Addr, Ipv6Addr, SocketAddrV6}; use uuid::Uuid; #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] -#[serde(rename_all = "snake_case")] +#[serde(rename_all = "snake_case", tag = "type")] pub enum BootstrapAddressDiscovery { /// Ignore all bootstrap addresses except our own. OnlyOurs,