diff --git a/Cargo.lock b/Cargo.lock index ce366cd7f4..b8153817a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2232,6 +2232,7 @@ dependencies = [ "russh-keys", "serde", "serde_json", + "sled-agent-types", "socket2 0.5.7", "tokio", "toml 0.8.15", @@ -5845,6 +5846,7 @@ dependencies = [ "serde_json", "sha3", "sled-agent-client", + "sled-agent-types", "sled-hardware", "sled-hardware-types", "sled-storage", @@ -8891,6 +8893,30 @@ dependencies = [ "uuid", ] +[[package]] +name = "sled-agent-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "bootstore", + "camino", + "camino-tempfile", + "nexus-client", + "omicron-common", + "omicron-test-utils", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "oxnet", + "rcgen", + "schemars", + "serde", + "serde_json", + "sled-hardware-types", + "slog", + "thiserror", + "toml 0.8.15", +] + [[package]] name = "sled-hardware" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 8e9663bc7b..1a8f691f0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,7 @@ members = [ "passwords", "rpaths", "sled-agent", + "sled-agent/types", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -170,6 +171,7 @@ default-members = [ "passwords", "rpaths", "sled-agent", + "sled-agent/types", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -472,6 +474,7 @@ similar-asserts = "1.5.0" # server zones. 
sled = "=0.34.7" sled-agent-client = { path = "clients/sled-agent-client" } +sled-agent-types = { path = "sled-agent/types" } sled-hardware = { path = "sled-hardware" } sled-hardware-types = { path = "sled-hardware/types" } sled-storage = { path = "sled-storage" } diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 1102094b61..157317cdad 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -23,6 +23,7 @@ russh = "0.43.0" russh-keys = "0.43.0" serde.workspace = true serde_json.workspace = true +sled-agent-types.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } toml.workspace = true trust-dns-resolver.workspace = true diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index e4bf61356c..76b759608c 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -1,7 +1,6 @@ use crate::helpers::generate_name; use anyhow::{anyhow, Context as _, Result}; use chrono::Utc; -use omicron_sled_agent::rack_setup::config::SetupServiceConfig; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{Name, ProjectCreate}; use oxide_client::CustomDnsResolver; @@ -9,6 +8,7 @@ use oxide_client::{Client, ClientImagesExt, ClientProjectsExt, ClientVpcsExt}; use reqwest::dns::Resolve; use reqwest::header::{HeaderMap, HeaderValue}; use reqwest::Url; +use sled_agent_types::rack_init::RackInitializeRequest; use std::net::IpAddr; use std::net::SocketAddr; use std::sync::Arc; @@ -73,7 +73,7 @@ impl Context { } } -fn rss_config() -> Result { +fn rss_config() -> Result { let path = "/opt/oxide/sled-agent/pkg/config-rss.toml"; let content = std::fs::read_to_string(&path).unwrap_or(RSS_CONFIG_STR.to_string()); @@ -81,7 +81,7 @@ fn rss_config() -> Result { .with_context(|| "parsing config-rss as TOML".to_string()) } -fn nexus_external_dns_name(config: &SetupServiceConfig) -> String { +fn 
nexus_external_dns_name(config: &RackInitializeRequest) -> String { format!( "{}.sys.{}", config.recovery_silo.silo_name.as_str(), @@ -89,7 +89,7 @@ fn nexus_external_dns_name(config: &SetupServiceConfig) -> String { ) } -fn external_dns_addr(config: &SetupServiceConfig) -> Result { +fn external_dns_addr(config: &RackInitializeRequest) -> Result { // From the RSS config, grab the first address from the configured services // IP pool as the DNS server's IP address. let dns_ip = config @@ -138,7 +138,7 @@ pub async fn nexus_addr() -> Result { } pub struct ClientParams { - rss_config: SetupServiceConfig, + rss_config: RackInitializeRequest, nexus_dns_name: String, resolver: Arc, proto: &'static str, diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 27cfe576b7..1323769da2 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2658,7 +2658,7 @@ ] }, "EarlyNetworkConfig": { - "description": "Network configuration required to bring up the control plane\n\nThe fields in this structure are those from [`super::params::RackInitializeRequest`] necessary for use beyond RSS. This is just for the initial rack configuration and cold boot purposes. Updates come from Nexus.", + "description": "Network configuration required to bring up the control plane\n\nThe fields in this structure are those from [`crate::rack_init::RackInitializeRequest`] necessary for use beyond RSS. This is just for the initial rack configuration and cold boot purposes. 
Updates come from Nexus.", "type": "object", "properties": { "body": { diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index b798ba783d..a85884587f 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -68,6 +68,7 @@ serde_human_bytes.workspace = true serde_json = { workspace = true, features = ["raw_value"] } sha3.workspace = true sled-agent-client.workspace = true +sled-agent-types.workspace = true sled-hardware.workspace = true sled-hardware-types.workspace = true sled-storage.workspace = true diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index b8b5abf07f..1bd83653ad 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -11,8 +11,8 @@ use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; use omicron_sled_agent::bootstrap::server as bootstrap_server; use omicron_sled_agent::bootstrap::RssAccessError; -use omicron_sled_agent::rack_setup::config::SetupServiceConfig as RssConfig; use omicron_sled_agent::{config::Config as SledConfig, server as sled_server}; +use sled_agent_types::rack_init::RackInitializeRequest; #[derive(Subcommand, Debug)] enum OpenapiFlavor { @@ -81,7 +81,7 @@ async fn do_run() -> Result<(), CmdError> { }; let rss_config = if rss_config_path.exists() { Some( - RssConfig::from_file(rss_config_path) + RackInitializeRequest::from_file(rss_config_path) .map_err(|e| CmdError::Failure(anyhow!(e)))?, ) } else { diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 664e3242ab..742cff4e61 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -5,7 +5,6 @@ //! 
Network setup required to bring up the control plane use anyhow::{anyhow, Context}; -use bootstore::schemes::v0 as bootstore; use dpd_client::types::{ LinkCreate, LinkId, LinkSettings, PortId, PortSettings, }; @@ -26,9 +25,8 @@ use omicron_common::address::DENDRITE_PORT; use omicron_common::address::{MGD_PORT, MGS_PORT}; use omicron_common::api::external::{BfdMode, ImportExportPolicy}; use omicron_common::api::internal::shared::{ - BfdPeerConfig, BgpConfig, BgpPeerConfig, PortConfig, PortConfigV2, PortFec, - PortSpeed, RackNetworkConfig, RackNetworkConfigV2, RouteConfig, - SwitchLocation, UplinkAddressConfig, + BgpConfig, PortConfig, PortFec, PortSpeed, RackNetworkConfig, + SwitchLocation, }; use omicron_common::backoff::{ retry_notify, retry_policy_local, BackoffError, ExponentialBackoff, @@ -36,13 +34,10 @@ use omicron_common::backoff::{ }; use omicron_common::OMICRON_DPD_TAG; use omicron_ddm_admin_client::DdmError; -use oxnet::{IpNet, Ipv4Net, Ipv6Net}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use oxnet::IpNet; use slog::Logger; use std::collections::{HashMap, HashSet}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; -use std::str::FromStr; use std::time::{Duration, Instant}; use thiserror::Error; @@ -728,418 +723,6 @@ fn retry_policy_switch_mapping() -> ExponentialBackoff { .build() } -/// Network configuration required to bring up the control plane -/// -/// The fields in this structure are those from -/// [`super::params::RackInitializeRequest`] necessary for use beyond RSS. This -/// is just for the initial rack configuration and cold boot purposes. Updates -/// come from Nexus. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct EarlyNetworkConfig { - // The current generation number of data as stored in CRDB. - // The initial generation is set during RSS time and then only mutated - // by Nexus. - pub generation: u64, - - // Which version of the data structure do we have. 
This is to help with - // deserialization and conversion in future updates. - pub schema_version: u32, - - // The actual configuration details - pub body: EarlyNetworkConfigBody, -} - -impl FromStr for EarlyNetworkConfig { - type Err = String; - - fn from_str(value: &str) -> Result { - #[derive(Deserialize)] - struct ShadowConfig { - generation: u64, - schema_version: u32, - body: EarlyNetworkConfigBody, - } - - let v2_err = match serde_json::from_str::(&value) { - Ok(cfg) => { - return Ok(EarlyNetworkConfig { - generation: cfg.generation, - schema_version: cfg.schema_version, - body: cfg.body, - }) - } - Err(e) => format!("unable to parse EarlyNetworkConfig: {e:?}"), - }; - // If we fail to parse the config as any known version, we return the - // error corresponding to the parse failure of the newest schema. - serde_json::from_str::(&value) - .map(|v1| EarlyNetworkConfig { - generation: v1.generation, - schema_version: Self::schema_version(), - body: v1.body.into(), - }) - .map_err(|_| v2_err) - } -} - -impl EarlyNetworkConfig { - pub fn schema_version() -> u32 { - 2 - } - - // Note: This currently only converts between v0 and v1 or deserializes v1 of - // `EarlyNetworkConfig`. - pub fn deserialize_bootstore_config( - log: &Logger, - config: &bootstore::NetworkConfig, - ) -> Result { - // Try to deserialize the latest version of the data structure (v2). If - // that succeeds we are done. - let v2_error = - match serde_json::from_slice::(&config.blob) { - Ok(val) => return Ok(val), - Err(error) => { - // Log this error and continue trying to deserialize older - // versions. 
- warn!( - log, - "Failed to deserialize EarlyNetworkConfig \ - as v2, trying next as v1: {}", - error, - ); - error - } - }; - - match serde_json::from_slice::( - &config.blob, - ) { - Ok(v1) => { - // Convert from v1 to v2 - return Ok(EarlyNetworkConfig { - generation: v1.generation, - schema_version: EarlyNetworkConfig::schema_version(), - body: v1.body.into(), - }); - } - Err(error) => { - // Log this error. - warn!( - log, - "Failed to deserialize EarlyNetworkConfig \ - as v1, trying next as v0: {}", - error - ); - } - }; - - match serde_json::from_slice::( - &config.blob, - ) { - Ok(val) => { - // Convert from v0 to v2 - return Ok(EarlyNetworkConfig { - generation: val.generation, - schema_version: 2, - body: EarlyNetworkConfigBody { - ntp_servers: val.ntp_servers, - rack_network_config: val.rack_network_config.map( - |v0_config| { - back_compat::RackNetworkConfigV0::to_v2( - val.rack_subnet, - v0_config, - ) - }, - ), - }, - }); - } - Err(error) => { - // Log this error. - warn!( - log, - "Failed to deserialize EarlyNetworkConfig as v0: {}", error, - ); - } - }; - - // If we fail to parse the config as any known version, we return the - // error corresponding to the parse failure of the newest schema. - Err(v2_error) - } -} - -/// This is the actual configuration of EarlyNetworking. -/// -/// We nest it below the "header" of `generation` and `schema_version` so that -/// we can perform partial deserialization of `EarlyNetworkConfig` to only read -/// the header and defer deserialization of the body once we know the schema -/// version. This is possible via the use of [`serde_json::value::RawValue`] in -/// future (post-v1) deserialization paths. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct EarlyNetworkConfigBody { - /// The external NTP server addresses. 
- pub ntp_servers: Vec, - - // Rack network configuration as delivered from RSS or Nexus - pub rack_network_config: Option, -} - -impl From for bootstore::NetworkConfig { - fn from(value: EarlyNetworkConfig) -> Self { - // Can this ever actually fail? - // We literally just deserialized the same data in RSS - let blob = serde_json::to_vec(&value).unwrap(); - - // Yes this is duplicated, but that seems fine. - let generation = value.generation; - - bootstore::NetworkConfig { generation, blob } - } -} - -/// Structures and routines used to maintain backwards compatibility. The -/// contents of this module should only be used to convert older data into the -/// current format, and not for any ongoing run-time operations. -pub mod back_compat { - use super::*; - - #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] - pub struct EarlyNetworkConfigBodyV1 { - /// The external NTP server addresses. - pub ntp_servers: Vec, - - // Rack network configuration as delivered from RSS or Nexus - pub rack_network_config: Option, - } - - impl From for EarlyNetworkConfigBody { - fn from(v1: EarlyNetworkConfigBodyV1) -> Self { - EarlyNetworkConfigBody { - ntp_servers: v1.ntp_servers, - rack_network_config: v1 - .rack_network_config - .map(|v1_config| v1_config.into()), - } - } - } - - /// Deprecated, use `RackNetworkConfig` instead. Cannot actually deprecate due to - /// - /// - /// Our first version of `RackNetworkConfig`. If this exists in the bootstore, we - /// upgrade out of it into `RackNetworkConfigV1` or later versions if possible. 
- #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] - pub(crate) struct RackNetworkConfigV0 { - // TODO: #3591 Consider making infra-ip ranges implicit for uplinks - /// First ip address to be used for configuring network infrastructure - pub infra_ip_first: Ipv4Addr, - /// Last ip address to be used for configuring network infrastructure - pub infra_ip_last: Ipv4Addr, - /// Uplinks for connecting the rack to external networks - pub uplinks: Vec, - } - - impl RackNetworkConfigV0 { - /// Convert from `RackNetworkConfigV0` to `RackNetworkConfigV1` - /// - /// We cannot use `From for `RackNetworkConfigV2` - /// because the `rack_subnet` field does not exist in `RackNetworkConfigV0` - /// and must be passed in from the `EarlyNetworkConfigV0` struct which - /// contains the `RackNetworkConfigV0` struct. - pub fn to_v2( - rack_subnet: Ipv6Addr, - v0: RackNetworkConfigV0, - ) -> RackNetworkConfigV2 { - RackNetworkConfigV2 { - rack_subnet: Ipv6Net::new(rack_subnet, 56).unwrap(), - infra_ip_first: v0.infra_ip_first, - infra_ip_last: v0.infra_ip_last, - ports: v0 - .uplinks - .into_iter() - .map(|uplink| PortConfigV2::from(uplink)) - .collect(), - bgp: vec![], - bfd: vec![], - } - } - } - - /// Deprecated, use PortConfigV2 instead. Cannot actually deprecate due to - /// - #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] - pub struct PortConfigV1 { - /// The set of routes associated with this port. - pub routes: Vec, - /// This port's addresses and optional vlan IDs - pub addresses: Vec, - /// Switch the port belongs to. - pub switch: SwitchLocation, - /// Nmae of the port this config applies to. - pub port: String, - /// Port speed. - pub uplink_port_speed: PortSpeed, - /// Port forward error correction type. 
- pub uplink_port_fec: PortFec, - /// BGP peers on this port - pub bgp_peers: Vec, - /// Whether or not to set autonegotiation - #[serde(default)] - pub autoneg: bool, - } - - impl From for PortConfigV2 { - fn from(v1: PortConfigV1) -> Self { - PortConfigV2 { - routes: v1.routes.clone(), - addresses: v1 - .addresses - .iter() - .map(|a| UplinkAddressConfig { address: *a, vlan_id: None }) - .collect(), - switch: v1.switch, - port: v1.port, - uplink_port_speed: v1.uplink_port_speed, - uplink_port_fec: v1.uplink_port_fec, - bgp_peers: v1.bgp_peers.clone(), - autoneg: v1.autoneg, - } - } - } - - /// Deprecated, use PortConfigV2 instead. Cannot actually deprecate due to - /// - #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] - pub(crate) struct UplinkConfig { - /// Gateway address - pub gateway_ip: Ipv4Addr, - /// Switch to use for uplink - pub switch: SwitchLocation, - /// Switchport to use for external connectivity - pub uplink_port: String, - /// Speed for the Switchport - pub uplink_port_speed: PortSpeed, - /// Forward Error Correction setting for the uplink port - pub uplink_port_fec: PortFec, - /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport - /// (must be in infra_ip pool) - pub uplink_cidr: Ipv4Net, - /// VLAN id to use for uplink - pub uplink_vid: Option, - } - - impl From for PortConfigV2 { - fn from(value: UplinkConfig) -> Self { - PortConfigV2 { - routes: vec![RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: value.gateway_ip.into(), - vlan_id: value.uplink_vid, - }], - addresses: vec![UplinkAddressConfig { - address: value.uplink_cidr.into(), - vlan_id: value.uplink_vid, - }], - switch: value.switch, - port: value.uplink_port, - uplink_port_speed: value.uplink_port_speed, - uplink_port_fec: value.uplink_port_fec, - bgp_peers: vec![], - autoneg: false, - } - } - } - - /// Deprecated, use `RackNetworkConfig` instead. 
Cannot actually deprecate due to - /// - /// - /// Our second version of `RackNetworkConfig`. If this exists in the bootstore, - /// we upgrade out of it into `RackNetworkConfigV1` or later versions if - /// possible. - #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] - pub struct RackNetworkConfigV1 { - pub rack_subnet: Ipv6Net, - // TODO: #3591 Consider making infra-ip ranges implicit for uplinks - /// First ip address to be used for configuring network infrastructure - pub infra_ip_first: Ipv4Addr, - /// Last ip address to be used for configuring network infrastructure - pub infra_ip_last: Ipv4Addr, - /// Uplinks for connecting the rack to external networks - pub ports: Vec, - /// BGP configurations for connecting the rack to external networks - pub bgp: Vec, - /// BFD configuration for connecting the rack to external networks - #[serde(default)] - pub bfd: Vec, - } - - impl From for RackNetworkConfigV2 { - fn from(v1: RackNetworkConfigV1) -> Self { - RackNetworkConfigV2 { - rack_subnet: v1.rack_subnet, - infra_ip_first: v1.infra_ip_first, - infra_ip_last: v1.infra_ip_last, - ports: v1 - .ports - .into_iter() - .map(|ports| PortConfigV2::from(ports)) - .collect(), - bgp: v1.bgp.clone(), - bfd: v1.bfd.clone(), - } - } - } - - // The second production version of the `EarlyNetworkConfig`. - // - // If this version is in the bootstore than we need to convert it to - // `EarlyNetworkConfigV2`. - // - // Once we do this for all customers that have initialized racks with the - // old version we can go ahead and remove this type and its conversion code - // altogether. - #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] - pub struct EarlyNetworkConfigV1 { - // The current generation number of data as stored in CRDB. - // The initial generation is set during RSS time and then only mutated - // by Nexus. - pub generation: u64, - - // Which version of the data structure do we have. 
This is to help with - // deserialization and conversion in future updates. - pub schema_version: u32, - - // The actual configuration details - pub body: EarlyNetworkConfigBodyV1, - } - - // The first production version of the `EarlyNetworkConfig`. - // - // If this version is in the bootstore than we need to convert it to - // `EarlyNetworkConfigV2`. - // - // Once we do this for all customers that have initialized racks with the - // old version we can go ahead and remove this type and its conversion code - // altogether. - #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] - pub(crate) struct EarlyNetworkConfigV0 { - // The current generation number of data as stored in CRDB. - // The initial generation is set during RSS time and then only mutated - // by Nexus. - pub generation: u64, - - pub rack_subnet: Ipv6Addr, - - /// The external NTP server addresses. - pub ntp_servers: Vec, - - // Rack network configuration as delivered from RSS and only existing at - // generation 1 - pub rack_network_config: Option, - } -} - // The following two conversion functions translate the speed and fec types used // in the internal API to the types used in the dpd-client API. 
The conversion // is done here, rather than with "impl From" at the definition, to avoid a @@ -1165,163 +748,3 @@ fn convert_fec(fec: &PortFec) -> dpd_client::types::PortFec { PortFec::Rs => dpd_client::types::PortFec::Rs, } } - -#[cfg(test)] -mod tests { - use super::*; - use omicron_common::api::internal::shared::RouteConfig; - use omicron_common::api::internal::shared::UplinkAddressConfig; - use omicron_test_utils::dev::test_setup_log; - - #[test] - fn serialized_early_network_config_v0_to_v2_conversion() { - let logctx = test_setup_log( - "serialized_early_network_config_v0_to_v2_conversion", - ); - let v0 = back_compat::EarlyNetworkConfigV0 { - generation: 1, - rack_subnet: Ipv6Addr::UNSPECIFIED, - ntp_servers: Vec::new(), - rack_network_config: Some(back_compat::RackNetworkConfigV0 { - infra_ip_first: Ipv4Addr::UNSPECIFIED, - infra_ip_last: Ipv4Addr::UNSPECIFIED, - uplinks: vec![back_compat::UplinkConfig { - gateway_ip: Ipv4Addr::UNSPECIFIED, - switch: SwitchLocation::Switch0, - uplink_port: "Port0".to_string(), - uplink_port_speed: PortSpeed::Speed100G, - uplink_port_fec: PortFec::None, - uplink_cidr: "192.168.0.1/16".parse().unwrap(), - uplink_vid: None, - }], - }), - }; - - let v0_serialized = serde_json::to_vec(&v0).unwrap(); - let bootstore_conf = - bootstore::NetworkConfig { generation: 1, blob: v0_serialized }; - - let v2 = EarlyNetworkConfig::deserialize_bootstore_config( - &logctx.log, - &bootstore_conf, - ) - .unwrap(); - let v0_rack_network_config = v0.rack_network_config.unwrap(); - let uplink = v0_rack_network_config.uplinks[0].clone(); - let expected = EarlyNetworkConfig { - generation: 1, - schema_version: EarlyNetworkConfig::schema_version(), - body: EarlyNetworkConfigBody { - ntp_servers: v0.ntp_servers.clone(), - rack_network_config: Some(RackNetworkConfigV2 { - rack_subnet: Ipv6Net::new(v0.rack_subnet, 56).unwrap(), - infra_ip_first: v0_rack_network_config.infra_ip_first, - infra_ip_last: v0_rack_network_config.infra_ip_last, - ports: 
vec![PortConfigV2 { - routes: vec![RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: uplink.gateway_ip.into(), - vlan_id: None, - }], - addresses: vec![UplinkAddressConfig { - address: uplink.uplink_cidr.into(), - vlan_id: None, - }], - switch: uplink.switch, - port: uplink.uplink_port, - uplink_port_speed: uplink.uplink_port_speed, - uplink_port_fec: uplink.uplink_port_fec, - autoneg: false, - bgp_peers: vec![], - }], - bgp: vec![], - bfd: vec![], - }), - }, - }; - - assert_eq!(expected, v2); - - logctx.cleanup_successful(); - } - - #[test] - fn serialized_early_network_config_v1_to_v2_conversion() { - let logctx = test_setup_log( - "serialized_early_network_config_v1_to_v2_conversion", - ); - - let v1 = back_compat::EarlyNetworkConfigV1 { - generation: 1, - schema_version: 1, - body: back_compat::EarlyNetworkConfigBodyV1 { - ntp_servers: Vec::new(), - rack_network_config: Some(back_compat::RackNetworkConfigV1 { - rack_subnet: Ipv6Net::new(Ipv6Addr::UNSPECIFIED, 56) - .unwrap(), - infra_ip_first: Ipv4Addr::UNSPECIFIED, - infra_ip_last: Ipv4Addr::UNSPECIFIED, - ports: vec![back_compat::PortConfigV1 { - routes: vec![RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: "192.168.0.2".parse().unwrap(), - vlan_id: None, - }], - addresses: vec!["192.168.0.1/16".parse().unwrap()], - switch: SwitchLocation::Switch0, - port: "Port0".to_string(), - uplink_port_speed: PortSpeed::Speed100G, - uplink_port_fec: PortFec::None, - bgp_peers: Vec::new(), - autoneg: false, - }], - bgp: Vec::new(), - bfd: Vec::new(), - }), - }, - }; - - let v1_serialized = serde_json::to_vec(&v1).unwrap(); - let bootstore_conf = - bootstore::NetworkConfig { generation: 1, blob: v1_serialized }; - - let v2 = EarlyNetworkConfig::deserialize_bootstore_config( - &logctx.log, - &bootstore_conf, - ) - .unwrap(); - let v1_rack_network_config = v1.body.rack_network_config.unwrap(); - let port = v1_rack_network_config.ports[0].clone(); - let expected = EarlyNetworkConfig { 
- generation: 1, - schema_version: EarlyNetworkConfig::schema_version(), - body: EarlyNetworkConfigBody { - ntp_servers: v1.body.ntp_servers.clone(), - rack_network_config: Some(RackNetworkConfigV2 { - rack_subnet: v1_rack_network_config.rack_subnet, - infra_ip_first: v1_rack_network_config.infra_ip_first, - infra_ip_last: v1_rack_network_config.infra_ip_last, - ports: vec![PortConfigV2 { - routes: port.routes.clone(), - addresses: vec![UplinkAddressConfig { - address: port.addresses[0], - vlan_id: None, - }], - switch: port.switch, - port: port.port, - uplink_port_speed: port.uplink_port_speed, - uplink_port_fec: port.uplink_port_fec, - autoneg: false, - bgp_peers: vec![], - }], - bgp: vec![], - bfd: vec![], - }), - }, - }; - - assert_eq!(expected, v2); - - logctx.cleanup_successful(); - } -} diff --git a/sled-agent/src/bootstrap/http_entrypoints.rs b/sled-agent/src/bootstrap/http_entrypoints.rs index d3207f05a8..824bb5fd25 100644 --- a/sled-agent/src/bootstrap/http_entrypoints.rs +++ b/sled-agent/src/bootstrap/http_entrypoints.rs @@ -10,7 +10,6 @@ use super::rack_ops::RssAccess; use super::BootstrapError; use super::RssAccessError; -use crate::bootstrap::params::RackInitializeRequest; use crate::updates::ConfigUpdates; use crate::updates::{Component, UpdateManager}; use bootstore::schemes::v0 as bootstore; @@ -23,8 +22,8 @@ use http::StatusCode; use omicron_common::api::external::Error; use omicron_uuid_kinds::RackInitUuid; use omicron_uuid_kinds::RackResetUuid; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use sled_agent_types::rack_init::RackInitializeRequest; +use sled_agent_types::rack_ops::RackOperationStatus; use sled_hardware_types::Baseboard; use sled_storage::manager::StorageHandle; use slog::Logger; @@ -82,45 +81,6 @@ pub(crate) fn api() -> BootstrapApiDescription { api } -/// Current status of any rack-level operation being performed by this bootstrap -/// agent. 
-#[derive( - Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema, -)] -#[serde(tag = "status", rename_all = "snake_case")] -pub enum RackOperationStatus { - Initializing { - id: RackInitUuid, - }, - /// `id` will be none if the rack was already initialized on startup. - Initialized { - id: Option, - }, - InitializationFailed { - id: RackInitUuid, - message: String, - }, - InitializationPanicked { - id: RackInitUuid, - }, - Resetting { - id: RackResetUuid, - }, - /// `reset_id` will be None if the rack is in an uninitialized-on-startup, - /// or Some if it is in an uninitialized state due to a reset operation - /// completing. - Uninitialized { - reset_id: Option, - }, - ResetFailed { - id: RackResetUuid, - message: String, - }, - ResetPanicked { - id: RackResetUuid, - }, -} - /// Return the baseboard identity of this sled. #[endpoint { method = GET, diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 4a5b443dc3..9fe399419f 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,301 +4,17 @@ //! Request types for the bootstrap agent -use crate::bootstrap::early_networking::back_compat::RackNetworkConfigV1; -use anyhow::{bail, Result}; +use anyhow::Result; use async_trait::async_trait; use omicron_common::address::{self, Ipv6Subnet, SLED_PREFIX}; -use omicron_common::api::external::AllowedSourceIps; -use omicron_common::api::internal::shared::RackNetworkConfig; use omicron_common::ledger::Ledgerable; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sha3::{Digest, Sha3_256}; -use sled_hardware_types::Baseboard; use std::borrow::Cow; -use std::collections::BTreeSet; use std::net::{IpAddr, Ipv6Addr, SocketAddrV6}; use uuid::Uuid; -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] -#[serde(rename_all = "snake_case", tag = "type")] -pub enum BootstrapAddressDiscovery { - /// Ignore all bootstrap addresses except our own. 
- OnlyOurs, - /// Ignore all bootstrap addresses except the following. - OnlyThese { addrs: BTreeSet }, -} - -/// Structures and routines used to maintain backwards compatibility. The -/// contents of this module should only be used to convert older data into the -/// current format, and not for any ongoing run-time operations. -pub mod back_compat { - use super::*; - - #[derive(Clone, Deserialize)] - struct UnvalidatedRackInitializeRequestV1 { - trust_quorum_peers: Option>, - bootstrap_discovery: BootstrapAddressDiscovery, - ntp_servers: Vec, - dns_servers: Vec, - internal_services_ip_pool_ranges: Vec, - external_dns_ips: Vec, - external_dns_zone_name: String, - external_certificates: Vec, - recovery_silo: RecoverySiloConfig, - rack_network_config: RackNetworkConfigV1, - #[serde(default = "default_allowed_source_ips")] - allowed_source_ips: AllowedSourceIps, - } - - /// This is a deprecated format, maintained to allow importing from older - /// versions. - #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] - #[serde(try_from = "UnvalidatedRackInitializeRequestV1")] - pub struct RackInitializeRequestV1 { - pub trust_quorum_peers: Option>, - pub bootstrap_discovery: BootstrapAddressDiscovery, - pub ntp_servers: Vec, - pub dns_servers: Vec, - pub internal_services_ip_pool_ranges: Vec, - pub external_dns_ips: Vec, - pub external_dns_zone_name: String, - pub external_certificates: Vec, - pub recovery_silo: RecoverySiloConfig, - pub rack_network_config: RackNetworkConfigV1, - #[serde(default = "default_allowed_source_ips")] - pub allowed_source_ips: AllowedSourceIps, - } - - impl TryFrom for RackInitializeRequestV1 { - type Error = anyhow::Error; - - fn try_from(value: UnvalidatedRackInitializeRequestV1) -> Result { - validate_external_dns( - &value.external_dns_ips, - &value.internal_services_ip_pool_ranges, - )?; - - Ok(RackInitializeRequestV1 { - trust_quorum_peers: value.trust_quorum_peers, - bootstrap_discovery: value.bootstrap_discovery, - 
ntp_servers: value.ntp_servers, - dns_servers: value.dns_servers, - internal_services_ip_pool_ranges: value - .internal_services_ip_pool_ranges, - external_dns_ips: value.external_dns_ips, - external_dns_zone_name: value.external_dns_zone_name, - external_certificates: value.external_certificates, - recovery_silo: value.recovery_silo, - rack_network_config: value.rack_network_config, - allowed_source_ips: value.allowed_source_ips, - }) - } - } - impl From for RackInitializeRequest { - fn from(v1: RackInitializeRequestV1) -> Self { - RackInitializeRequest { - trust_quorum_peers: v1.trust_quorum_peers, - bootstrap_discovery: v1.bootstrap_discovery, - ntp_servers: v1.ntp_servers, - dns_servers: v1.dns_servers, - internal_services_ip_pool_ranges: v1 - .internal_services_ip_pool_ranges, - external_dns_ips: v1.external_dns_ips, - external_dns_zone_name: v1.external_dns_zone_name, - external_certificates: v1.external_certificates, - recovery_silo: v1.recovery_silo, - rack_network_config: v1.rack_network_config.into(), - allowed_source_ips: v1.allowed_source_ips, - } - } - } -} - -// "Shadow" copy of `RackInitializeRequest` that does no validation on its -// fields. -#[derive(Clone, Deserialize)] -struct UnvalidatedRackInitializeRequest { - trust_quorum_peers: Option>, - bootstrap_discovery: BootstrapAddressDiscovery, - ntp_servers: Vec, - dns_servers: Vec, - internal_services_ip_pool_ranges: Vec, - external_dns_ips: Vec, - external_dns_zone_name: String, - external_certificates: Vec, - recovery_silo: RecoverySiloConfig, - rack_network_config: RackNetworkConfig, - #[serde(default = "default_allowed_source_ips")] - allowed_source_ips: AllowedSourceIps, -} - -/// Configuration for the "rack setup service". -/// -/// The Rack Setup Service should be responsible for one-time setup actions, -/// such as CockroachDB placement and initialization. Without operator -/// intervention, however, these actions need a way to be automated in our -/// deployment. 
-#[derive(Clone, Deserialize, Serialize, PartialEq, JsonSchema)] -#[serde(try_from = "UnvalidatedRackInitializeRequest")] -pub struct RackInitializeRequest { - /// The set of peer_ids required to initialize trust quorum - /// - /// The value is `None` if we are not using trust quorum - pub trust_quorum_peers: Option>, - - /// Describes how bootstrap addresses should be collected during RSS. - pub bootstrap_discovery: BootstrapAddressDiscovery, - - /// The external NTP server addresses. - pub ntp_servers: Vec, - - /// The external DNS server addresses. - pub dns_servers: Vec, - - /// Ranges of the service IP pool which may be used for internal services. - // TODO(https://github.com/oxidecomputer/omicron/issues/1530): Eventually, - // we want to configure multiple pools. - pub internal_services_ip_pool_ranges: Vec, - - /// Service IP addresses on which we run external DNS servers. - /// - /// Each address must be present in `internal_services_ip_pool_ranges`. - pub external_dns_ips: Vec, - - /// DNS name for the DNS zone delegated to the rack for external DNS - pub external_dns_zone_name: String, - - /// initial TLS certificates for the external API - pub external_certificates: Vec, - - /// Configuration of the Recovery Silo (the initial Silo) - pub recovery_silo: RecoverySiloConfig, - - /// Initial rack network configuration - pub rack_network_config: RackNetworkConfig, - - /// IPs or subnets allowed to make requests to user-facing services - #[serde(default = "default_allowed_source_ips")] - pub allowed_source_ips: AllowedSourceIps, -} - -impl RackInitializeRequest { - pub fn from_toml_with_fallback( - data: &str, - ) -> Result { - let v2_err = match toml::from_str::(&data) { - Ok(req) => return Ok(req), - Err(e) => e, - }; - if let Ok(v1) = - toml::from_str::(&data) - { - return Ok(v1.into()); - } - - // If we fail to parse the request as any known version, we return the - // error corresponding to the parse failure of the newest schema. 
- Err(v2_err.into()) - } -} - -/// This field was added after several racks were already deployed. RSS plans -/// for those racks should default to allowing any source IP, since that is -/// effectively what they did. -const fn default_allowed_source_ips() -> AllowedSourceIps { - AllowedSourceIps::Any -} - -// This custom debug implementation hides the private keys. -impl std::fmt::Debug for RackInitializeRequest { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // If you find a compiler error here, and you just added a field to this - // struct, be sure to add it to the Debug impl below! - let RackInitializeRequest { - trust_quorum_peers: trust_qurorum_peers, - bootstrap_discovery, - ntp_servers, - dns_servers, - internal_services_ip_pool_ranges, - external_dns_ips, - external_dns_zone_name, - external_certificates: _, - recovery_silo, - rack_network_config, - allowed_source_ips, - } = &self; - - f.debug_struct("RackInitializeRequest") - .field("trust_quorum_peers", trust_qurorum_peers) - .field("bootstrap_discovery", bootstrap_discovery) - .field("ntp_servers", ntp_servers) - .field("dns_servers", dns_servers) - .field( - "internal_services_ip_pool_ranges", - internal_services_ip_pool_ranges, - ) - .field("external_dns_ips", external_dns_ips) - .field("external_dns_zone_name", external_dns_zone_name) - .field("external_certificates", &"") - .field("recovery_silo", recovery_silo) - .field("rack_network_config", rack_network_config) - .field("allowed_source_ips", allowed_source_ips) - .finish() - } -} - -fn validate_external_dns( - dns_ips: &Vec, - internal_ranges: &Vec, -) -> Result<()> { - if dns_ips.is_empty() { - bail!("At least one external DNS IP is required"); - } - - // Every external DNS IP should also be present in one of the internal - // services IP pool ranges. This check is O(N*M), but we expect both N - // and M to be small (~5 DNS servers, and a small number of pools). 
- for &dns_ip in dns_ips { - if !internal_ranges.iter().any(|range| range.contains(dns_ip)) { - bail!( - "External DNS IP {dns_ip} is not contained in \ - `internal_services_ip_pool_ranges`" - ); - } - } - Ok(()) -} - -impl TryFrom for RackInitializeRequest { - type Error = anyhow::Error; - - fn try_from(value: UnvalidatedRackInitializeRequest) -> Result { - validate_external_dns( - &value.external_dns_ips, - &value.internal_services_ip_pool_ranges, - )?; - - Ok(RackInitializeRequest { - trust_quorum_peers: value.trust_quorum_peers, - bootstrap_discovery: value.bootstrap_discovery, - ntp_servers: value.ntp_servers, - dns_servers: value.dns_servers, - internal_services_ip_pool_ranges: value - .internal_services_ip_pool_ranges, - external_dns_ips: value.external_dns_ips, - external_dns_zone_name: value.external_dns_zone_name, - external_certificates: value.external_certificates, - recovery_silo: value.recovery_silo, - rack_network_config: value.rack_network_config, - allowed_source_ips: value.allowed_source_ips, - }) - } -} - -pub type Certificate = nexus_client::types::Certificate; -pub type RecoverySiloConfig = nexus_client::types::RecoverySiloConfig; - /// A representation of a Baseboard ID as used in the inventory subsystem /// This type is essentially the same as a `Baseboard` except it doesn't have a /// revision or HW type (Gimlet, PC, Unknown). 
@@ -480,70 +196,11 @@ pub(super) mod version { pub(crate) const V1: u32 = 1; } -#[cfg(test)] -pub fn test_config() -> RackInitializeRequest { - let manifest = std::env::var("CARGO_MANIFEST_DIR") - .expect("Cannot access manifest directory"); - let manifest = camino::Utf8PathBuf::from(manifest); - let path = manifest.join("../smf/sled-agent/non-gimlet/config-rss.toml"); - let contents = std::fs::read_to_string(&path).unwrap(); - toml::from_str(&contents) - .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)) -} - #[cfg(test)] mod tests { - use std::net::Ipv4Addr; use std::net::Ipv6Addr; use super::*; - use camino::Utf8PathBuf; - use oxnet::Ipv6Net; - - #[test] - fn parse_rack_initialization() { - let manifest = std::env::var("CARGO_MANIFEST_DIR") - .expect("Cannot access manifest directory"); - let manifest = Utf8PathBuf::from(manifest); - - let path = - manifest.join("../smf/sled-agent/non-gimlet/config-rss.toml"); - let contents = std::fs::read_to_string(&path).unwrap(); - let _: RackInitializeRequest = toml::from_str(&contents) - .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)); - - let path = manifest - .join("../smf/sled-agent/gimlet-standalone/config-rss.toml"); - let contents = std::fs::read_to_string(&path).unwrap(); - let _: RackInitializeRequest = toml::from_str(&contents) - .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)); - } - - #[test] - fn parse_rack_initialization_weak_hash() { - let config = r#" - bootstrap_discovery.type = "only_ours" - ntp_servers = [ "ntp.eng.oxide.computer" ] - dns_servers = [ "1.1.1.1", "9.9.9.9" ] - external_dns_zone_name = "oxide.test" - - [[internal_services_ip_pool_ranges]] - first = "192.168.1.20" - last = "192.168.1.22" - - [recovery_silo] - silo_name = "recovery" - user_name = "recovery" - user_password_hash = "$argon2i$v=19$m=16,t=2,p=1$NVR0a2QxVXNiQjlObFJXbA$iGFJWOlUqN20B8KR4Fsmrg" - "#; - - let error = toml::from_str::(config) - .expect_err("unexpectedly parsed with bad 
password hash"); - println!("found error: {}", error); - assert!(error.to_string().contains( - "password hash: algorithm: expected argon2id, found argon2i" - )); - } #[test] fn json_serialization_round_trips() { @@ -600,123 +257,4 @@ mod tests { Ledgerable::deserialize(&serialized).unwrap(); assert_eq!(expected, actual); } - - #[test] - fn validate_external_dns_ips_must_be_in_internal_services_ip_pools() { - // Conjure up a config; we'll tweak the internal services pools and - // external DNS IPs, but no other fields matter. - let mut config = UnvalidatedRackInitializeRequest { - trust_quorum_peers: None, - bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, - ntp_servers: Vec::new(), - dns_servers: Vec::new(), - internal_services_ip_pool_ranges: Vec::new(), - external_dns_ips: Vec::new(), - external_dns_zone_name: "".to_string(), - external_certificates: Vec::new(), - recovery_silo: RecoverySiloConfig { - silo_name: "recovery".parse().unwrap(), - user_name: "recovery".parse().unwrap(), - user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY".parse().unwrap(), - }, - rack_network_config: RackNetworkConfig { - rack_subnet: Ipv6Net::host_net(Ipv6Addr::LOCALHOST), - infra_ip_first: Ipv4Addr::LOCALHOST, - infra_ip_last: Ipv4Addr::LOCALHOST, - ports: Vec::new(), - bgp: Vec::new(), - bfd: Vec::new(), - }, - allowed_source_ips: AllowedSourceIps::Any, - }; - - // Valid configs: all external DNS IPs are contained in the IP pool - // ranges. 
- for (ip_pool_ranges, dns_ips) in [ - ( - &[("fd00::1", "fd00::10")] as &[(&str, &str)], - &["fd00::1", "fd00::5", "fd00::10"] as &[&str], - ), - ( - &[("192.168.1.10", "192.168.1.20")], - &["192.168.1.10", "192.168.1.15", "192.168.1.20"], - ), - ( - &[("fd00::1", "fd00::10"), ("192.168.1.10", "192.168.1.20")], - &[ - "fd00::1", - "fd00::5", - "fd00::10", - "192.168.1.10", - "192.168.1.15", - "192.168.1.20", - ], - ), - ] { - config.internal_services_ip_pool_ranges = ip_pool_ranges - .iter() - .map(|(a, b)| { - address::IpRange::try_from(( - a.parse::().unwrap(), - b.parse::().unwrap(), - )) - .unwrap() - }) - .collect(); - config.external_dns_ips = - dns_ips.iter().map(|ip| ip.parse().unwrap()).collect(); - - match RackInitializeRequest::try_from(config.clone()) { - Ok(_) => (), - Err(err) => panic!( - "failure on {ip_pool_ranges:?} with DNS IPs {dns_ips:?}: \ - {err}" - ), - } - } - - // Invalid configs: either no DNS IPs, or one or more DNS IPs are not - // contained in the ip pool ranges. 
- for (ip_pool_ranges, dns_ips) in [ - (&[("fd00::1", "fd00::10")] as &[(&str, &str)], &[] as &[&str]), - (&[("fd00::1", "fd00::10")], &["fd00::1", "fd00::5", "fd00::11"]), - ( - &[("192.168.1.10", "192.168.1.20")], - &["192.168.1.9", "192.168.1.15", "192.168.1.20"], - ), - ( - &[("fd00::1", "fd00::10"), ("192.168.1.10", "192.168.1.20")], - &[ - "fd00::1", - "fd00::5", - "fd00::10", - "192.168.1.10", - "192.168.1.15", - "192.168.1.20", - "192.168.1.21", - ], - ), - ] { - config.internal_services_ip_pool_ranges = ip_pool_ranges - .iter() - .map(|(a, b)| { - address::IpRange::try_from(( - a.parse::().unwrap(), - b.parse::().unwrap(), - )) - .unwrap() - }) - .collect(); - config.external_dns_ips = - dns_ips.iter().map(|ip| ip.parse().unwrap()).collect(); - - match RackInitializeRequest::try_from(config.clone()) { - Ok(_) => panic!( - "unexpected success on {ip_pool_ranges:?} with \ - DNS IPs {dns_ips:?}" - ), - Err(_) => (), - } - } - } } diff --git a/sled-agent/src/bootstrap/rack_ops.rs b/sled-agent/src/bootstrap/rack_ops.rs index 4da5f0ab28..3eb00b419a 100644 --- a/sled-agent/src/bootstrap/rack_ops.rs +++ b/sled-agent/src/bootstrap/rack_ops.rs @@ -4,13 +4,13 @@ //! Internal API for rack-level bootstrap agent operations. 
-use crate::bootstrap::http_entrypoints::RackOperationStatus; -use crate::bootstrap::params::RackInitializeRequest; use crate::bootstrap::rss_handle::RssHandle; use crate::rack_setup::service::SetupServiceError; use bootstore::schemes::v0 as bootstore; use omicron_uuid_kinds::RackInitUuid; use omicron_uuid_kinds::RackResetUuid; +use sled_agent_types::rack_init::RackInitializeRequest; +use sled_agent_types::rack_ops::RackOperationStatus; use sled_storage::manager::StorageHandle; use slog::Logger; use std::mem; diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index 9baf0e7ef3..73f7537853 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -6,7 +6,6 @@ use super::client as bootstrap_agent_client; use super::params::StartSledAgentRequest; -use crate::rack_setup::config::SetupServiceConfig; use crate::rack_setup::service::RackSetupService; use crate::rack_setup::service::SetupServiceError; use ::bootstrap_agent_client::Client as BootstrapAgentClient; @@ -16,6 +15,7 @@ use futures::StreamExt; use omicron_common::backoff::retry_notify; use omicron_common::backoff::retry_policy_local; use omicron_common::backoff::BackoffError; +use sled_agent_types::rack_init::RackInitializeRequest; use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::Ipv6Addr; @@ -44,7 +44,7 @@ impl RssHandle { /// Executes the rack setup service until it has completed pub(super) async fn run_rss( log: &Logger, - config: SetupServiceConfig, + config: RackInitializeRequest, our_bootstrap_address: Ipv6Addr, storage_manager: StorageHandle, bootstore: bootstore::NodeHandle, diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 69a6f455cc..656be1a394 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -6,7 +6,6 @@ use super::config::BOOTSTRAP_AGENT_HTTP_PORT; use super::http_entrypoints; -use 
super::params::RackInitializeRequest; use super::params::StartSledAgentRequest; use super::views::SledAgentResponse; use super::BootstrapError; @@ -42,6 +41,7 @@ use omicron_common::ledger::Ledger; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_ddm_admin_client::DdmError; use omicron_uuid_kinds::RackInitUuid; +use sled_agent_types::rack_init::RackInitializeRequest; use sled_hardware::underlay; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 2612e504f5..1ecda51657 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -5,7 +5,6 @@ //! HTTP entrypoint functions for the sled agent's exposed API use super::sled_agent::SledAgent; -use crate::bootstrap::early_networking::EarlyNetworkConfig; use crate::bootstrap::params::AddSledRequest; use crate::params::{ BootstoreStatus, CleanupContextUpdate, DiskEnsureBody, InstanceEnsureBody, @@ -37,6 +36,7 @@ use omicron_common::api::internal::shared::{ use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_hardware::DiskVariant; use sled_storage::resources::DisksManagementResult; use std::collections::BTreeMap; diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs deleted file mode 100644 index 43664cfd04..0000000000 --- a/sled-agent/src/rack_setup/config.rs +++ /dev/null @@ -1,249 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Interfaces for working with RSS config. 
- -use crate::config::ConfigError; -use camino::Utf8Path; -use omicron_common::address::{ - get_64_subnet, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, -}; - -pub use crate::bootstrap::params::back_compat::RackInitializeRequestV1 as SetupServiceConfigV1; -use crate::bootstrap::params::Certificate; -pub use crate::bootstrap::params::RackInitializeRequest as SetupServiceConfig; - -impl SetupServiceConfig { - pub fn from_file>(path: P) -> Result { - let path = path.as_ref(); - let contents = std::fs::read_to_string(&path) - .map_err(|err| ConfigError::Io { path: path.into(), err })?; - let mut raw_config = - SetupServiceConfig::from_toml_with_fallback(&contents) - .map_err(|err| ConfigError::Parse { path: path.into(), err })?; - - // In the same way that sled-agent itself (our caller) discovers the - // optional config-rss.toml in a well-known path relative to its config - // file, we look for a pair of well-known paths adjacent to - // config-rss.toml that specify an extra TLS certificate and private - // key. This is used by the end-to-end tests. Any developer can also - // use this to inject a TLS certificate into their setup. - // (config-rss.toml is only used for dev/test, not production - // deployments, which will always get their RSS configuration from - // Wicket.) - if let Some(parent) = path.parent() { - let cert_path = parent.join("initial-tls-cert.pem"); - let key_path = parent.join("initial-tls-key.pem"); - let cert_bytes = std::fs::read_to_string(&cert_path); - let key_bytes = std::fs::read_to_string(&key_path); - match (cert_bytes, key_bytes) { - (Ok(cert), Ok(key)) => { - raw_config - .external_certificates - .push(Certificate { key, cert }); - } - (Err(cert_error), Err(key_error)) - if cert_error.kind() == std::io::ErrorKind::NotFound - && key_error.kind() == std::io::ErrorKind::NotFound => - { - // Fine. No extra cert was provided. 
- } - (Err(cert_error), _) => { - return Err(ConfigError::Certificate( - anyhow::Error::new(cert_error).context(format!( - "loading certificate from {:?}", - cert_path - )), - )); - } - (_, Err(key_error)) => { - return Err(ConfigError::Certificate( - anyhow::Error::new(key_error).context(format!( - "loading private key from {:?}", - key_path - )), - )); - } - }; - } - - Ok(raw_config) - } - - pub fn az_subnet(&self) -> Ipv6Subnet { - Ipv6Subnet::::new( - self.rack_network_config.rack_subnet.addr(), - ) - } - - /// Returns the subnet for our rack. - pub fn rack_subnet(&self) -> Ipv6Subnet { - Ipv6Subnet::::new( - self.rack_network_config.rack_subnet.addr(), - ) - } - - /// Returns the subnet for the `index`-th sled in the rack. - pub fn sled_subnet(&self, index: u8) -> Ipv6Subnet { - get_64_subnet(self.rack_subnet(), index) - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::bootstrap::params::BootstrapAddressDiscovery; - use crate::bootstrap::params::RecoverySiloConfig; - use anyhow::Context; - use camino::Utf8PathBuf; - use omicron_common::address::IpRange; - use omicron_common::api::internal::shared::AllowedSourceIps; - use omicron_common::api::internal::shared::RackNetworkConfig; - use oxnet::Ipv6Net; - use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; - - #[test] - fn test_subnets() { - let cfg = SetupServiceConfig { - trust_quorum_peers: None, - bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, - ntp_servers: vec![String::from("test.pool.example.com")], - dns_servers: vec!["1.1.1.1".parse().unwrap()], - external_dns_zone_name: String::from("oxide.test"), - internal_services_ip_pool_ranges: vec![IpRange::from(IpAddr::V4( - Ipv4Addr::new(129, 168, 1, 20), - ))], - external_dns_ips: vec![], - external_certificates: vec![], - recovery_silo: RecoverySiloConfig { - silo_name: "test-silo".parse().unwrap(), - user_name: "dummy".parse().unwrap(), - // This is a hash for the password "oxide". It doesn't matter, - // though; it's not used. 
- user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$\ - RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/\ - ek3GL0el/oProgTwWpHJZ8lsQQoY" - .parse() - .unwrap(), - }, - rack_network_config: RackNetworkConfig { - rack_subnet: Ipv6Net::new( - "fd00:1122:3344:0100::".parse().unwrap(), - RACK_PREFIX, - ) - .unwrap(), - infra_ip_first: Ipv4Addr::LOCALHOST, - infra_ip_last: Ipv4Addr::LOCALHOST, - ports: Vec::new(), - bgp: Vec::new(), - bfd: Vec::new(), - }, - allowed_source_ips: AllowedSourceIps::Any, - }; - - assert_eq!( - Ipv6Subnet::::new( - // Masked out in AZ Subnet - // vv - "fd00:1122:3344:0000::".parse::().unwrap(), - ), - cfg.az_subnet() - ); - assert_eq!( - Ipv6Subnet::::new( - // Shows up from Rack Subnet - // vv - "fd00:1122:3344:0100::".parse::().unwrap(), - ), - cfg.rack_subnet() - ); - assert_eq!( - Ipv6Subnet::::new( - // 0th Sled Subnet - // vv - "fd00:1122:3344:0100::".parse::().unwrap(), - ), - cfg.sled_subnet(0) - ); - assert_eq!( - Ipv6Subnet::::new( - // 1st Sled Subnet - // vv - "fd00:1122:3344:0101::".parse::().unwrap(), - ), - cfg.sled_subnet(1) - ); - assert_eq!( - Ipv6Subnet::::new( - // Last Sled Subnet - // vv - "fd00:1122:3344:01ff::".parse::().unwrap(), - ), - cfg.sled_subnet(255) - ); - } - - #[test] - fn test_extra_certs() { - // The stock non-Gimlet config has no TLS certificates. - let path = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("../smf/sled-agent/non-gimlet/config-rss.toml"); - let cfg = SetupServiceConfig::from_file(&path) - .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)); - assert!(cfg.external_certificates.is_empty()); - - // Now let's create a configuration that does have an adjacent - // certificate and key. - let tempdir = - camino_tempfile::tempdir().expect("creating temporary directory"); - println!("using temp path: {:?}", tempdir); - - // Generate the certificate. 
- let domain = format!( - "{}.sys.{}", - cfg.external_dns_zone_name, - cfg.recovery_silo.silo_name.as_str(), - ); - let cert = rcgen::generate_simple_self_signed(vec![domain.clone()]) - .unwrap_or_else(|error| { - panic!( - "generating certificate for domain {:?}: {}", - domain, error - ) - }); - - // Write the configuration file. - let cfg_path = tempdir.path().join("config-rss.toml"); - let _ = std::fs::copy(&path, &cfg_path) - .with_context(|| { - format!("failed to copy file {:?} to {:?}", &path, &cfg_path) - }) - .unwrap(); - - // Write the certificate. - let cert_bytes = cert - .serialize_pem() - .expect("serializing generated certificate") - .into_bytes(); - let cert_path = tempdir.path().join("initial-tls-cert.pem"); - std::fs::write(&cert_path, &cert_bytes) - .with_context(|| format!("failed to write to {:?}", &cert_path)) - .unwrap(); - - // Write the private key. - let key_path = tempdir.path().join("initial-tls-key.pem"); - let key_bytes = cert.serialize_private_key_pem().into_bytes(); - std::fs::write(&key_path, &key_bytes) - .with_context(|| format!("failed to write to {:?}", &key_path)) - .unwrap(); - - // Now try to load it all. - let read_cfg = SetupServiceConfig::from_file(&cfg_path) - .expect("failed to read generated config with certificate"); - assert_eq!(read_cfg.external_certificates.len(), 1); - let cert = read_cfg.external_certificates.first().unwrap(); - let _ = rcgen::KeyPair::from_pem(&cert.key) - .expect("generated PEM did not parse as KeyPair"); - } -} diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs index 0ad8e0ce71..0ec14138fc 100644 --- a/sled-agent/src/rack_setup/mod.rs +++ b/sled-agent/src/rack_setup/mod.rs @@ -4,8 +4,6 @@ //! Rack Setup Service -/// Configuration files which automate input to RSS. -pub mod config; mod plan; /// The main implementation of the RSS service. 
pub mod service; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 9493361d19..d23c6715c6 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -9,7 +9,6 @@ use crate::params::{ OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig, OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, }; -use crate::rack_setup::config::SetupServiceConfig as Config; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; @@ -37,6 +36,7 @@ use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; +use sled_agent_types::rack_init::RackInitializeRequest as Config; use sled_storage::dataset::{DatasetKind, DatasetName, CONFIG_DATASET}; use sled_storage::manager::StorageHandle; use slog::Logger; @@ -1180,12 +1180,12 @@ impl ServicePortBuilder { #[cfg(test)] mod tests { use super::*; - use crate::bootstrap::params::BootstrapAddressDiscovery; - use crate::bootstrap::params::RecoverySiloConfig; use omicron_common::address::IpRange; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::RackNetworkConfig; use oxnet::Ipv6Net; + use sled_agent_types::rack_init::BootstrapAddressDiscovery; + use sled_agent_types::rack_init::RecoverySiloConfig; const EXPECTED_RESERVED_ADDRESSES: u16 = 2; const EXPECTED_USABLE_ADDRESSES: u16 = diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index c6d2e73ccd..3d5b90a22d 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -8,12 +8,12 @@ use crate::bootstrap::params::StartSledAgentRequestBody; use crate::bootstrap::{ config::BOOTSTRAP_AGENT_RACK_INIT_PORT, params::StartSledAgentRequest, }; -use crate::rack_setup::config::SetupServiceConfig as Config; -use 
crate::rack_setup::config::SetupServiceConfigV1 as ConfigV1; use camino::Utf8PathBuf; use omicron_common::ledger::{self, Ledger, Ledgerable}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_agent_types::rack_init::back_compat::RackInitializeRequestV1 as ConfigV1; +use sled_agent_types::rack_init::RackInitializeRequest as Config; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 2d7a355440..c8e56ae9f4 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -64,14 +64,11 @@ //! completing execution, and unconditionally calls the "handoff to Nexus" API //! thereafter. -use super::config::SetupServiceConfig as Config; use super::plan::service::SledConfig; use crate::bootstrap::config::BOOTSTRAP_AGENT_HTTP_PORT; use crate::bootstrap::early_networking::{ - EarlyNetworkConfig, EarlyNetworkConfigBody, EarlyNetworkSetup, - EarlyNetworkSetupError, + EarlyNetworkSetup, EarlyNetworkSetupError, }; -use crate::bootstrap::params::BootstrapAddressDiscovery; use crate::bootstrap::params::StartSledAgentRequest; use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::nexus::{d2n_params, ConvertInto}; @@ -111,6 +108,12 @@ use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; +use sled_agent_types::early_networking::{ + EarlyNetworkConfig, EarlyNetworkConfigBody, +}; +use sled_agent_types::rack_init::{ + BootstrapAddressDiscovery, RackInitializeRequest as Config, +}; use sled_hardware_types::underlay::BootstrapInterface; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; @@ -1545,7 +1548,7 @@ impl<'a> OmicronZonesConfigGenerator<'a> { #[cfg(test)] mod test { - use super::OmicronZonesConfigGenerator; + use super::{Config, 
OmicronZonesConfigGenerator}; use crate::{ params::OmicronZoneType, rack_setup::plan::service::{Plan as ServicePlan, SledInfo}, @@ -1594,7 +1597,7 @@ mod test { } fn make_test_service_plan() -> ServicePlan { - let rss_config = crate::bootstrap::params::test_config(); + let rss_config = Config::test_config(); let fake_sleds = vec![ make_sled_info( SledUuid::new_v4(), diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 78d48be0ff..399ec334f4 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -4,7 +4,6 @@ //! HTTP entrypoint functions for the sled agent's exposed API -use crate::bootstrap::early_networking::EarlyNetworkConfig; use crate::bootstrap::params::AddSledRequest; use crate::params::{ DiskEnsureBody, InstanceEnsureBody, InstanceExternalIpBody, @@ -30,6 +29,7 @@ use omicron_common::api::internal::shared::{ use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_storage::resources::DisksManagementResult; use std::sync::Arc; use uuid::Uuid; diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 9cb146531b..f23b14c377 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -10,9 +10,6 @@ use super::disk::SimDisk; use super::instance::SimInstance; use super::storage::CrucibleData; use super::storage::Storage; -use crate::bootstrap::early_networking::{ - EarlyNetworkConfig, EarlyNetworkConfigBody, -}; use crate::nexus::NexusClient; use crate::params::{ DiskStateRequested, InstanceExternalIpBody, InstanceHardware, @@ -47,6 +44,9 @@ use propolis_client::{ types::VolumeConstructionRequest, Client as PropolisClient, }; use propolis_mock_server::Context as PropolisContext; +use sled_agent_types::early_networking::{ + EarlyNetworkConfig, EarlyNetworkConfigBody, +}; use 
sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::{HashMap, HashSet, VecDeque}; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index dc946c1bfa..4bf7117bc9 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -6,9 +6,7 @@ use crate::boot_disk_os_writer::BootDiskOsWriter; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; -use crate::bootstrap::early_networking::{ - EarlyNetworkConfig, EarlyNetworkSetupError, -}; +use crate::bootstrap::early_networking::EarlyNetworkSetupError; use crate::bootstrap::params::{BaseboardId, StartSledAgentRequest}; use crate::config::Config; use crate::instance_manager::InstanceManager; @@ -63,6 +61,7 @@ use omicron_common::backoff::{ use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{InstanceUuid, PropolisUuid}; use oximeter::types::ProducerRegistry; +use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; use sled_hardware_types::Baseboard; diff --git a/sled-agent/tests/integration_tests/early_network.rs b/sled-agent/tests/integration_tests/early_network.rs index 28fc0fd010..6fa91e0e4a 100644 --- a/sled-agent/tests/integration_tests/early_network.rs +++ b/sled-agent/tests/integration_tests/early_network.rs @@ -15,10 +15,10 @@ use omicron_common::api::{ RackNetworkConfig, RouteConfig, }, }; -use omicron_sled_agent::bootstrap::early_networking::{ +use omicron_test_utils::dev::test_setup_log; +use sled_agent_types::early_networking::{ EarlyNetworkConfig, EarlyNetworkConfigBody, }; -use omicron_test_utils::dev::test_setup_log; const BLOB_PATH: &str = "tests/data/early_network_blobs.txt"; diff --git a/sled-agent/types/Cargo.toml b/sled-agent/types/Cargo.toml new file mode 100644 index 0000000000..57881a37d1 --- /dev/null +++ b/sled-agent/types/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = 
"sled-agent-types" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +bootstore.workspace = true +camino.workspace = true +nexus-client.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +omicron-workspace-hack.workspace = true +oxnet.workspace = true +schemars.workspace = true +serde.workspace = true +serde_json.workspace = true +sled-hardware-types.workspace = true +slog.workspace = true +thiserror.workspace = true +toml.workspace = true + +[dev-dependencies] +camino-tempfile.workspace = true +omicron-test-utils.workspace = true +rcgen.workspace = true diff --git a/sled-agent/types/src/early_networking.rs b/sled-agent/types/src/early_networking.rs new file mode 100644 index 0000000000..dc93aa1300 --- /dev/null +++ b/sled-agent/types/src/early_networking.rs @@ -0,0 +1,606 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types for network setup required to bring up the control plane. + +use std::str::FromStr; + +use bootstore::schemes::v0 as bootstore; +use omicron_common::api::internal::shared::RackNetworkConfig; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use slog::{warn, Logger}; + +/// Network configuration required to bring up the control plane +/// +/// The fields in this structure are those from +/// [`crate::rack_init::RackInitializeRequest`] necessary for use beyond RSS. +/// This is just for the initial rack configuration and cold boot purposes. +/// Updates come from Nexus. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct EarlyNetworkConfig { + // The current generation number of data as stored in CRDB. + // The initial generation is set during RSS time and then only mutated + // by Nexus. 
+ pub generation: u64, + + // Which version of the data structure do we have. This is to help with + // deserialization and conversion in future updates. + pub schema_version: u32, + + // The actual configuration details + pub body: EarlyNetworkConfigBody, +} + +impl FromStr for EarlyNetworkConfig { + type Err = String; + + fn from_str(value: &str) -> Result { + #[derive(Deserialize)] + struct ShadowConfig { + generation: u64, + schema_version: u32, + body: EarlyNetworkConfigBody, + } + + let v2_err = match serde_json::from_str::(&value) { + Ok(cfg) => { + return Ok(EarlyNetworkConfig { + generation: cfg.generation, + schema_version: cfg.schema_version, + body: cfg.body, + }) + } + Err(e) => format!("unable to parse EarlyNetworkConfig: {e:?}"), + }; + // If we fail to parse the config as any known version, we return the + // error corresponding to the parse failure of the newest schema. + serde_json::from_str::(&value) + .map(|v1| EarlyNetworkConfig { + generation: v1.generation, + schema_version: Self::schema_version(), + body: v1.body.into(), + }) + .map_err(|_| v2_err) + } +} + +impl EarlyNetworkConfig { + pub fn schema_version() -> u32 { + 2 + } + + // Note: This currently only converts between v0 and v1 or deserializes v1 of + // `EarlyNetworkConfig`. + pub fn deserialize_bootstore_config( + log: &Logger, + config: &bootstore::NetworkConfig, + ) -> Result { + // Try to deserialize the latest version of the data structure (v2). If + // that succeeds we are done. + let v2_error = + match serde_json::from_slice::(&config.blob) { + Ok(val) => return Ok(val), + Err(error) => { + // Log this error and continue trying to deserialize older + // versions. 
+ warn!( + log, + "Failed to deserialize EarlyNetworkConfig \ + as v2, trying next as v1: {}", + error, + ); + error + } + }; + + match serde_json::from_slice::( + &config.blob, + ) { + Ok(v1) => { + // Convert from v1 to v2 + return Ok(EarlyNetworkConfig { + generation: v1.generation, + schema_version: EarlyNetworkConfig::schema_version(), + body: v1.body.into(), + }); + } + Err(error) => { + // Log this error. + warn!( + log, + "Failed to deserialize EarlyNetworkConfig \ + as v1, trying next as v0: {}", + error + ); + } + }; + + match serde_json::from_slice::( + &config.blob, + ) { + Ok(val) => { + // Convert from v0 to v2 + return Ok(EarlyNetworkConfig { + generation: val.generation, + schema_version: 2, + body: EarlyNetworkConfigBody { + ntp_servers: val.ntp_servers, + rack_network_config: val.rack_network_config.map( + |v0_config| { + back_compat::RackNetworkConfigV0::to_v2( + val.rack_subnet, + v0_config, + ) + }, + ), + }, + }); + } + Err(error) => { + // Log this error. + warn!( + log, + "Failed to deserialize EarlyNetworkConfig as v0: {}", error, + ); + } + }; + + // If we fail to parse the config as any known version, we return the + // error corresponding to the parse failure of the newest schema. + Err(v2_error) + } +} + +/// This is the actual configuration of EarlyNetworking. +/// +/// We nest it below the "header" of `generation` and `schema_version` so that +/// we can perform partial deserialization of `EarlyNetworkConfig` to only read +/// the header and defer deserialization of the body once we know the schema +/// version. This is possible via the use of [`serde_json::value::RawValue`] in +/// future (post-v1) deserialization paths. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct EarlyNetworkConfigBody { + /// The external NTP server addresses. 
+ pub ntp_servers: Vec, + + // Rack network configuration as delivered from RSS or Nexus + pub rack_network_config: Option, +} + +impl From for bootstore::NetworkConfig { + fn from(value: EarlyNetworkConfig) -> Self { + // Can this ever actually fail? + // We literally just deserialized the same data in RSS + let blob = serde_json::to_vec(&value).unwrap(); + + // Yes this is duplicated, but that seems fine. + let generation = value.generation; + + bootstore::NetworkConfig { generation, blob } + } +} + +/// Structures and routines used to maintain backwards compatibility. The +/// contents of this module should only be used to convert older data into the +/// current format, and not for any ongoing run-time operations. +pub mod back_compat { + use std::net::{Ipv4Addr, Ipv6Addr}; + + use omicron_common::api::{ + external::SwitchLocation, + internal::shared::{ + BfdPeerConfig, BgpConfig, BgpPeerConfig, PortConfigV2, PortFec, + PortSpeed, RackNetworkConfigV2, RouteConfig, UplinkAddressConfig, + }, + }; + use oxnet::{IpNet, Ipv4Net, Ipv6Net}; + + use super::*; + + #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] + pub struct EarlyNetworkConfigBodyV1 { + /// The external NTP server addresses. + pub ntp_servers: Vec, + + // Rack network configuration as delivered from RSS or Nexus + pub rack_network_config: Option, + } + + impl From for EarlyNetworkConfigBody { + fn from(v1: EarlyNetworkConfigBodyV1) -> Self { + EarlyNetworkConfigBody { + ntp_servers: v1.ntp_servers, + rack_network_config: v1 + .rack_network_config + .map(|v1_config| v1_config.into()), + } + } + } + + /// Deprecated, use `RackNetworkConfig` instead. Cannot actually deprecate due to + /// + /// + /// Our first version of `RackNetworkConfig`. If this exists in the bootstore, we + /// upgrade out of it into `RackNetworkConfigV1` or later versions if possible. 
+ #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] + pub(crate) struct RackNetworkConfigV0 { + // TODO: #3591 Consider making infra-ip ranges implicit for uplinks + /// First ip address to be used for configuring network infrastructure + pub infra_ip_first: Ipv4Addr, + /// Last ip address to be used for configuring network infrastructure + pub infra_ip_last: Ipv4Addr, + /// Uplinks for connecting the rack to external networks + pub uplinks: Vec, + } + + impl RackNetworkConfigV0 { + /// Convert from `RackNetworkConfigV0` to `RackNetworkConfigV1` + /// + /// We cannot use `From for `RackNetworkConfigV2` + /// because the `rack_subnet` field does not exist in `RackNetworkConfigV0` + /// and must be passed in from the `EarlyNetworkConfigV0` struct which + /// contains the `RackNetworkConfigV0` struct. + pub fn to_v2( + rack_subnet: Ipv6Addr, + v0: RackNetworkConfigV0, + ) -> RackNetworkConfigV2 { + RackNetworkConfigV2 { + rack_subnet: Ipv6Net::new(rack_subnet, 56).unwrap(), + infra_ip_first: v0.infra_ip_first, + infra_ip_last: v0.infra_ip_last, + ports: v0 + .uplinks + .into_iter() + .map(|uplink| PortConfigV2::from(uplink)) + .collect(), + bgp: vec![], + bfd: vec![], + } + } + } + + /// Deprecated, use PortConfigV2 instead. Cannot actually deprecate due to + /// + #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] + pub struct PortConfigV1 { + /// The set of routes associated with this port. + pub routes: Vec, + /// This port's addresses and optional vlan IDs + pub addresses: Vec, + /// Switch the port belongs to. + pub switch: SwitchLocation, + /// Nmae of the port this config applies to. + pub port: String, + /// Port speed. + pub uplink_port_speed: PortSpeed, + /// Port forward error correction type. 
+ pub uplink_port_fec: PortFec, + /// BGP peers on this port + pub bgp_peers: Vec, + /// Whether or not to set autonegotiation + #[serde(default)] + pub autoneg: bool, + } + + impl From for PortConfigV2 { + fn from(v1: PortConfigV1) -> Self { + PortConfigV2 { + routes: v1.routes.clone(), + addresses: v1 + .addresses + .iter() + .map(|a| UplinkAddressConfig { address: *a, vlan_id: None }) + .collect(), + switch: v1.switch, + port: v1.port, + uplink_port_speed: v1.uplink_port_speed, + uplink_port_fec: v1.uplink_port_fec, + bgp_peers: v1.bgp_peers.clone(), + autoneg: v1.autoneg, + } + } + } + + /// Deprecated, use PortConfigV2 instead. Cannot actually deprecate due to + /// + #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] + pub(crate) struct UplinkConfig { + /// Gateway address + pub gateway_ip: Ipv4Addr, + /// Switch to use for uplink + pub switch: SwitchLocation, + /// Switchport to use for external connectivity + pub uplink_port: String, + /// Speed for the Switchport + pub uplink_port_speed: PortSpeed, + /// Forward Error Correction setting for the uplink port + pub uplink_port_fec: PortFec, + /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport + /// (must be in infra_ip pool) + pub uplink_cidr: Ipv4Net, + /// VLAN id to use for uplink + pub uplink_vid: Option, + } + + impl From for PortConfigV2 { + fn from(value: UplinkConfig) -> Self { + PortConfigV2 { + routes: vec![RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: value.gateway_ip.into(), + vlan_id: value.uplink_vid, + }], + addresses: vec![UplinkAddressConfig { + address: value.uplink_cidr.into(), + vlan_id: value.uplink_vid, + }], + switch: value.switch, + port: value.uplink_port, + uplink_port_speed: value.uplink_port_speed, + uplink_port_fec: value.uplink_port_fec, + bgp_peers: vec![], + autoneg: false, + } + } + } + + /// Deprecated, use `RackNetworkConfig` instead. 
Cannot actually deprecate due to + /// + /// + /// Our second version of `RackNetworkConfig`. If this exists in the bootstore, + /// we upgrade out of it into `RackNetworkConfigV1` or later versions if + /// possible. + #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] + pub struct RackNetworkConfigV1 { + pub rack_subnet: Ipv6Net, + // TODO: #3591 Consider making infra-ip ranges implicit for uplinks + /// First ip address to be used for configuring network infrastructure + pub infra_ip_first: Ipv4Addr, + /// Last ip address to be used for configuring network infrastructure + pub infra_ip_last: Ipv4Addr, + /// Uplinks for connecting the rack to external networks + pub ports: Vec, + /// BGP configurations for connecting the rack to external networks + pub bgp: Vec, + /// BFD configuration for connecting the rack to external networks + #[serde(default)] + pub bfd: Vec, + } + + impl From for RackNetworkConfigV2 { + fn from(v1: RackNetworkConfigV1) -> Self { + RackNetworkConfigV2 { + rack_subnet: v1.rack_subnet, + infra_ip_first: v1.infra_ip_first, + infra_ip_last: v1.infra_ip_last, + ports: v1 + .ports + .into_iter() + .map(|ports| PortConfigV2::from(ports)) + .collect(), + bgp: v1.bgp.clone(), + bfd: v1.bfd.clone(), + } + } + } + + // The second production version of the `EarlyNetworkConfig`. + // + // If this version is in the bootstore than we need to convert it to + // `EarlyNetworkConfigV2`. + // + // Once we do this for all customers that have initialized racks with the + // old version we can go ahead and remove this type and its conversion code + // altogether. + #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] + pub struct EarlyNetworkConfigV1 { + // The current generation number of data as stored in CRDB. + // The initial generation is set during RSS time and then only mutated + // by Nexus. + pub generation: u64, + + // Which version of the data structure do we have. 
This is to help with + // deserialization and conversion in future updates. + pub schema_version: u32, + + // The actual configuration details + pub body: EarlyNetworkConfigBodyV1, + } + + // The first production version of the `EarlyNetworkConfig`. + // + // If this version is in the bootstore than we need to convert it to + // `EarlyNetworkConfigV2`. + // + // Once we do this for all customers that have initialized racks with the + // old version we can go ahead and remove this type and its conversion code + // altogether. + #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] + pub(crate) struct EarlyNetworkConfigV0 { + // The current generation number of data as stored in CRDB. + // The initial generation is set during RSS time and then only mutated + // by Nexus. + pub generation: u64, + + pub rack_subnet: Ipv6Addr, + + /// The external NTP server addresses. + pub ntp_servers: Vec, + + // Rack network configuration as delivered from RSS and only existing at + // generation 1 + pub rack_network_config: Option, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::net::Ipv4Addr; + use std::net::Ipv6Addr; + + use omicron_common::api::external::SwitchLocation; + use omicron_common::api::internal::shared::PortConfigV2; + use omicron_common::api::internal::shared::PortFec; + use omicron_common::api::internal::shared::PortSpeed; + use omicron_common::api::internal::shared::RackNetworkConfigV2; + use omicron_common::api::internal::shared::RouteConfig; + use omicron_common::api::internal::shared::UplinkAddressConfig; + use omicron_test_utils::dev::test_setup_log; + use oxnet::Ipv6Net; + + #[test] + fn serialized_early_network_config_v0_to_v2_conversion() { + let logctx = test_setup_log( + "serialized_early_network_config_v0_to_v2_conversion", + ); + let v0 = back_compat::EarlyNetworkConfigV0 { + generation: 1, + rack_subnet: Ipv6Addr::UNSPECIFIED, + ntp_servers: Vec::new(), + rack_network_config: Some(back_compat::RackNetworkConfigV0 { + 
/// A v1 `EarlyNetworkConfig` serialized into a bootstore blob must come back
/// as the current schema with every field carried over.
#[test]
fn serialized_early_network_config_v1_to_v2_conversion() {
    let logctx = test_setup_log(
        "serialized_early_network_config_v1_to_v2_conversion",
    );

    // Build the legacy input from the inside out: one port, one rack config.
    let legacy_port = back_compat::PortConfigV1 {
        routes: vec![RouteConfig {
            destination: "0.0.0.0/0".parse().unwrap(),
            nexthop: "192.168.0.2".parse().unwrap(),
            vlan_id: None,
        }],
        addresses: vec!["192.168.0.1/16".parse().unwrap()],
        switch: SwitchLocation::Switch0,
        port: "Port0".to_string(),
        uplink_port_speed: PortSpeed::Speed100G,
        uplink_port_fec: PortFec::None,
        bgp_peers: Vec::new(),
        autoneg: false,
    };
    let legacy_rack = back_compat::RackNetworkConfigV1 {
        rack_subnet: Ipv6Net::new(Ipv6Addr::UNSPECIFIED, 56).unwrap(),
        infra_ip_first: Ipv4Addr::UNSPECIFIED,
        infra_ip_last: Ipv4Addr::UNSPECIFIED,
        ports: vec![legacy_port.clone()],
        bgp: Vec::new(),
        bfd: Vec::new(),
    };
    let legacy = back_compat::EarlyNetworkConfigV1 {
        generation: 1,
        schema_version: 1,
        body: back_compat::EarlyNetworkConfigBodyV1 {
            ntp_servers: Vec::new(),
            rack_network_config: Some(legacy_rack.clone()),
        },
    };

    // Round-trip through the bootstore representation.
    let blob = serde_json::to_vec(&legacy).unwrap();
    let stored = bootstore::NetworkConfig { generation: 1, blob };
    let upgraded = EarlyNetworkConfig::deserialize_bootstore_config(
        &logctx.log,
        &stored,
    )
    .unwrap();

    // The upgrade wraps each bare address with `vlan_id: None` and stamps
    // the current schema version; everything else is carried over.
    let expected_port = PortConfigV2 {
        routes: legacy_port.routes.clone(),
        addresses: vec![UplinkAddressConfig {
            address: legacy_port.addresses[0],
            vlan_id: None,
        }],
        switch: legacy_port.switch,
        port: legacy_port.port,
        uplink_port_speed: legacy_port.uplink_port_speed,
        uplink_port_fec: legacy_port.uplink_port_fec,
        autoneg: false,
        bgp_peers: vec![],
    };
    let expected = EarlyNetworkConfig {
        generation: 1,
        schema_version: EarlyNetworkConfig::schema_version(),
        body: EarlyNetworkConfigBody {
            ntp_servers: legacy.body.ntp_servers.clone(),
            rack_network_config: Some(RackNetworkConfigV2 {
                rack_subnet: legacy_rack.rack_subnet,
                infra_ip_first: legacy_rack.infra_ip_first,
                infra_ip_last: legacy_rack.infra_ip_last,
                ports: vec![expected_port],
                bgp: vec![],
                bfd: vec![],
            }),
        },
    };

    assert_eq!(expected, upgraded);

    logctx.cleanup_successful();
}
--git a/sled-agent/types/src/lib.rs b/sled-agent/types/src/lib.rs new file mode 100644 index 0000000000..12e8f049f9 --- /dev/null +++ b/sled-agent/types/src/lib.rs @@ -0,0 +1,9 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common types for sled-agent. + +pub mod early_networking; +pub mod rack_init; +pub mod rack_ops; diff --git a/sled-agent/types/src/rack_init.rs b/sled-agent/types/src/rack_init.rs new file mode 100644 index 0000000000..8fcf3c93fd --- /dev/null +++ b/sled-agent/types/src/rack_init.rs @@ -0,0 +1,732 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Rack initialization types. + +use std::{ + collections::BTreeSet, + net::{IpAddr, Ipv6Addr}, +}; + +use anyhow::{bail, Result}; +use camino::{Utf8Path, Utf8PathBuf}; +use omicron_common::{ + address::{ + get_64_subnet, IpRange, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, + }, + api::{external::AllowedSourceIps, internal::shared::RackNetworkConfig}, +}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sled_hardware_types::Baseboard; + +pub type Certificate = nexus_client::types::Certificate; +pub type RecoverySiloConfig = nexus_client::types::RecoverySiloConfig; + +/// Structures and routines used to maintain backwards compatibility. The +/// contents of this module should only be used to convert older data into the +/// current format, and not for any ongoing run-time operations. 
+pub mod back_compat { + use crate::early_networking::back_compat::RackNetworkConfigV1; + + use super::*; + + #[derive(Clone, Deserialize)] + struct UnvalidatedRackInitializeRequestV1 { + trust_quorum_peers: Option>, + bootstrap_discovery: BootstrapAddressDiscovery, + ntp_servers: Vec, + dns_servers: Vec, + internal_services_ip_pool_ranges: Vec, + external_dns_ips: Vec, + external_dns_zone_name: String, + external_certificates: Vec, + recovery_silo: RecoverySiloConfig, + rack_network_config: RackNetworkConfigV1, + #[serde(default = "default_allowed_source_ips")] + allowed_source_ips: AllowedSourceIps, + } + + /// This is a deprecated format, maintained to allow importing from older + /// versions. + #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, JsonSchema)] + #[serde(try_from = "UnvalidatedRackInitializeRequestV1")] + pub struct RackInitializeRequestV1 { + pub trust_quorum_peers: Option>, + pub bootstrap_discovery: BootstrapAddressDiscovery, + pub ntp_servers: Vec, + pub dns_servers: Vec, + pub internal_services_ip_pool_ranges: Vec, + pub external_dns_ips: Vec, + pub external_dns_zone_name: String, + pub external_certificates: Vec, + pub recovery_silo: RecoverySiloConfig, + pub rack_network_config: RackNetworkConfigV1, + #[serde(default = "default_allowed_source_ips")] + pub allowed_source_ips: AllowedSourceIps, + } + + impl TryFrom for RackInitializeRequestV1 { + type Error = anyhow::Error; + + fn try_from(value: UnvalidatedRackInitializeRequestV1) -> Result { + validate_external_dns( + &value.external_dns_ips, + &value.internal_services_ip_pool_ranges, + )?; + + Ok(RackInitializeRequestV1 { + trust_quorum_peers: value.trust_quorum_peers, + bootstrap_discovery: value.bootstrap_discovery, + ntp_servers: value.ntp_servers, + dns_servers: value.dns_servers, + internal_services_ip_pool_ranges: value + .internal_services_ip_pool_ranges, + external_dns_ips: value.external_dns_ips, + external_dns_zone_name: value.external_dns_zone_name, + 
external_certificates: value.external_certificates, + recovery_silo: value.recovery_silo, + rack_network_config: value.rack_network_config, + allowed_source_ips: value.allowed_source_ips, + }) + } + } + impl From for RackInitializeRequest { + fn from(v1: RackInitializeRequestV1) -> Self { + RackInitializeRequest { + trust_quorum_peers: v1.trust_quorum_peers, + bootstrap_discovery: v1.bootstrap_discovery, + ntp_servers: v1.ntp_servers, + dns_servers: v1.dns_servers, + internal_services_ip_pool_ranges: v1 + .internal_services_ip_pool_ranges, + external_dns_ips: v1.external_dns_ips, + external_dns_zone_name: v1.external_dns_zone_name, + external_certificates: v1.external_certificates, + recovery_silo: v1.recovery_silo, + rack_network_config: v1.rack_network_config.into(), + allowed_source_ips: v1.allowed_source_ips, + } + } + } +} + +// "Shadow" copy of `RackInitializeRequest` that does no validation on its +// fields. +#[derive(Clone, Deserialize)] +struct UnvalidatedRackInitializeRequest { + trust_quorum_peers: Option>, + bootstrap_discovery: BootstrapAddressDiscovery, + ntp_servers: Vec, + dns_servers: Vec, + internal_services_ip_pool_ranges: Vec, + external_dns_ips: Vec, + external_dns_zone_name: String, + external_certificates: Vec, + recovery_silo: RecoverySiloConfig, + rack_network_config: RackNetworkConfig, + #[serde(default = "default_allowed_source_ips")] + allowed_source_ips: AllowedSourceIps, +} + +fn validate_external_dns( + dns_ips: &Vec, + internal_ranges: &Vec, +) -> Result<()> { + if dns_ips.is_empty() { + bail!("At least one external DNS IP is required"); + } + + // Every external DNS IP should also be present in one of the internal + // services IP pool ranges. This check is O(N*M), but we expect both N + // and M to be small (~5 DNS servers, and a small number of pools). 
+ for &dns_ip in dns_ips { + if !internal_ranges.iter().any(|range| range.contains(dns_ip)) { + bail!( + "External DNS IP {dns_ip} is not contained in \ + `internal_services_ip_pool_ranges`" + ); + } + } + Ok(()) +} + +impl TryFrom for RackInitializeRequest { + type Error = anyhow::Error; + + fn try_from(value: UnvalidatedRackInitializeRequest) -> Result { + validate_external_dns( + &value.external_dns_ips, + &value.internal_services_ip_pool_ranges, + )?; + + Ok(RackInitializeRequest { + trust_quorum_peers: value.trust_quorum_peers, + bootstrap_discovery: value.bootstrap_discovery, + ntp_servers: value.ntp_servers, + dns_servers: value.dns_servers, + internal_services_ip_pool_ranges: value + .internal_services_ip_pool_ranges, + external_dns_ips: value.external_dns_ips, + external_dns_zone_name: value.external_dns_zone_name, + external_certificates: value.external_certificates, + recovery_silo: value.recovery_silo, + rack_network_config: value.rack_network_config, + allowed_source_ips: value.allowed_source_ips, + }) + } +} + +/// Configuration for the "rack setup service". +/// +/// The Rack Setup Service should be responsible for one-time setup actions, +/// such as CockroachDB placement and initialization. Without operator +/// intervention, however, these actions need a way to be automated in our +/// deployment. +#[derive(Clone, Deserialize, Serialize, PartialEq, JsonSchema)] +#[serde(try_from = "UnvalidatedRackInitializeRequest")] +pub struct RackInitializeRequest { + /// The set of peer_ids required to initialize trust quorum + /// + /// The value is `None` if we are not using trust quorum + pub trust_quorum_peers: Option>, + + /// Describes how bootstrap addresses should be collected during RSS. + pub bootstrap_discovery: BootstrapAddressDiscovery, + + /// The external NTP server addresses. + pub ntp_servers: Vec, + + /// The external DNS server addresses. + pub dns_servers: Vec, + + /// Ranges of the service IP pool which may be used for internal services. 
+ // TODO(https://github.com/oxidecomputer/omicron/issues/1530): Eventually, + // we want to configure multiple pools. + pub internal_services_ip_pool_ranges: Vec, + + /// Service IP addresses on which we run external DNS servers. + /// + /// Each address must be present in `internal_services_ip_pool_ranges`. + pub external_dns_ips: Vec, + + /// DNS name for the DNS zone delegated to the rack for external DNS + pub external_dns_zone_name: String, + + /// initial TLS certificates for the external API + pub external_certificates: Vec, + + /// Configuration of the Recovery Silo (the initial Silo) + pub recovery_silo: RecoverySiloConfig, + + /// Initial rack network configuration + pub rack_network_config: RackNetworkConfig, + + /// IPs or subnets allowed to make requests to user-facing services + #[serde(default = "default_allowed_source_ips")] + pub allowed_source_ips: AllowedSourceIps, +} + +impl RackInitializeRequest { + pub fn from_file>( + path: P, + ) -> Result { + let path = path.as_ref(); + let contents = std::fs::read_to_string(&path).map_err(|err| { + RackInitializeRequestParseError::Io { path: path.into(), err } + })?; + let mut raw_config = + Self::from_toml_with_fallback(&contents).map_err(|err| { + RackInitializeRequestParseError::Deserialize { + path: path.into(), + err, + } + })?; + + // In the same way that sled-agent itself (our caller) discovers the + // optional config-rss.toml in a well-known path relative to its config + // file, we look for a pair of well-known paths adjacent to + // config-rss.toml that specify an extra TLS certificate and private + // key. This is used by the end-to-end tests. Any developer can also + // use this to inject a TLS certificate into their setup. + // (config-rss.toml is only used for dev/test, not production + // deployments, which will always get their RSS configuration from + // Wicket.) 
+ if let Some(parent) = path.parent() { + let cert_path = parent.join("initial-tls-cert.pem"); + let key_path = parent.join("initial-tls-key.pem"); + let cert_bytes = std::fs::read_to_string(&cert_path); + let key_bytes = std::fs::read_to_string(&key_path); + match (cert_bytes, key_bytes) { + (Ok(cert), Ok(key)) => { + raw_config + .external_certificates + .push(Certificate { key, cert }); + } + (Err(cert_error), Err(key_error)) + if cert_error.kind() == std::io::ErrorKind::NotFound + && key_error.kind() == std::io::ErrorKind::NotFound => + { + // Fine. No extra cert was provided. + } + (Err(cert_error), _) => { + return Err(RackInitializeRequestParseError::Certificate( + anyhow::Error::new(cert_error).context(format!( + "loading certificate from {:?}", + cert_path + )), + )); + } + (_, Err(key_error)) => { + return Err(RackInitializeRequestParseError::Certificate( + anyhow::Error::new(key_error).context(format!( + "loading private key from {:?}", + key_path + )), + )); + } + }; + } + + Ok(raw_config) + } + + pub fn from_toml_with_fallback( + data: &str, + ) -> Result { + let v2_err = match toml::from_str::(&data) { + Ok(req) => return Ok(req), + Err(e) => e, + }; + if let Ok(v1) = + toml::from_str::(&data) + { + return Ok(v1.into()); + } + + // If we fail to parse the request as any known version, we return the + // error corresponding to the parse failure of the newest schema. + Err(v2_err.into()) + } + + /// Return a configuration suitable for testing. + pub fn test_config() -> Self { + // Use env! rather than std::env::var because this might be called from + // a dependent crate. 
+ let manifest_dir = Utf8Path::new(env!("CARGO_MANIFEST_DIR")); + let path = manifest_dir + .join("../../smf/sled-agent/non-gimlet/config-rss.toml"); + let contents = std::fs::read_to_string(&path).unwrap(); + toml::from_str(&contents) + .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)) + } + + pub fn az_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new( + self.rack_network_config.rack_subnet.addr(), + ) + } + + /// Returns the subnet for our rack. + pub fn rack_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new( + self.rack_network_config.rack_subnet.addr(), + ) + } + + /// Returns the subnet for the `index`-th sled in the rack. + pub fn sled_subnet(&self, index: u8) -> Ipv6Subnet { + get_64_subnet(self.rack_subnet(), index) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum RackInitializeRequestParseError { + #[error("Failed to read config from {path}: {err}")] + Io { + path: Utf8PathBuf, + #[source] + err: std::io::Error, + }, + #[error("Failed to deserialize config from {path}: {err}")] + Deserialize { + path: Utf8PathBuf, + #[source] + err: anyhow::Error, + }, + #[error("Loading certificate: {0}")] + Certificate(#[source] anyhow::Error), +} + +/// This field was added after several racks were already deployed. RSS plans +/// for those racks should default to allowing any source IP, since that is +/// effectively what they did. +const fn default_allowed_source_ips() -> AllowedSourceIps { + AllowedSourceIps::Any +} + +// This custom debug implementation hides the private keys. +impl std::fmt::Debug for RackInitializeRequest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // If you find a compiler error here, and you just added a field to this + // struct, be sure to add it to the Debug impl below! 
+ let RackInitializeRequest { + trust_quorum_peers: trust_qurorum_peers, + bootstrap_discovery, + ntp_servers, + dns_servers, + internal_services_ip_pool_ranges, + external_dns_ips, + external_dns_zone_name, + external_certificates: _, + recovery_silo, + rack_network_config, + allowed_source_ips, + } = &self; + + f.debug_struct("RackInitializeRequest") + .field("trust_quorum_peers", trust_qurorum_peers) + .field("bootstrap_discovery", bootstrap_discovery) + .field("ntp_servers", ntp_servers) + .field("dns_servers", dns_servers) + .field( + "internal_services_ip_pool_ranges", + internal_services_ip_pool_ranges, + ) + .field("external_dns_ips", external_dns_ips) + .field("external_dns_zone_name", external_dns_zone_name) + .field("external_certificates", &"") + .field("recovery_silo", recovery_silo) + .field("rack_network_config", rack_network_config) + .field("allowed_source_ips", allowed_source_ips) + .finish() + } +} + +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] +#[serde(rename_all = "snake_case", tag = "type")] +pub enum BootstrapAddressDiscovery { + /// Ignore all bootstrap addresses except our own. + OnlyOurs, + /// Ignore all bootstrap addresses except the following. 
/// Both config-rss.toml files under `smf/sled-agent` must parse as a
/// `RackInitializeRequest`.
#[test]
fn parse_rack_initialization() {
    let manifest_dir = Utf8PathBuf::from(
        std::env::var("CARGO_MANIFEST_DIR")
            .expect("Cannot access manifest directory"),
    );

    // Checked in this order; the first unreadable or unparsable file panics.
    for relative in [
        "../../smf/sled-agent/non-gimlet/config-rss.toml",
        "../../smf/sled-agent/gimlet-standalone/config-rss.toml",
    ] {
        let path = manifest_dir.join(relative);
        let contents = std::fs::read_to_string(&path).unwrap();
        if let Err(e) = toml::from_str::<RackInitializeRequest>(&contents) {
            panic!("failed to parse {:?}: {}", &path, e);
        }
    }
}
+ let mut config = UnvalidatedRackInitializeRequest { + trust_quorum_peers: None, + bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, + ntp_servers: Vec::new(), + dns_servers: Vec::new(), + internal_services_ip_pool_ranges: Vec::new(), + external_dns_ips: Vec::new(), + external_dns_zone_name: "".to_string(), + external_certificates: Vec::new(), + recovery_silo: RecoverySiloConfig { + silo_name: "recovery".parse().unwrap(), + user_name: "recovery".parse().unwrap(), + user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY".parse().unwrap(), + }, + rack_network_config: RackNetworkConfig { + rack_subnet: Ipv6Net::host_net(Ipv6Addr::LOCALHOST), + infra_ip_first: Ipv4Addr::LOCALHOST, + infra_ip_last: Ipv4Addr::LOCALHOST, + ports: Vec::new(), + bgp: Vec::new(), + bfd: Vec::new(), + }, + allowed_source_ips: AllowedSourceIps::Any, + }; + + // Valid configs: all external DNS IPs are contained in the IP pool + // ranges. + for (ip_pool_ranges, dns_ips) in [ + ( + &[("fd00::1", "fd00::10")] as &[(&str, &str)], + &["fd00::1", "fd00::5", "fd00::10"] as &[&str], + ), + ( + &[("192.168.1.10", "192.168.1.20")], + &["192.168.1.10", "192.168.1.15", "192.168.1.20"], + ), + ( + &[("fd00::1", "fd00::10"), ("192.168.1.10", "192.168.1.20")], + &[ + "fd00::1", + "fd00::5", + "fd00::10", + "192.168.1.10", + "192.168.1.15", + "192.168.1.20", + ], + ), + ] { + config.internal_services_ip_pool_ranges = ip_pool_ranges + .iter() + .map(|(a, b)| { + IpRange::try_from(( + a.parse::().unwrap(), + b.parse::().unwrap(), + )) + .unwrap() + }) + .collect(); + config.external_dns_ips = + dns_ips.iter().map(|ip| ip.parse().unwrap()).collect(); + + match RackInitializeRequest::try_from(config.clone()) { + Ok(_) => (), + Err(err) => panic!( + "failure on {ip_pool_ranges:?} with DNS IPs {dns_ips:?}: \ + {err}" + ), + } + } + + // Invalid configs: either no DNS IPs, or one or more DNS IPs are not + // contained in the ip pool ranges. 
+ for (ip_pool_ranges, dns_ips) in [ + (&[("fd00::1", "fd00::10")] as &[(&str, &str)], &[] as &[&str]), + (&[("fd00::1", "fd00::10")], &["fd00::1", "fd00::5", "fd00::11"]), + ( + &[("192.168.1.10", "192.168.1.20")], + &["192.168.1.9", "192.168.1.15", "192.168.1.20"], + ), + ( + &[("fd00::1", "fd00::10"), ("192.168.1.10", "192.168.1.20")], + &[ + "fd00::1", + "fd00::5", + "fd00::10", + "192.168.1.10", + "192.168.1.15", + "192.168.1.20", + "192.168.1.21", + ], + ), + ] { + config.internal_services_ip_pool_ranges = ip_pool_ranges + .iter() + .map(|(a, b)| { + IpRange::try_from(( + a.parse::().unwrap(), + b.parse::().unwrap(), + )) + .unwrap() + }) + .collect(); + config.external_dns_ips = + dns_ips.iter().map(|ip| ip.parse().unwrap()).collect(); + + match RackInitializeRequest::try_from(config.clone()) { + Ok(_) => panic!( + "unexpected success on {ip_pool_ranges:?} with \ + DNS IPs {dns_ips:?}" + ), + Err(_) => (), + } + } + } + + #[test] + fn test_subnets() { + let cfg = RackInitializeRequest { + trust_quorum_peers: None, + bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, + ntp_servers: vec![String::from("test.pool.example.com")], + dns_servers: vec!["1.1.1.1".parse().unwrap()], + external_dns_zone_name: String::from("oxide.test"), + internal_services_ip_pool_ranges: vec![IpRange::from(IpAddr::V4( + Ipv4Addr::new(129, 168, 1, 20), + ))], + external_dns_ips: vec![], + external_certificates: vec![], + recovery_silo: RecoverySiloConfig { + silo_name: "test-silo".parse().unwrap(), + user_name: "dummy".parse().unwrap(), + // This is a hash for the password "oxide". It doesn't matter, + // though; it's not used. 
+ user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$\ + RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/\ + ek3GL0el/oProgTwWpHJZ8lsQQoY" + .parse() + .unwrap(), + }, + rack_network_config: RackNetworkConfig { + rack_subnet: Ipv6Net::new( + "fd00:1122:3344:0100::".parse().unwrap(), + RACK_PREFIX, + ) + .unwrap(), + infra_ip_first: Ipv4Addr::LOCALHOST, + infra_ip_last: Ipv4Addr::LOCALHOST, + ports: Vec::new(), + bgp: Vec::new(), + bfd: Vec::new(), + }, + allowed_source_ips: AllowedSourceIps::Any, + }; + + assert_eq!( + Ipv6Subnet::::new( + // Masked out in AZ Subnet + // vv + "fd00:1122:3344:0000::".parse::().unwrap(), + ), + cfg.az_subnet() + ); + assert_eq!( + Ipv6Subnet::::new( + // Shows up from Rack Subnet + // vv + "fd00:1122:3344:0100::".parse::().unwrap(), + ), + cfg.rack_subnet() + ); + assert_eq!( + Ipv6Subnet::::new( + // 0th Sled Subnet + // vv + "fd00:1122:3344:0100::".parse::().unwrap(), + ), + cfg.sled_subnet(0) + ); + assert_eq!( + Ipv6Subnet::::new( + // 1st Sled Subnet + // vv + "fd00:1122:3344:0101::".parse::().unwrap(), + ), + cfg.sled_subnet(1) + ); + assert_eq!( + Ipv6Subnet::::new( + // Last Sled Subnet + // vv + "fd00:1122:3344:01ff::".parse::().unwrap(), + ), + cfg.sled_subnet(255) + ); + } + + #[test] + fn test_extra_certs() { + // The stock non-Gimlet config has no TLS certificates. + let path = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("../../smf/sled-agent/non-gimlet/config-rss.toml"); + let cfg = RackInitializeRequest::from_file(&path) + .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)); + assert!(cfg.external_certificates.is_empty()); + + // Now let's create a configuration that does have an adjacent + // certificate and key. + let tempdir = + camino_tempfile::tempdir().expect("creating temporary directory"); + println!("using temp path: {:?}", tempdir); + + // Generate the certificate. 
+ let domain = format!( + "{}.sys.{}", + cfg.external_dns_zone_name, + cfg.recovery_silo.silo_name.as_str(), + ); + let cert = rcgen::generate_simple_self_signed(vec![domain.clone()]) + .unwrap_or_else(|error| { + panic!( + "generating certificate for domain {:?}: {}", + domain, error + ) + }); + + // Write the configuration file. + let cfg_path = tempdir.path().join("config-rss.toml"); + let _ = std::fs::copy(&path, &cfg_path) + .with_context(|| { + format!("failed to copy file {:?} to {:?}", &path, &cfg_path) + }) + .unwrap(); + + // Write the certificate. + let cert_bytes = cert + .serialize_pem() + .expect("serializing generated certificate") + .into_bytes(); + let cert_path = tempdir.path().join("initial-tls-cert.pem"); + std::fs::write(&cert_path, &cert_bytes) + .with_context(|| format!("failed to write to {:?}", &cert_path)) + .unwrap(); + + // Write the private key. + let key_path = tempdir.path().join("initial-tls-key.pem"); + let key_bytes = cert.serialize_private_key_pem().into_bytes(); + std::fs::write(&key_path, &key_bytes) + .with_context(|| format!("failed to write to {:?}", &key_path)) + .unwrap(); + + // Now try to load it all. + let read_cfg = RackInitializeRequest::from_file(&cfg_path) + .expect("failed to read generated config with certificate"); + assert_eq!(read_cfg.external_certificates.len(), 1); + let cert = read_cfg.external_certificates.first().unwrap(); + let _ = rcgen::KeyPair::from_pem(&cert.key) + .expect("generated PEM did not parse as KeyPair"); + } +} diff --git a/sled-agent/types/src/rack_ops.rs b/sled-agent/types/src/rack_ops.rs new file mode 100644 index 0000000000..d8c0fa1c88 --- /dev/null +++ b/sled-agent/types/src/rack_ops.rs @@ -0,0 +1,46 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+
+use omicron_uuid_kinds::{RackInitUuid, RackResetUuid};
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+/// Current status of any rack-level operation being performed by this bootstrap
+/// agent.
+///
+/// Serialized as an internally tagged enum: the variant name is written in
+/// snake_case to a `status` field next to the variant's own fields, e.g.
+/// `{"status": "initialization_failed", "id": ..., "message": ...}`.
+#[derive(
+    Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema,
+)]
+#[serde(tag = "status", rename_all = "snake_case")]
+pub enum RackOperationStatus {
+    /// The rack initialization operation `id` is in progress.
+    Initializing {
+        id: RackInitUuid,
+    },
+    /// `id` will be `None` if the rack was already initialized on startup.
+    Initialized {
+        id: Option<RackInitUuid>,
+    },
+    /// The rack initialization operation `id` failed with `message`.
+    InitializationFailed {
+        id: RackInitUuid,
+        message: String,
+    },
+    /// The rack initialization operation `id` panicked.
+    InitializationPanicked {
+        id: RackInitUuid,
+    },
+    /// The rack reset operation `id` is in progress.
+    Resetting {
+        id: RackResetUuid,
+    },
+    /// `reset_id` will be `None` if the rack was uninitialized on startup, or
+    /// `Some` if it is in an uninitialized state due to a reset operation
+    /// completing.
+    Uninitialized {
+        reset_id: Option<RackResetUuid>,
+    },
+    /// The rack reset operation `id` failed with `message`.
+    ResetFailed {
+        id: RackResetUuid,
+        message: String,
+    },
+    /// The rack reset operation `id` panicked.
+    ResetPanicked {
+        id: RackResetUuid,
+    },
+}