diff --git a/Cargo.lock b/Cargo.lock index fd2a269da3..cfa8c4aedb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3461,6 +3461,20 @@ dependencies = [ "uuid", ] +[[package]] +name = "internal-dns-cli" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap 4.3.8", + "dropshot", + "internal-dns 0.1.0", + "omicron-common 0.1.0", + "slog", + "tokio", + "trust-dns-resolver", +] + [[package]] name = "io-lifetimes" version = "1.0.11" diff --git a/Cargo.toml b/Cargo.toml index bc115afbc9..1c69c2b80d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ members = [ "installinator-artifactd", "installinator-common", "internal-dns", + "internal-dns-cli", "ipcc-key-value", "key-manager", "nexus", @@ -83,6 +84,7 @@ default-members = [ "installinator-artifactd", "installinator-common", "internal-dns", + "internal-dns-cli", "ipcc-key-value", "key-manager", "nexus", diff --git a/dns-server/src/dns_server.rs b/dns-server/src/dns_server.rs index 990cc7171e..769b29f1ac 100644 --- a/dns-server/src/dns_server.rs +++ b/dns-server/src/dns_server.rs @@ -15,8 +15,7 @@ use anyhow::anyhow; use anyhow::Context; use pretty_hex::*; use serde::Deserialize; -use slog::info; -use slog::{debug, error, o, Logger}; +use slog::{debug, error, info, o, trace, Logger}; use std::net::SocketAddr; use std::str::FromStr; use std::sync::Arc; @@ -150,7 +149,7 @@ async fn handle_dns_packet(request: Request) { let log = &request.log; let buf = &request.packet; - debug!(&log, "buffer"; "buffer" => ?buf.hex_dump()); + trace!(&log, "buffer"; "buffer" => ?buf.hex_dump()); // Decode the message. let mut dec = BinDecoder::new(&buf); @@ -270,6 +269,12 @@ async fn handle_dns_message( } }) .collect::, RequestError>>()?; + debug!( + &log, + "dns response"; + "query" => ?query, + "records" => ?&response_records + ); respond_records(request, rb, header, &response_records).await } diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index ac46507663..439236c96f 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -260,6 +260,10 @@ impl RunningZone { self.inner.zonepath.join("root") } + pub fn control_interface(&self) -> AddrObject { + AddrObject::new(self.inner.get_control_vnic_name(), "omicron6").unwrap() + } + /// Runs a command within the Zone, return the output. // // NOTE: It's important that this function is synchronous. diff --git a/internal-dns-cli/Cargo.toml b/internal-dns-cli/Cargo.toml new file mode 100644 index 0000000000..d922544722 --- /dev/null +++ b/internal-dns-cli/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "internal-dns-cli" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +clap.workspace = true +dropshot.workspace = true +internal-dns.workspace = true +omicron-common.workspace = true +slog.workspace = true +tokio.workspace = true +trust-dns-resolver.workspace = true diff --git a/internal-dns-cli/src/bin/dnswait.rs b/internal-dns-cli/src/bin/dnswait.rs new file mode 100644 index 0000000000..1b577a7d6d --- /dev/null +++ b/internal-dns-cli/src/bin/dnswait.rs @@ -0,0 +1,98 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Resolves DNS names within the Oxide control plane + +use anyhow::Context; +use anyhow::Result; +use clap::Parser; +use clap::ValueEnum; +use internal_dns::resolver::ResolveError; +use internal_dns::resolver::Resolver; +use slog::{info, warn}; +use std::net::SocketAddr; + +#[derive(Debug, Parser)] +#[clap(name = "dnswait", about = "Resolves DNS names in the control plane")] +struct Opt { + /// Nameserver(s) to query + /// + /// If unspecified, uses the system configuration (usually the nameservers + /// configured in /etc/resolv.conf). + #[clap(long, action)] + nameserver_addresses: Vec, + + /// service name to be resolved (should be the target of a DNS name) + #[arg(value_enum)] + srv_name: ServiceName, +} + +#[derive(Debug, Clone, Copy, ValueEnum)] +#[value(rename_all = "kebab-case")] +enum ServiceName { + Cockroach, +} + +impl From for internal_dns::ServiceName { + fn from(value: ServiceName) -> Self { + match value { + ServiceName::Cockroach => internal_dns::ServiceName::Cockroach, + } + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let opt = Opt::parse(); + let log = dropshot::ConfigLogging::File { + path: "/dev/stderr".into(), + level: dropshot::ConfigLoggingLevel::Info, + if_exists: dropshot::ConfigLoggingIfExists::Append, + } + .to_logger("dnswait") + .context("creating log")?; + + let resolver = if opt.nameserver_addresses.is_empty() { + info!(&log, "using system configuration"); + let async_resolver = + trust_dns_resolver::AsyncResolver::tokio_from_system_conf() + .context("initializing resolver from system configuration")?; + Resolver::new_with_resolver(log.clone(), async_resolver) + } else { + let addrs = opt.nameserver_addresses; + info!(&log, "using explicit nameservers"; "nameservers" => ?addrs); + Resolver::new_from_addrs(log.clone(), addrs) + .context("creating resolver with explicit nameserver addresses")? + }; + + let result = omicron_common::backoff::retry_notify( + omicron_common::backoff::retry_policy_internal_service(), + || async { + let dns_name = internal_dns::ServiceName::from(opt.srv_name); + resolver.lookup_srv(dns_name).await.map_err(|error| match error { + ResolveError::Resolve(_) + | ResolveError::NotFound(_) + | ResolveError::NotFoundByString(_) => { + omicron_common::backoff::BackoffError::transient(error) + } + }) + }, + |error, delay| { + warn!( + &log, + "DNS query failed; will try again"; + "error" => format!("{:#}", error), + "delay" => ?delay, + ); + }, + ) + .await + .context("unexpectedly gave up")?; + + for ip in result { + println!("{}", ip) + } + + Ok(()) +} diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index b0c5ad0617..aacedd031d 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -6,7 +6,7 @@ use crate::DNS_ZONE; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, }; -use slog::{debug, info}; +use slog::{debug, info, trace}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use trust_dns_proto::rr::record_type::RecordType; use trust_dns_resolver::config::{ @@ -70,6 +70,15 @@ impl Resolver { Self::new_from_subnet(log, subnet) } + /// Return a resolver that uses the system configuration (usually + /// /etc/resolv.conf) for the underlying nameservers. + pub fn new_with_resolver( + log: slog::Logger, + tokio_resolver: TokioAsyncResolver, + ) -> Self { + Resolver { log, inner: Box::new(tokio_resolver) } + } + // TODO-correctness This function and its callers make assumptions about how // many internal DNS servers there are on the subnet and where they are. Is // that okay? It would seem more flexible not to assume this. Instead, we @@ -121,6 +130,107 @@ impl Resolver { Ok(*address) } + /// Returns the targets of the SRV records for a DNS name + /// + /// The returned values are generally other DNS names that themselves would + /// need to be looked up to find A/AAAA records. + pub async fn lookup_srv( + &self, + srv: crate::ServiceName, + ) -> Result, ResolveError> { + let name = format!("{}.{}", srv.dns_name(), DNS_ZONE); + trace!(self.log, "lookup_srv"; "dns_name" => &name); + let response = self.inner.srv_lookup(&name).await?; + debug!( + self.log, + "lookup_srv"; + "dns_name" => &name, + "response" => ?response + ); + + Ok(response.into_iter().map(|srv| srv.target().to_string()).collect()) + } + + pub async fn lookup_all_ipv6( + &self, + srv: crate::ServiceName, + ) -> Result, ResolveError> { + let name = format!("{}.{}", srv.dns_name(), DNS_ZONE); + trace!(self.log, "lookup_all_ipv6 srv"; "dns_name" => &name); + let response = self.inner.srv_lookup(&name).await?; + debug!( + self.log, + "lookup_ipv6 srv"; + "dns_name" => &name, + "response" => ?response + ); + + // SRV records have a target, which is itself another DNS name that + // needs to be looked up in order to get to the actual IP addresses. + // Many DNS servers return these IP addresses directly in the response + // to the SRV query as Additional records. Ours does not. See + // omicron#3434. So we need to do another round of lookups separately. + // + // According to the docs` for + // `trust_dns_resolver::lookup::SrvLookup::ip_iter()`, it sounds like + // trust-dns would have done this for us. It doesn't. See + // bluejekyll/trust-dns#1980. + // + // So if we have gotten any IPs, then we assume that one of the above + // issues has been addressed and so we have all the IPs and we're done. + // Otherwise, explicitly do the extra lookups. + let addresses: Vec = response + .ip_iter() + .filter_map(|addr| match addr { + IpAddr::V4(_) => None, + IpAddr::V6(addr) => Some(addr), + }) + .collect(); + if !addresses.is_empty() { + return Ok(addresses); + } + + // What do we do if some of these queries succeed while others fail? We + // may have some addresses, but the list might be incomplete. That + // might be okay for some use cases but not others. For now, we do the + // simple thing. In the future, we'll want a more cueball-like resolver + // interface that better deals with these cases. + let log = &self.log; + let futures = response.iter().map(|srv| async { + let target = srv.target(); + trace!( + log, + "lookup_all_ipv6: looking up SRV target"; + "name" => ?target, + ); + self.inner.ipv6_lookup(target.clone()).await + }); + let results = futures::future::try_join_all(futures).await?; + let results = results + .into_iter() + .flat_map(|ipv6| ipv6.into_iter()) + .collect::>(); + if results.is_empty() { + Err(ResolveError::NotFound(srv)) + } else { + Ok(results) + } + } + + pub async fn lookup_ip( + &self, + srv: crate::ServiceName, + ) -> Result { + let name = format!("{}.{}", srv.dns_name(), DNS_ZONE); + debug!(self.log, "lookup srv"; "dns_name" => &name); + let response = self.inner.lookup_ip(&name).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(address) + } + /// Looks up a single [`SocketAddrV6`] based on the SRV name /// Returns an error if the record does not exist. pub async fn lookup_socket_v6( @@ -156,20 +266,6 @@ impl Resolver { } }) } - - pub async fn lookup_ip( - &self, - srv: crate::ServiceName, - ) -> Result { - let name = format!("{}.{}", srv.dns_name(), DNS_ZONE); - debug!(self.log, "lookup srv"; "dns_name" => &name); - let response = self.inner.lookup_ip(&name).await?; - let address = response - .iter() - .next() - .ok_or_else(|| ResolveError::NotFound(srv))?; - Ok(address) - } } #[cfg(test)] @@ -427,6 +523,17 @@ mod test { .expect("Should have been able to look up IP address"); assert!(cockroach_addrs.iter().any(|addr| addr.ip() == &ip)); + // Look up all the Cockroach addresses. + let mut ips = + resolver.lookup_all_ipv6(ServiceName::Cockroach).await.expect( + "Should have been able to look up all CockroachDB addresses", + ); + ips.sort(); + assert_eq!( + ips, + cockroach_addrs.iter().map(|s| *s.ip()).collect::>() + ); + // Look up Clickhouse let ip = resolver .lookup_ipv6(ServiceName::Clickhouse) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index a15840e009..034e564d09 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -10,6 +10,23 @@ "version": "0.0.1" }, "paths": { + "/cockroachdb": { + "post": { + "summary": "Initializes a CockroachDB cluster", + "operationId": "cockroachdb_init", + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/disks/{disk_id}": { "put": { "operationId": "disk_put", diff --git a/package-manifest.toml b/package-manifest.toml index 8e4dd48232..577dfc04b0 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -104,6 +104,13 @@ setup_hint = "Run `./tools/ci_download_clickhouse` to download the necessary bin [package.cockroachdb] service_name = "cockroachdb" only_for_targets.image = "standard" +source.type = "composite" +source.packages = [ "cockroachdb-service.tar.gz", "internal-dns-cli.tar.gz" ] +output.type = "zone" + +[package.cockroachdb-service] +service_name = "cockroachdb-service" +only_for_targets.image = "standard" source.type = "local" source.paths = [ { from = "out/cockroachdb", to = "/opt/oxide/cockroachdb" }, @@ -112,8 +119,19 @@ source.paths = [ { from = "smf/cockroachdb/method_script.sh", to = "/opt/oxide/lib/svc/manifest/cockroachdb.sh" }, ] output.type = "zone" +output.intermediate_only = true setup_hint = "Run `./tools/ci_download_cockroachdb` to download the necessary binaries" +[package.internal-dns-cli] +service_name = "internal-dns-cli" +only_for_targets.image = "standard" +source.type = "local" +source.rust.binary_names = ["dnswait"] +source.rust.release = true +source.paths = [] +output.type = "zone" +output.intermediate_only = true + [package.internal-dns] service_name = "internal_dns" only_for_targets.image = "standard" diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index c3ac0e0968..694db1dde5 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -32,6 +32,7 @@ type SledApiDescription = ApiDescription; pub fn api() -> SledApiDescription { fn register_endpoints(api: &mut SledApiDescription) -> Result<(), String> { api.register(disk_put)?; + api.register(cockroachdb_init)?; api.register(instance_issue_disk_snapshot_request)?; api.register(instance_put_migration_ids)?; api.register(instance_put_state)?; @@ -203,6 +204,19 @@ async fn sled_role_get( Ok(HttpResponseOk(sa.get_role().await)) } +/// Initializes a CockroachDB cluster +#[endpoint { + method = POST, + path = "/cockroachdb", +}] +async fn cockroachdb_init( + rqctx: RequestContext, +) -> Result { + let sa = rqctx.context(); + sa.cockroachdb_initialize().await?; + Ok(HttpResponseUpdatedNoContent()) +} + /// Path parameters for Instance requests (sled agent API) #[derive(Deserialize, JsonSchema)] struct InstancePathParam { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index d737e2359d..51c4968260 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -518,6 +518,8 @@ impl From for sled_agent_client::types::DatasetRequest { )] pub struct ServiceZoneRequest { // The UUID of the zone to be initialized. + // TODO: Should this be removed? If we have UUIDs on the services, what's + // the point of this? pub id: Uuid, // The type of the zone to be created. pub zone_type: ZoneType, diff --git a/sled-agent/src/profile.rs b/sled-agent/src/profile.rs index eaf93bc97e..7c2d8d1738 100644 --- a/sled-agent/src/profile.rs +++ b/sled-agent/src/profile.rs @@ -6,7 +6,10 @@ use illumos_utils::running_zone::InstalledZone; use slog::Logger; -use std::fmt::{Display, Formatter}; +use std::{ + collections::BTreeMap, + fmt::{Display, Formatter}, +}; pub struct ProfileBuilder { name: String, @@ -58,17 +61,30 @@ impl Display for ProfileBuilder { pub struct ServiceBuilder { name: String, instances: Vec, + property_groups: Vec, } impl ServiceBuilder { pub fn new(name: &str) -> Self { - Self { name: name.to_string(), instances: vec![] } + Self { + name: name.to_string(), + instances: vec![], + property_groups: vec![], + } } pub fn add_instance(mut self, instance: ServiceInstanceBuilder) -> Self { self.instances.push(instance); self } + + pub fn add_property_group( + mut self, + property_group: PropertyGroupBuilder, + ) -> Self { + self.property_groups.push(property_group); + self + } } impl Display for ServiceBuilder { @@ -80,6 +96,10 @@ impl Display for ServiceBuilder { name = self.name )?; + for property_group in &self.property_groups { + write!(f, "{}", property_group)?; + } + for instance in &self.instances { write!(f, "{}", instance)?; } @@ -138,20 +158,51 @@ impl Display for ServiceInstanceBuilder { pub struct PropertyGroupBuilder { name: String, - properties: Vec, + /// names of the properties that were added, in the order they were added + properties_seen: Vec, + /// type for each property, by name + property_types: BTreeMap, + /// values seen for each property, by name + property_values: BTreeMap>, } impl PropertyGroupBuilder { pub fn new(name: &str) -> Self { - Self { name: name.to_string(), properties: vec![] } + Self { + name: name.to_string(), + properties_seen: vec![], + property_types: BTreeMap::new(), + property_values: BTreeMap::new(), + } } pub fn add_property(mut self, name: &str, ty: &str, value: &str) -> Self { - self.properties.push(Property { - name: name.to_string(), - ty: ty.to_string(), - value: value.to_string(), - }); + // The data structures here are oriented around a few goals: + // + // - Properties will be written out in the order that they were added. + // This does not affect correctness but is a nicer developer + // experience than writing them out alphabetically or in some other + // arbitrary order. This tends to keep related properties together, + // since the code that adds them in the first place tends to do so in + // a logical way. + // + // - We support multi-valued properties using the `` syntax. + // + // - If there's only one value for a property, we use the more concise + // `` syntax. + if self + .property_types + .insert(name.to_string(), ty.to_string()) + .is_none() + { + self.properties_seen.push(name.to_string()); + } + + let values = self + .property_values + .entry(name.to_string()) + .or_insert_with(Vec::new); + values.push(value.to_string()); self } } @@ -164,34 +215,33 @@ impl Display for PropertyGroupBuilder { "#, name = self.name )?; - for property in &self.properties { - write!(f, "{}", property)?; - } - write!( - f, - r#" -"# - )?; - Ok(()) - } -} - -pub struct Property { - name: String, - ty: String, - value: String, -} - -impl Display for Property { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { - write!( - f, - r#" + for property_name in &self.properties_seen { + let ty = self.property_types.get(property_name).unwrap(); + let values = self.property_values.get(property_name).unwrap(); + if values.len() == 1 { + write!( + f, + r#" "#, - name = self.name, - ty = self.ty, - value = self.value - )?; + name = property_name, + value = &values[0], + )?; + } else { + write!( + f, + r#" +"#, + name = property_name, + )?; + write!(f, " <{ty}_list>\n")?; + for v in values { + write!(f, " \n", v,)?; + } + write!(f, " \n")?; + write!(f, " \n")?; + } + } + write!(f, " \n")?; Ok(()) } } @@ -226,6 +276,27 @@ mod tests { ); } + #[test] + fn test_service_property_group() { + let builder = ProfileBuilder::new("myprofile").add_service( + ServiceBuilder::new("myservice").add_property_group( + PropertyGroupBuilder::new("mypg") + .add_property("myprop", "astring", "myvalue"), + ), + ); + assert_eq!( + format!("{}", builder), + r#" + + + + + + +"#, + ); + } + #[test] fn test_instance() { let builder = ProfileBuilder::new("myprofile").add_service( @@ -294,24 +365,38 @@ mod tests { #[test] fn test_multiple() { let builder = ProfileBuilder::new("myprofile").add_service( - ServiceBuilder::new("myservice").add_instance( - ServiceInstanceBuilder::new("default") - .add_property_group( - PropertyGroupBuilder::new("mypg") - .add_property("prop", "type", "value") - .add_property("prop2", "type", "value2"), - ) - .add_property_group( - PropertyGroupBuilder::new("mypg2") - .add_property("prop3", "type", "value3"), - ), - ), + ServiceBuilder::new("myservice") + .add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group( + PropertyGroupBuilder::new("mypg") + .add_property("prop", "type", "value") + .add_property("prop2", "type", "value2"), + ) + .add_property_group( + PropertyGroupBuilder::new("mypg2") + .add_property("prop3", "type", "value3"), + ), + ) + .add_property_group( + PropertyGroupBuilder::new("mypgsvc") + .add_property("prop", "astring", "valuesvc") + .add_property("prop", "astring", "value2svc"), + ), ); assert_eq!( format!("{}", builder), r#" + + + + + + + + diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index e2f78a42b0..a415a4c9e4 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -906,6 +906,54 @@ impl ServiceInner { Ok(()) } + async fn initialize_cockroach( + &self, + service_plan: &ServicePlan, + ) -> Result<(), SetupServiceError> { + // Now that datasets and zones have started for CockroachDB, + // perform one-time initialization of the cluster. + let sled_address = + service_plan + .services + .iter() + .find_map(|(sled_address, sled_request)| { + if sled_request.services.iter().any(|service| { + service.zone_type == ZoneType::CockroachDb + }) { + Some(sled_address) + } else { + None + } + }) + .expect("Should not create service plans without CockroachDb"); + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .map_err(SetupServiceError::HttpClient)?; + let client = SledAgentClient::new_with_client( + &format!("http://{}", sled_address), + client, + self.log.new(o!("SledAgentClient" => sled_address.to_string())), + ); + let initialize_db = || async { + client.cockroachdb_init().await.map_err(BackoffError::transient)?; + Ok::<(), BackoffError>>(()) + }; + let log_failure = |error, _| { + warn!(self.log, "Failed to initialize CockroachDB"; "error" => ?error); + }; + retry_notify( + retry_policy_internal_service_aggressive(), + initialize_db, + log_failure, + ) + .await + .unwrap(); + Ok(()) + } + // This method has a few distinct phases, identified by files in durable // storage: // @@ -1206,6 +1254,10 @@ impl ServiceInner { zone_types.insert(ZoneType::CockroachDb); self.ensure_all_services_of_type(&service_plan, &zone_types).await?; + // Now that datasets and zones have started for CockroachDB, + // perform one-time initialization of the cluster. + self.initialize_cockroach(&service_plan).await?; + // Issue service initialization requests. futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index a0da643f3c..a25fbfc314 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -45,7 +45,9 @@ use illumos_utils::addrobj::IPV6_LINK_LOCAL_NAME; use illumos_utils::dladm::{Dladm, Etherstub, EtherstubVnic, PhysicalLink}; use illumos_utils::link::{Link, VnicAllocator}; use illumos_utils::opte::{Port, PortManager, PortTicket}; -use illumos_utils::running_zone::{InstalledZone, RunningZone}; +use illumos_utils::running_zone::{ + InstalledZone, RunCommandError, RunningZone, +}; use illumos_utils::zfs::ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT; use illumos_utils::zone::AddressRequest; use illumos_utils::zone::Zones; @@ -102,6 +104,12 @@ use uuid::Uuid; #[derive(thiserror::Error, Debug)] pub enum Error { + #[error("Failed to initialize CockroachDb: {err}")] + CockroachInit { + #[source] + err: RunCommandError, + }, + #[error("Cannot serialize TOML to file: {path}: {err}")] TomlSerialize { path: Utf8PathBuf, err: toml::ser::Error }, @@ -173,6 +181,9 @@ pub enum Error { #[error("Services already configured for this Sled Agent")] ServicesAlreadyConfigured, + #[error("Failed to get address: {0}")] + GetAddressFailure(#[from] illumos_utils::zone::GetAddressError), + #[error("NTP zone not ready")] NtpZoneNotReady, @@ -1050,6 +1061,40 @@ impl ServiceManager { let Some(info) = self.inner.sled_info.get() else { return Err(Error::SledAgentNotReady); }; + + // We want to configure the dns/install SMF service inside the + // zone with the list of DNS nameservers. This will cause + // /etc/resolv.conf to be populated inside the zone. To do + // this, we need the full list of nameservers. Fortunately, the + // nameservers provide a DNS name for the full list of + // nameservers. + // + // Note that when we configure the dns/install service, we're + // supplying values for an existing property group on the SMF + // *service*. We're not creating a new property group, nor are + // we configuring a property group on the instance. + let all_nameservers = info + .resolver + .lookup_all_ipv6(internal_dns::ServiceName::InternalDns) + .await?; + let mut dns_config_builder = + PropertyGroupBuilder::new("install_props"); + for ns_addr in &all_nameservers { + dns_config_builder = dns_config_builder.add_property( + "nameserver", + "net_address", + &ns_addr.to_string(), + ); + } + let dns_install = ServiceBuilder::new("network/dns/install") + .add_property_group(dns_config_builder) + // We do need to enable the default instance of the + // dns/install service. It's enough to just mention it + // here, as the ServiceInstanceBuilder always enables the + // instance being added. + .add_instance(ServiceInstanceBuilder::new("default")); + + // Configure the CockroachDB service. let datalink = installed_zone.get_control_vnic_name(); let gateway = &info.underlay_address.to_string(); assert_eq!(request.zone.addresses.len(), 1); @@ -1060,83 +1105,28 @@ impl ServiceManager { let listen_addr = &address.ip().to_string(); let listen_port = &address.port().to_string(); - let config = PropertyGroupBuilder::new("config") + let cockroachdb_config = PropertyGroupBuilder::new("config") .add_property("datalink", "astring", datalink) .add_property("gateway", "astring", gateway) .add_property("listen_addr", "astring", listen_addr) .add_property("listen_port", "astring", listen_port) .add_property("store", "astring", "/data"); - - let profile = ProfileBuilder::new("omicron").add_service( + let cockroachdb_service = ServiceBuilder::new("oxide/cockroachdb").add_instance( ServiceInstanceBuilder::new("default") - .add_property_group(config), - ), - ); + .add_property_group(cockroachdb_config), + ); + + let profile = ProfileBuilder::new("omicron") + .add_service(cockroachdb_service) + .add_service(dns_install); profile .add_to_zone(&self.inner.log, &installed_zone) .await .map_err(|err| { Error::io("Failed to setup CRDB profile", err) })?; - let running_zone = RunningZone::boot(installed_zone).await?; - - // TODO: The following lines are necessary to initialize CRDB - // in a single-node environment. They're bad! They're wrong! - // We definitely shouldn't be wiping the database every time - // we want to boot this zone. - // - // But they're also necessary to prevent the build from - // regressing. - // - // NOTE: In the (very short-term) future, this will be - // replaced by the following: - // 1. CRDB will simply "start", rather than "start-single-node". - // 2. The Sled Agent will expose an explicit API to "init" the - // Cockroach cluster, and populate it with the expected - // contents. - let format_crdb = || async { - info!(self.inner.log, "Formatting CRDB"); - running_zone - .run_cmd(&[ - "/opt/oxide/cockroachdb/bin/cockroach", - "sql", - "--insecure", - "--host", - &address.to_string(), - "--file", - "/opt/oxide/cockroachdb/sql/dbwipe.sql", - ]) - .map_err(BackoffError::transient)?; - running_zone - .run_cmd(&[ - "/opt/oxide/cockroachdb/bin/cockroach", - "sql", - "--insecure", - "--host", - &address.to_string(), - "--file", - "/opt/oxide/cockroachdb/sql/dbinit.sql", - ]) - .map_err(BackoffError::transient)?; - info!(self.inner.log, "Formatting CRDB - Completed"); - Ok::< - (), - BackoffError< - illumos_utils::running_zone::RunCommandError, - >, - >(()) - }; - let log_failure = |error, _| { - warn!( - self.inner.log, "failed to format CRDB"; - "error" => ?error, - ); - }; - retry_notify(retry_policy_local(), format_crdb, log_failure) - .await - .expect("expected an infinite retry loop waiting for crdb"); - return Ok(running_zone); + return Ok(RunningZone::boot(installed_zone).await?); } ZoneType::Crucible => { let Some(info) = self.inner.sled_info.get() else { @@ -2451,6 +2441,65 @@ impl ServiceManager { Ok(()) } + pub async fn cockroachdb_initialize(&self) -> Result<(), Error> { + let log = &self.inner.log; + let dataset_zones = self.inner.zones.lock().await; + for zone in dataset_zones.iter() { + // TODO: We could probably store the ZoneKind in the running zone to + // make this "comparison to existing zones by name" mechanism a bit + // safer. + if zone.name().contains(&ZoneType::CockroachDb.to_string()) { + let address = Zones::get_address( + Some(zone.name()), + &zone.control_interface(), + )? + .ip(); + let host = &format!("[{address}]:{COCKROACH_PORT}"); + info!( + log, + "Initializing CRDB Cluster - sending request to {host}" + ); + zone.run_cmd(&[ + "/opt/oxide/cockroachdb/bin/cockroach", + "init", + "--insecure", + "--host", + host, + ]) + .map_err(|err| Error::CockroachInit { err })?; + info!(log, "Formatting CRDB"); + zone.run_cmd(&[ + "/opt/oxide/cockroachdb/bin/cockroach", + "sql", + "--insecure", + "--host", + host, + "--file", + "/opt/oxide/cockroachdb/sql/dbwipe.sql", + ]) + .map_err(|err| Error::CockroachInit { err })?; + zone.run_cmd(&[ + "/opt/oxide/cockroachdb/bin/cockroach", + "sql", + "--insecure", + "--host", + host, + "--file", + "/opt/oxide/cockroachdb/sql/dbinit.sql", + ]) + .map_err(|err| Error::CockroachInit { err })?; + info!(log, "Formatting CRDB - Completed"); + + // In the single-sled case, if there are multiple CRDB nodes on + // a single device, we'd still only want to send the + // initialization requests to a single dataset. + return Ok(()); + } + } + + Ok(()) + } + pub fn boottime_rewrite(&self, zones: &Vec) { if self .inner diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index f0fa2a6b6d..3bdc23dcf9 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -595,6 +595,11 @@ impl SledAgent { Ok(()) } + pub async fn cockroachdb_initialize(&self) -> Result<(), Error> { + self.inner.services.cockroachdb_initialize().await?; + Ok(()) + } + /// Gets the sled's current list of all zpools. pub async fn zpools_get(&self) -> Result, Error> { let zpools = self.inner.storage.get_zpools().await?; diff --git a/smf/cockroachdb/manifest.xml b/smf/cockroachdb/manifest.xml index 96650066ca..b4e69f6376 100644 --- a/smf/cockroachdb/manifest.xml +++ b/smf/cockroachdb/manifest.xml @@ -28,7 +28,6 @@ - diff --git a/smf/cockroachdb/method_script.sh b/smf/cockroachdb/method_script.sh index fb9bacf854..38c3831702 100755 --- a/smf/cockroachdb/method_script.sh +++ b/smf/cockroachdb/method_script.sh @@ -21,10 +21,24 @@ ipadm show-addr "$DATALINK/ll" || ipadm create-addr -t -T addrconf "$DATALINK/ll ipadm show-addr "$DATALINK/omicron6" || ipadm create-addr -t -T static -a "$LISTEN_ADDR" "$DATALINK/omicron6" route get -inet6 default -inet6 "$GATEWAY" || route add -inet6 default -inet6 "$GATEWAY" +# We need to tell CockroachDB the DNS names or IP addresses of the other nodes +# in the cluster. Look these up in internal DNS. Per the recommendations in +# the CockroachDB docs, we choose at most five addresses. Providing more +# addresses just increases the time for the cluster to stabilize. +JOIN_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait cockroach \ + | head -n 5 \ + | tr '\n' ,)" + +if [[ -z "$JOIN_ADDRS" ]]; then + printf 'ERROR: found no addresses for other CockroachDB nodes\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + args=( '--insecure' '--listen-addr' "[$LISTEN_ADDR]:$LISTEN_PORT" '--store' "$DATASTORE" + '--join' "$JOIN_ADDRS" ) -exec /opt/oxide/cockroachdb/bin/cockroach start-single-node "${args[@]}" & +exec /opt/oxide/cockroachdb/bin/cockroach start "${args[@]}" &