From 91e46a60bf19d6140553b840bb0b721f09e610a9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 4 Nov 2022 11:20:45 -0400 Subject: [PATCH 1/9] [sled-agent] Add monitoring for Tofino driver as factor in 'are we scrimlet' decision --- Cargo.lock | 11 ++ sled-agent/Cargo.toml | 1 + sled-agent/src/bootstrap/agent.rs | 30 +--- sled-agent/src/bootstrap/server.rs | 2 +- sled-agent/src/config.rs | 3 + sled-agent/src/hardware/illumos/mod.rs | 113 +++++++++++++ sled-agent/src/hardware/mod.rs | 59 +++++++ sled-agent/src/hardware/non_illumos/mod.rs | 37 +++++ sled-agent/src/lib.rs | 1 + sled-agent/src/nexus.rs | 36 ++++ sled-agent/src/server.rs | 61 +------ sled-agent/src/sled_agent.rs | 182 +++++++++++++++++++-- smf/sled-agent/config.toml | 8 + 13 files changed, 447 insertions(+), 97 deletions(-) create mode 100644 sled-agent/src/hardware/illumos/mod.rs create mode 100644 sled-agent/src/hardware/mod.rs create mode 100644 sled-agent/src/hardware/non_illumos/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 14c2cdcbe8..ecdee70313 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2400,6 +2400,16 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "illumos-devinfo" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/illumos-devinfo?rev=8fca0709e5137a3758374cb41ab1bfc60b03e6a9#8fca0709e5137a3758374cb41ab1bfc60b03e6a9" +dependencies = [ + "anyhow", + "libc", + "num_enum", +] + [[package]] name = "illumos-sys-hdrs" version = "0.1.0" @@ -3404,6 +3414,7 @@ dependencies = [ "expectorate", "futures", "http", + "illumos-devinfo", "internal-dns-client", "ipnetwork", "libc", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index c0fd3cc29c..44691d9bb7 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -59,6 +59,7 @@ vsss-rs = { version = "2.0.0", default-features = false, features = ["std"] } zone = "0.1" [target.'cfg(target_os = "illumos")'.dependencies] +illumos-devinfo = { git = "https://github.com/oxidecomputer/illumos-devinfo", rev = "8fca0709e5137a3758374cb41ab1bfc60b03e6a9" } opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "23fdf5856f10f23e2d26865d2d7e2d3bc537bca3" } [dev-dependencies] diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index b5b964edb0..252b3789d5 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -272,19 +272,11 @@ impl Agent { *self.share.lock().await = Some(share); } - // TODO(https://github.com/oxidecomputer/omicron/issues/823): - // Currently, the prescence or abscence of RSS is our signal - // for "is this a scrimlet or not". - // Longer-term, we should make this call based on the underlying - // hardware. - let is_scrimlet = self.rss.lock().await.is_some(); - // Server does not exist, initialize it. let server = SledServer::start( &self.sled_config, self.parent_log.clone(), sled_address, - is_scrimlet, request.clone(), ) .await @@ -464,9 +456,13 @@ impl Agent { Ok(rack_secret) } - // Initializes the Rack Setup Service. - async fn start_rss(&self, config: &Config) -> Result<(), BootstrapError> { + /// Initializes the Rack Setup Service, if requested by `config`. + pub async fn start_rss( + &self, + config: &Config, + ) -> Result<(), BootstrapError> { if let Some(rss_config) = &config.rss_config { + info!(&self.log, "bootstrap service initializing RSS"); let rss = RssHandle::start_rss( &self.parent_log, rss_config.clone(), @@ -484,20 +480,6 @@ impl Agent { } Ok(()) } - - /// Performs device initialization: - /// - /// - Verifies, unpacks, and launches other services. - pub async fn initialize( - &self, - config: &Config, - ) -> Result<(), BootstrapError> { - info!(&self.log, "bootstrap service initializing"); - - self.start_rss(config).await?; - - Ok(()) - } } // We intentionally DO NOT derive `Debug` or `Serialize`; both provide avenues diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 35193f6c33..d79d82e47b 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -113,7 +113,7 @@ impl Server { // This ordering allows the bootstrap agent to communicate with // other bootstrap agents on the rack during the initialization // process. - if let Err(e) = server.bootstrap_agent.initialize(&config).await { + if let Err(e) = server.bootstrap_agent.start_rss(&config).await { server.inner.abort(); return Err(e.to_string()); } diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index e1042f3b4b..487f58e6c1 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -19,6 +19,9 @@ pub struct Config { pub id: Uuid, /// Configuration for the sled agent debug log pub log: ConfigLogging, + /// Optionally force the sled to self-identify as a scrimlet (or gimlet, + /// if set to false). + pub force_scrimlet: Option, /// Optional VLAN ID to be used for tagging guest VNICs. pub vlan: Option, /// Optional list of zpools to be used as "discovered disks". diff --git a/sled-agent/src/hardware/illumos/mod.rs b/sled-agent/src/hardware/illumos/mod.rs new file mode 100644 index 0000000000..8180c5bfe1 --- /dev/null +++ b/sled-agent/src/hardware/illumos/mod.rs @@ -0,0 +1,113 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use illumos_devinfo::DevInfo; +use slog::Logger; +use std::sync::Arc; +use std::sync::Mutex; +use tokio::sync::broadcast; +use tokio::task::JoinHandle; + +// A cached copy of "our latest view of what hardware exists". +// +// This struct can be expanded arbitrarily, as it's useful for the Sled Agent +// to perceive hardware. +// +// Q: Why bother caching this information at all? Why not rely on devinfo for +// all queries? +// A: By keeping an in-memory representation, we can "diff" with the information +// reported from libdevinfo to decide when to send notifications. +struct HardwareInner { + is_scrimlet: bool, + // TODO: Add U.2s, M.2s, other devices. +} + +/// A representation of the underlying hardware. +/// +/// This structure provides interfaces for both querying and for receiving new +/// events. +pub struct Hardware { + log: Logger, + inner: Arc>, + tx: broadcast::Sender, + _worker: JoinHandle<()>, +} + +// Performs a single walk of the device info tree, updating our view of hardware +// and sending notifications to any subscribers. +fn poll_device_tree( + log: &Logger, + inner: &Arc>, + tx: &broadcast::Sender, +) -> Result<(), String> { + let mut device_info = DevInfo::new().map_err(|e| e.to_string())?; + let mut node_walker = device_info.walk_node(); + while let Some(node) = + node_walker.next().transpose().map_err(|e| e.to_string())? + { + if let Some(driver_name) = node.driver_name() { + if driver_name == "tofino" { + let scrimlet_status_updated = { + let mut inner = inner.lock().unwrap(); + if !inner.is_scrimlet { + inner.is_scrimlet = true; + true + } else { + false + } + }; + if scrimlet_status_updated { + info!(log, "Polling device tree: Found tofino driver"); + let _ = tx.send(super::HardwareUpdate::TofinoLoaded); + } + } + } + } + Ok(()) +} + +async fn hardware_tracking_task( + log: Logger, + inner: Arc>, + tx: broadcast::Sender, +) { + loop { + if let Err(err) = poll_device_tree(&log, &inner, &tx) { + warn!(log, "Failed to query device tree: {err}"); + } + tokio::time::sleep(tokio::time::Duration::from_secs(5)).await; + } +} + +impl Hardware { + /// Creates a new representation of the underlying hardware, and initialize + /// a task which periodically updates that representation. + pub fn new(log: Logger, is_scrimlet: bool) -> Self { + let (tx, _) = broadcast::channel(1024); + let inner = Arc::new(Mutex::new(HardwareInner { is_scrimlet })); + + let log2 = log.clone(); + let inner2 = inner.clone(); + let tx2 = tx.clone(); + let _worker = tokio::task::spawn(async move { + hardware_tracking_task(log2, inner2, tx2).await + }); + + Self { log, inner, tx, _worker } + } + + pub fn is_scrimlet(&self) -> bool { + self.inner.lock().unwrap().is_scrimlet + } + + pub fn monitor(&self) -> broadcast::Receiver { + info!(self.log, "Monitoring for hardware updates"); + self.tx.subscribe() + // TODO: Do we want to send initial messages, based on the existing + // state? Or should we leave this responsibility to the caller, to + // start monitoring, and then query for the initial state? + // + // This could simplify the `SledAgent::monitor` function? + } +} diff --git a/sled-agent/src/hardware/mod.rs b/sled-agent/src/hardware/mod.rs new file mode 100644 index 0000000000..6f4dcd5c2d --- /dev/null +++ b/sled-agent/src/hardware/mod.rs @@ -0,0 +1,59 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use slog::Logger; +use tokio::sync::broadcast; + +cfg_if::cfg_if! { + if #[cfg(target_os = "illumos")] { + mod illumos; + use illumos::*; + } else { + mod non_illumos; + use non_illumos::*; + } +} + +/// A platform-independent interface for interacting with the underlying hardware. +pub(crate) struct HardwareManager { + _log: Logger, + inner: Hardware, +} + +impl HardwareManager { + pub fn new(config: &crate::config::Config, log: Logger) -> Self { + // Unless explicitly specified, we assume this device is a Gimlet until + // told otherwise. + let is_scrimlet = if let Some(is_scrimlet) = config.force_scrimlet { + is_scrimlet + } else { + false + }; + + let log = log.new(o!("component" => "HardwareManager")); + Self { _log: log.clone(), inner: Hardware::new(log, is_scrimlet) } + } + + pub fn is_scrimlet(&self) -> bool { + self.inner.is_scrimlet() + } + + // Monitors the underlying hardware for updates. + pub fn monitor(&self) -> broadcast::Receiver { + self.inner.monitor() + } +} + +/// Provides information from the underlying hardware about updates +/// which may require action on behalf of the Sled Agent. +/// +/// These updates should generally be "non-opinionated" - the higher +/// layers of the sled agent can make the call to ignore these updates +/// or not. +#[derive(Clone)] +#[allow(dead_code)] +pub enum HardwareUpdate { + TofinoLoaded, + // TODO: Notify about disks being added / removed, etc. +} diff --git a/sled-agent/src/hardware/non_illumos/mod.rs b/sled-agent/src/hardware/non_illumos/mod.rs new file mode 100644 index 0000000000..1dee9550c5 --- /dev/null +++ b/sled-agent/src/hardware/non_illumos/mod.rs @@ -0,0 +1,37 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use slog::Logger; +use std::sync::Mutex; +use tokio::sync::broadcast; + +struct HardwareInner { + is_scrimlet: bool, +} + +/// A simulated representation of the underlying hardware. +/// +/// This is intended for non-illumos systems to have roughly the same interface +/// as illumos systems. +pub struct Hardware { + log: Logger, + inner: Mutex, + tx: broadcast::Sender, +} + +impl Hardware { + pub fn new(log: Logger, is_scrimlet: bool) -> Self { + let (tx, _) = broadcast::channel(1024); + Self { log, inner: Mutex::new(HardwareInner { is_scrimlet }), tx } + } + + pub fn is_scrimlet(&self) -> bool { + self.inner.lock().unwrap().is_scrimlet + } + + pub fn monitor(&self) -> broadcast::Receiver { + info!(self.log, "Monitoring for hardware updates"); + self.tx.subscribe() + } +} diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index d36880b06d..73dcdca062 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -21,6 +21,7 @@ pub mod common; // Modules for the non-simulated sled agent. pub mod bootstrap; pub mod config; +mod hardware; mod http_entrypoints; pub mod illumos; mod instance; diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 738371efd4..80e1b926ca 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -13,8 +13,12 @@ use internal_dns_client::{ }; use omicron_common::address::NEXUS_INTERNAL_PORT; use slog::Logger; +use std::future::Future; use std::net::Ipv6Addr; +use std::pin::Pin; use std::sync::Arc; +use tokio::sync::mpsc; +use tokio::task::JoinHandle; struct Inner { log: Logger, @@ -59,3 +63,35 @@ impl LazyNexusClient { )) } } + +type NexusRequestFut = dyn Future + Send; +type NexusRequest = Pin>; + +/// A queue of futures which represent requests to Nexus. +pub struct NexusRequestQueue { + tx: mpsc::UnboundedSender, + _worker: JoinHandle<()>, +} + +impl NexusRequestQueue { + /// Creates a new request queue, along with a worker which executes + /// any incoming tasks. + pub fn new() -> Self { + let (tx, mut rx) = mpsc::unbounded_channel(); + + let _worker = tokio::spawn(async move { + while let Some(fut) = rx.recv().await { + fut.await; + } + }); + + Self { tx, _worker } + } + + /// Gets access to the sending portion of the request queue. + /// + /// Callers can use this to add their own requests. + pub fn sender(&self) -> &mpsc::UnboundedSender { + &self.tx + } +} diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 4319f7019a..4aa9078890 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -9,9 +9,6 @@ use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; use crate::bootstrap::params::SledAgentRequest; use crate::nexus::LazyNexusClient; -use omicron_common::backoff::{ - internal_service_policy_with_max, retry_notify, BackoffError, -}; use slog::Logger; use std::net::{SocketAddr, SocketAddrV6}; use uuid::Uuid; @@ -21,7 +18,6 @@ use uuid::Uuid; pub struct Server { /// Dropshot server for the API. http_server: dropshot::HttpServer, - _nexus_notifier_handle: tokio::task::JoinHandle<()>, } impl Server { @@ -38,7 +34,6 @@ impl Server { config: &Config, log: Logger, addr: SocketAddrV6, - is_scrimlet: bool, request: SledAgentRequest, ) -> Result { info!(log, "setting up sled agent server"); @@ -71,61 +66,7 @@ impl Server { .map_err(|error| format!("initializing server: {}", error))? .start(); - let sled_address = http_server.local_addr(); - let sled_id = config.id; - let nexus_notifier_handle = tokio::task::spawn(async move { - // Notify the control plane that we're up, and continue trying this - // until it succeeds. We retry with an randomized, capped exponential - // backoff. - // - // TODO-robustness if this returns a 400 error, we probably want to - // return a permanent error from the `notify_nexus` closure. - let notify_nexus = || async { - info!( - log, - "contacting server nexus, registering sled: {}", sled_id - ); - let role = if is_scrimlet { - nexus_client::types::SledRole::Scrimlet - } else { - nexus_client::types::SledRole::Gimlet - }; - - let nexus_client = lazy_nexus_client - .get() - .await - .map_err(|err| BackoffError::transient(err.to_string()))?; - nexus_client - .sled_agent_put( - &sled_id, - &nexus_client::types::SledAgentStartupInfo { - sa_address: sled_address.to_string(), - role, - }, - ) - .await - .map_err(|err| BackoffError::transient(err.to_string())) - }; - let log_notification_failure = |err, delay| { - warn!( - log, - "failed to notify nexus about sled agent: {}, will retry in {:?}", err, delay; - ); - }; - retry_notify( - internal_service_policy_with_max( - std::time::Duration::from_secs(1), - ), - notify_nexus, - log_notification_failure, - ) - .await - .expect("Expected an infinite retry loop contacting Nexus"); - }); - Ok(Server { - http_server, - _nexus_notifier_handle: nexus_notifier_handle, - }) + Ok(Server { http_server }) } /// Wait for the given server to shut down diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index c5e11a1f4b..3cebf2d06e 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -6,6 +6,7 @@ use crate::bootstrap::params::SledAgentRequest; use crate::config::Config; +use crate::hardware::HardwareManager; use crate::illumos::vnic::VnicKind; use crate::illumos::zfs::{ Mountpoint, ZONE_ZFS_DATASET, ZONE_ZFS_DATASET_MOUNTPOINT, @@ -13,7 +14,7 @@ use crate::illumos::zfs::{ use crate::illumos::zone::IPADM; use crate::illumos::{execute, PFEXEC}; use crate::instance_manager::InstanceManager; -use crate::nexus::LazyNexusClient; +use crate::nexus::{LazyNexusClient, NexusRequestQueue}; use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, InstanceSerialConsoleData, @@ -27,9 +28,13 @@ use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, internal::nexus::UpdateArtifact, }; +use omicron_common::backoff::{ + internal_service_policy_with_max, retry_notify, BackoffError, +}; use slog::Logger; use std::net::SocketAddrV6; use std::process::Command; +use std::sync::Arc; use uuid::Uuid; use crucible_client_types::VolumeConstructionRequest; @@ -140,20 +145,35 @@ impl From for dropshot::HttpError { /// Describes an executing Sled Agent object. /// /// Contains both a connection to the Nexus, as well as managed instances. -pub struct SledAgent { +struct SledAgentInner { // ID of the Sled id: Uuid, + // Sled underlay address + addr: SocketAddrV6, + // Component of Sled Agent responsible for storage and dataset management. storage: StorageManager, // Component of Sled Agent responsible for managing Propolis instances. instances: InstanceManager, - lazy_nexus_client: LazyNexusClient, + // Component of Sled Agent responsible for monitoring hardware. + hardware: HardwareManager, // Other Oxide-controlled services running on this Sled. services: ServiceManager, + + // Lazily-acquired connection to Nexus. + lazy_nexus_client: LazyNexusClient, + + // A serialized request queue for operations interacting with Nexus. + nexus_request_queue: NexusRequestQueue, +} + +#[derive(Clone)] +pub struct SledAgent { + inner: Arc, } impl SledAgent { @@ -298,6 +318,9 @@ impl SledAgent { gateway_address: request.gateway.address, ..Default::default() }; + + let hardware = HardwareManager::new(&config, parent_log.clone()); + let services = ServiceManager::new( parent_log.clone(), etherstub.clone(), @@ -309,11 +332,138 @@ impl SledAgent { ) .await?; - Ok(SledAgent { id, storage, instances, lazy_nexus_client, services }) + let sled_agent = SledAgent { + inner: Arc::new(SledAgentInner { + id, + addr: sled_address, + storage, + instances, + hardware, + services, + lazy_nexus_client, + + // TODO(https://github.com/oxidecomputer/omicron/issues/1917): + // Propagate usage of this request queue throughout the Sled Agent. + // + // Also, we could maybe de-dup some of the backoff code in the request queue? + nexus_request_queue: NexusRequestQueue::new(), + }), + }; + + // We immediately add a notification to the request queue about our + // existence. If inspection of the hardware later informs us that we're + // actually running on a scrimlet, that's fine, the updated value will + // be received by Nexus eventually. + sled_agent.notify_nexus_about_self(&log); + + // Begin monitoring the underlying hardware, and reacting to changes. + let sa = sled_agent.clone(); + tokio::spawn(async move { + sa.monitor(log).await; + }); + + Ok(sled_agent) + } + + async fn monitor(&self, log: Logger) { + // Start monitoring the hardware for changes + let mut hardware_updates = self.inner.hardware.monitor(); + + // Scan the existing system for noteworthy events + // that may have happened before we started monitoring + if self.inner.hardware.is_scrimlet() { + self.ensure_scrimlet_services_active(&log).await; + } + + // Rely on monitoring for tracking all future updates. + loop { + let update = hardware_updates.recv().await.unwrap(); + match update { + crate::hardware::HardwareUpdate::TofinoLoaded => { + self.ensure_scrimlet_services_active(&log).await; + } + } + } + } + + async fn ensure_scrimlet_services_active(&self, log: &Logger) { + // Inform Nexus that we're now a scrimlet, instead of a Gimlet. + // + // This won't block on Nexus responding; it may take while before + // Nexus actually comes online. + self.notify_nexus_about_self(log); + + // TODO(https://github.com/oxidecomputer/omicron/issues/823): Launch the switch zone, with + // Dendrite, MGS, and any other services we want to enable. } pub fn id(&self) -> Uuid { - self.id + self.inner.id + } + + // Sends a request to Nexus informing it that the current sled exists. + fn notify_nexus_about_self(&self, log: &Logger) { + let sled_id = self.inner.id; + let lazy_nexus_client = self.inner.lazy_nexus_client.clone(); + let sled_address = self.inner.addr; + let is_scrimlet = self.inner.hardware.is_scrimlet(); + let log = log.clone(); + let fut = async move { + // Notify the control plane that we're up, and continue trying this + // until it succeeds. We retry with an randomized, capped exponential + // backoff. + // + // TODO-robustness if this returns a 400 error, we probably want to + // return a permanent error from the `notify_nexus` closure. + let notify_nexus = || async { + info!( + log, + "contacting server nexus, registering sled: {}", sled_id + ); + let role = if is_scrimlet { + nexus_client::types::SledRole::Scrimlet + } else { + nexus_client::types::SledRole::Gimlet + }; + + let nexus_client = lazy_nexus_client + .get() + .await + .map_err(|err| BackoffError::transient(err.to_string()))?; + nexus_client + .sled_agent_put( + &sled_id, + &nexus_client::types::SledAgentStartupInfo { + sa_address: sled_address.to_string(), + role, + }, + ) + .await + .map_err(|err| BackoffError::transient(err.to_string())) + }; + let log_notification_failure = |err, delay| { + warn!( + log, + "failed to notify nexus about sled agent: {}, will retry in {:?}", err, delay; + ); + }; + retry_notify( + internal_service_policy_with_max( + std::time::Duration::from_secs(1), + ), + notify_nexus, + log_notification_failure, + ) + .await + .expect("Expected an infinite retry loop contacting Nexus"); + }; + self.inner + .nexus_request_queue + .sender() + .send(Box::pin(fut)) + .unwrap_or_else(|err| { + panic!("Failed to send future to request queue: {err}"); + }); } /// Ensures that particular services should be initialized. @@ -324,7 +474,7 @@ impl SledAgent { &self, requested_services: ServiceEnsureBody, ) -> Result<(), Error> { - self.services.ensure(requested_services).await?; + self.inner.services.ensure(requested_services).await?; Ok(()) } @@ -335,7 +485,8 @@ impl SledAgent { dataset_kind: DatasetKind, address: SocketAddrV6, ) -> Result<(), Error> { - self.storage + self.inner + .storage .upsert_filesystem(zpool_uuid, dataset_kind, address) .await?; Ok(()) @@ -349,7 +500,8 @@ impl SledAgent { target: InstanceRuntimeStateRequested, migrate: Option, ) -> Result { - self.instances + self.inner + .instances .ensure(instance_id, initial, target, migrate) .await .map_err(|e| Error::Instance(e)) @@ -373,7 +525,7 @@ impl SledAgent { &self, artifact: UpdateArtifact, ) -> Result<(), Error> { - let nexus_client = self.lazy_nexus_client.get().await?; + let nexus_client = self.inner.lazy_nexus_client.get().await?; crate::updates::download_artifact(artifact, &nexus_client).await?; Ok(()) } @@ -384,7 +536,8 @@ impl SledAgent { byte_offset: ByteOffset, max_bytes: Option, ) -> Result { - self.instances + self.inner + .instances .instance_serial_console_buffer_data( instance_id, byte_offset, @@ -401,7 +554,8 @@ impl SledAgent { disk_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { - self.instances + self.inner + .instances .instance_issue_disk_snapshot_request( instance_id, disk_id, @@ -430,7 +584,11 @@ impl SledAgent { _vpc_id: Uuid, rules: &[VpcFirewallRule], ) -> Result<(), Error> { - self.instances.firewall_rules_ensure(rules).await.map_err(Error::from) + self.inner + .instances + .firewall_rules_ensure(rules) + .await + .map_err(Error::from) } } diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 9af1db6f2e..dd906481ce 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -2,6 +2,14 @@ id = "fb0f7546-4d46-40ca-9d56-cbb810684ca7" +# Identifies if the sled should be unconditionally treated as a scrimlet. +# +# If this is set to "true", the sled agent treats itself as a scrimlet. +# If this is set to "false", the sled agent treats itself as a gimlet. +# If this is unset, the sled automatically detects whether or not it is a +# scrimlet. +# force_scrimlet = true + # A file-backed zpool can be manually created with the following: # # truncate -s 10GB testpool.vdev # # zpool create oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b "$PWD/testpool.vdev" From 1230a1b52e5add6af72ec3c0f5f0fcb0672979d1 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 4 Nov 2022 11:50:59 -0400 Subject: [PATCH 2/9] Poll the device tree once during initialization --- sled-agent/src/hardware/illumos/mod.rs | 11 +++++++++-- sled-agent/src/hardware/mod.rs | 7 +++++-- sled-agent/src/hardware/non_illumos/mod.rs | 4 ++-- sled-agent/src/sled_agent.rs | 6 +++++- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/sled-agent/src/hardware/illumos/mod.rs b/sled-agent/src/hardware/illumos/mod.rs index 8180c5bfe1..0a156610ac 100644 --- a/sled-agent/src/hardware/illumos/mod.rs +++ b/sled-agent/src/hardware/illumos/mod.rs @@ -83,10 +83,17 @@ async fn hardware_tracking_task( impl Hardware { /// Creates a new representation of the underlying hardware, and initialize /// a task which periodically updates that representation. - pub fn new(log: Logger, is_scrimlet: bool) -> Self { + pub fn new(log: Logger, is_scrimlet: bool) -> Result { let (tx, _) = broadcast::channel(1024); let inner = Arc::new(Mutex::new(HardwareInner { is_scrimlet })); + // Force the device tree to be polled at least once before returning. + // This mitigates issues where the Sled Agent could try to propagate + // an "empty" view of hardware to other consumers before the first + // query. + poll_device_tree(&log, &inner, &tx) + .map_err(|err| format!("Failed to poll device tree: {err}"))?; + let log2 = log.clone(); let inner2 = inner.clone(); let tx2 = tx.clone(); @@ -94,7 +101,7 @@ impl Hardware { hardware_tracking_task(log2, inner2, tx2).await }); - Self { log, inner, tx, _worker } + Ok(Self { log, inner, tx, _worker }) } pub fn is_scrimlet(&self) -> bool { diff --git a/sled-agent/src/hardware/mod.rs b/sled-agent/src/hardware/mod.rs index 6f4dcd5c2d..d5e5ddcaf6 100644 --- a/sled-agent/src/hardware/mod.rs +++ b/sled-agent/src/hardware/mod.rs @@ -22,7 +22,10 @@ pub(crate) struct HardwareManager { } impl HardwareManager { - pub fn new(config: &crate::config::Config, log: Logger) -> Self { + pub fn new( + config: &crate::config::Config, + log: Logger, + ) -> Result { // Unless explicitly specified, we assume this device is a Gimlet until // told otherwise. let is_scrimlet = if let Some(is_scrimlet) = config.force_scrimlet { @@ -32,7 +35,7 @@ impl HardwareManager { }; let log = log.new(o!("component" => "HardwareManager")); - Self { _log: log.clone(), inner: Hardware::new(log, is_scrimlet) } + Ok(Self { _log: log.clone(), inner: Hardware::new(log, is_scrimlet)? }) } pub fn is_scrimlet(&self) -> bool { diff --git a/sled-agent/src/hardware/non_illumos/mod.rs b/sled-agent/src/hardware/non_illumos/mod.rs index 1dee9550c5..2bc9c9aa1c 100644 --- a/sled-agent/src/hardware/non_illumos/mod.rs +++ b/sled-agent/src/hardware/non_illumos/mod.rs @@ -21,9 +21,9 @@ pub struct Hardware { } impl Hardware { - pub fn new(log: Logger, is_scrimlet: bool) -> Self { + pub fn new(log: Logger, is_scrimlet: bool) -> Result { let (tx, _) = broadcast::channel(1024); - Self { log, inner: Mutex::new(HardwareInner { is_scrimlet }), tx } + Ok(Self { log, inner: Mutex::new(HardwareInner { is_scrimlet }), tx }) } pub fn is_scrimlet(&self) -> bool { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 3cebf2d06e..4df3e36e91 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -97,6 +97,9 @@ pub enum Error { #[error("Error managing guest networking: {0}")] Opte(#[from] crate::opte::Error), + #[error("Error monitoring hardware: {0}")] + Hardware(String), + #[error("Error resolving DNS name: {0}")] ResolveError(#[from] internal_dns_client::multiclient::ResolveError), } @@ -319,7 +322,8 @@ impl SledAgent { ..Default::default() }; - let hardware = HardwareManager::new(&config, parent_log.clone()); + let hardware = HardwareManager::new(&config, parent_log.clone()) + .map_err(|e| Error::Hardware(e))?; let services = ServiceManager::new( parent_log.clone(), From b8d537a9c1d53932eef6a881da7efcc0c33e71b0 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 8 Nov 2022 09:56:46 -0500 Subject: [PATCH 3/9] Identify driver loading, improve 'stub' tofino support, identify driver unloading --- sled-agent/src/config.rs | 2 +- sled-agent/src/hardware/illumos/mod.rs | 132 +++++++++++++++++---- sled-agent/src/hardware/mod.rs | 16 +-- sled-agent/src/hardware/non_illumos/mod.rs | 12 +- sled-agent/src/sled_agent.rs | 9 ++ smf/sled-agent/config.toml | 7 +- 6 files changed, 136 insertions(+), 42 deletions(-) diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index 487f58e6c1..7986b3b647 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -21,7 +21,7 @@ pub struct Config { pub log: ConfigLogging, /// Optionally force the sled to self-identify as a scrimlet (or gimlet, /// if set to false). - pub force_scrimlet: Option, + pub stub_scrimlet: Option, /// Optional VLAN ID to be used for tagging guest VNICs. pub vlan: Option, /// Optional list of zpools to be used as "discovered disks". diff --git a/sled-agent/src/hardware/illumos/mod.rs b/sled-agent/src/hardware/illumos/mod.rs index 0a156610ac..d898e95ee8 100644 --- a/sled-agent/src/hardware/illumos/mod.rs +++ b/sled-agent/src/hardware/illumos/mod.rs @@ -9,6 +9,38 @@ use std::sync::Mutex; use tokio::sync::broadcast; use tokio::task::JoinHandle; +// A snapshot of information about the underlying Tofino device +struct TofinoSnapshot { + exists: bool, + driver_loaded: bool, +} + +impl TofinoSnapshot { + fn new() -> Self { + Self { exists: false, driver_loaded: false } + } +} + +// A snapshot of information about the underlying hardware +struct HardwareSnapshot { + tofino: TofinoSnapshot, +} + +impl HardwareSnapshot { + fn new() -> Self { + Self { tofino: TofinoSnapshot::new() } + } +} + +// Describes a view of the Tofino switch. +enum TofinoView { + // The view of the Tofino switch exactly matches the snapshot of hardware. + Real(TofinoSnapshot), + // The Tofino switch has been "stubbed out", and the underlying hardware is + // being ignored. + Stub { active: bool }, +} + // A cached copy of "our latest view of what hardware exists". // // This struct can be expanded arbitrarily, as it's useful for the Sled Agent @@ -17,59 +49,100 @@ use tokio::task::JoinHandle; // Q: Why bother caching this information at all? Why not rely on devinfo for // all queries? // A: By keeping an in-memory representation, we can "diff" with the information -// reported from libdevinfo to decide when to send notifications. -struct HardwareInner { - is_scrimlet: bool, +// reported from libdevinfo to decide when to send notifications and change +// which services are currently executing. +struct HardwareView { + tofino: TofinoView, // TODO: Add U.2s, M.2s, other devices. } +impl HardwareView { + fn new() -> Self { + Self { tofino: TofinoView::Real(TofinoSnapshot::new()) } + } + + fn new_stub_tofino(active: bool) -> Self { + Self { tofino: TofinoView::Stub { active } } + } +} + /// A representation of the underlying hardware. /// /// This structure provides interfaces for both querying and for receiving new /// events. pub struct Hardware { log: Logger, - inner: Arc>, + inner: Arc>, tx: broadcast::Sender, _worker: JoinHandle<()>, } +const TOFINO_SUBSYSTEM_VID: i32 = 0x1d1c; +const TOFINO_SUBSYSTEM_ID: i32 = 0x100; + +fn node_name(subsystem_vid: i32, subsystem_id: i32) -> String { + format!("pci{subsystem_vid:x},{subsystem_id:x}") +} + // Performs a single walk of the device info tree, updating our view of hardware // and sending notifications to any subscribers. fn poll_device_tree( log: &Logger, - inner: &Arc>, + inner: &Arc>, tx: &broadcast::Sender, ) -> Result<(), String> { + // Construct a view of hardware by walking the device tree. let mut device_info = DevInfo::new().map_err(|e| e.to_string())?; let mut node_walker = device_info.walk_node(); + let mut polled_hw = HardwareSnapshot::new(); while let Some(node) = node_walker.next().transpose().map_err(|e| e.to_string())? { - if let Some(driver_name) = node.driver_name() { - if driver_name == "tofino" { - let scrimlet_status_updated = { - let mut inner = inner.lock().unwrap(); - if !inner.is_scrimlet { - inner.is_scrimlet = true; - true - } else { - false - } - }; - if scrimlet_status_updated { - info!(log, "Polling device tree: Found tofino driver"); - let _ = tx.send(super::HardwareUpdate::TofinoLoaded); - } + if node.node_name() + == node_name(TOFINO_SUBSYSTEM_VID, TOFINO_SUBSYSTEM_ID) + { + polled_hw.tofino.exists = true; + polled_hw.tofino.driver_loaded = + node.driver_name().as_deref() == Some("tofino"); + } + } + + // After inspecting the device tree, diff with the old view, and provide + // necessary updates. + let tofino_update = { + let mut inner = inner.lock().unwrap(); + match inner.tofino { + TofinoView::Real(TofinoSnapshot { driver_loaded, .. }) => { + // Identify if we have an update to perform + let tofino_update = + match (driver_loaded, polled_hw.tofino.driver_loaded) { + (false, true) => { + Some(super::HardwareUpdate::TofinoLoaded) + } + (true, false) => { + Some(super::HardwareUpdate::TofinoUnloaded) + } + _ => None, + }; + // Update our view of the underlying hardware + inner.tofino = TofinoView::Real(polled_hw.tofino); + tofino_update } + TofinoView::Stub { .. } => None, } + }; + + if let Some(update) = tofino_update { + info!(log, "Update from polling device tree: {:?}", update); + let _ = tx.send(update); } + Ok(()) } async fn hardware_tracking_task( log: Logger, - inner: Arc>, + inner: Arc>, tx: broadcast::Sender, ) { loop { @@ -83,9 +156,16 @@ async fn hardware_tracking_task( impl Hardware { /// Creates a new representation of the underlying hardware, and initialize /// a task which periodically updates that representation. - pub fn new(log: Logger, is_scrimlet: bool) -> Result { + pub fn new( + log: Logger, + stub_scrimlet: Option, + ) -> Result { let (tx, _) = broadcast::channel(1024); - let inner = Arc::new(Mutex::new(HardwareInner { is_scrimlet })); + let hw = match stub_scrimlet { + None => HardwareView::new(), + Some(active) => HardwareView::new_stub_tofino(active), + }; + let inner = Arc::new(Mutex::new(hw)); // Force the device tree to be polled at least once before returning. // This mitigates issues where the Sled Agent could try to propagate @@ -105,7 +185,11 @@ impl Hardware { } pub fn is_scrimlet(&self) -> bool { - self.inner.lock().unwrap().is_scrimlet + let inner = self.inner.lock().unwrap(); + match inner.tofino { + TofinoView::Real(TofinoSnapshot { exists, .. }) => exists, + TofinoView::Stub { active } => active, + } } pub fn monitor(&self) -> broadcast::Receiver { diff --git a/sled-agent/src/hardware/mod.rs b/sled-agent/src/hardware/mod.rs index d5e5ddcaf6..6fb8d27c8a 100644 --- a/sled-agent/src/hardware/mod.rs +++ b/sled-agent/src/hardware/mod.rs @@ -26,16 +26,11 @@ impl HardwareManager { config: &crate::config::Config, log: Logger, ) -> Result { - // Unless explicitly specified, we assume this device is a Gimlet until - // told otherwise. - let is_scrimlet = if let Some(is_scrimlet) = config.force_scrimlet { - is_scrimlet - } else { - false - }; - let log = log.new(o!("component" => "HardwareManager")); - Ok(Self { _log: log.clone(), inner: Hardware::new(log, is_scrimlet)? }) + Ok(Self { + _log: log.clone(), + inner: Hardware::new(log, config.stub_scrimlet)?, + }) } pub fn is_scrimlet(&self) -> bool { @@ -54,9 +49,10 @@ impl HardwareManager { /// These updates should generally be "non-opinionated" - the higher /// layers of the sled agent can make the call to ignore these updates /// or not. -#[derive(Clone)] +#[derive(Clone, Debug)] #[allow(dead_code)] pub enum HardwareUpdate { TofinoLoaded, + TofinoUnloaded, // TODO: Notify about disks being added / removed, etc. } diff --git a/sled-agent/src/hardware/non_illumos/mod.rs b/sled-agent/src/hardware/non_illumos/mod.rs index 2bc9c9aa1c..5924bad943 100644 --- a/sled-agent/src/hardware/non_illumos/mod.rs +++ b/sled-agent/src/hardware/non_illumos/mod.rs @@ -7,7 +7,7 @@ use std::sync::Mutex; use tokio::sync::broadcast; struct HardwareInner { - is_scrimlet: bool, + stub_scrimlet: bool, } /// A simulated representation of the underlying hardware. @@ -21,13 +21,17 @@ pub struct Hardware { } impl Hardware { - pub fn new(log: Logger, is_scrimlet: bool) -> Result { + pub fn new( + log: Logger, + stub_scrimlet: Option, + ) -> Result { let (tx, _) = broadcast::channel(1024); - Ok(Self { log, inner: Mutex::new(HardwareInner { is_scrimlet }), tx }) + let stub_scrimlet = stub_scrimlet.unwrap_or(false); + Ok(Self { log, inner: Mutex::new(HardwareInner { stub_scrimlet }), tx }) } pub fn is_scrimlet(&self) -> bool { - self.inner.lock().unwrap().is_scrimlet + self.inner.lock().unwrap().stub_scrimlet } pub fn monitor(&self) -> broadcast::Receiver { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 4df3e36e91..08ab3f7f10 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -386,6 +386,9 @@ impl SledAgent { crate::hardware::HardwareUpdate::TofinoLoaded => { self.ensure_scrimlet_services_active(&log).await; } + crate::hardware::HardwareUpdate::TofinoUnloaded => { + self.ensure_scrimlet_services_deactive(&log).await; + } } } } @@ -399,6 +402,12 @@ impl SledAgent { // TODO(https://github.com/oxidecomputer/omicron/issues/823): Launch the switch zone, with // Dendrite, MGS, and any other services we want to enable. + warn!(log, "Activating scrimlet services not yet implemented"); + } + + async fn ensure_scrimlet_services_deactive(&self, log: &Logger) { + // TODO(https://github.com/oxidecomputer/omicron/issues/823): Terminate the switch zone. + warn!(log, "Deactivating scrimlet services not yet implemented"); } pub fn id(&self) -> Uuid { diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index dd906481ce..cf082e7dfc 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -6,9 +6,10 @@ id = "fb0f7546-4d46-40ca-9d56-cbb810684ca7" # # If this is set to "true", the sled agent treats itself as a scrimlet. # If this is set to "false", the sled agent treats itself as a gimlet. -# If this is unset, the sled automatically detects whether or not it is a -# scrimlet. -# force_scrimlet = true +# If this is unset: +# - On illumos, the sled automatically detects whether or not it is a scrimlet. +# - On all other platforms, the sled assumes it is a gimlet. +# stub_scrimlet = true # A file-backed zpool can be manually created with the following: # # truncate -s 10GB testpool.vdev From 4863667218cd4f2c0942600ab45c78c63fb7d36b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 8 Nov 2022 11:17:19 -0500 Subject: [PATCH 4/9] Flatten hardware views, add docs, simplify non-illumos code --- sled-agent/src/hardware/illumos/mod.rs | 82 ++++++++++++++-------- sled-agent/src/hardware/mod.rs | 36 +--------- sled-agent/src/hardware/non_illumos/mod.rs | 38 +++++----- sled-agent/src/nexus.rs | 7 ++ sled-agent/src/sled_agent.rs | 52 ++++++++++---- 5 files changed, 119 insertions(+), 96 deletions(-) diff --git a/sled-agent/src/hardware/illumos/mod.rs b/sled-agent/src/hardware/illumos/mod.rs index d898e95ee8..e9a54cfc77 100644 --- a/sled-agent/src/hardware/illumos/mod.rs +++ b/sled-agent/src/hardware/illumos/mod.rs @@ -66,17 +66,6 @@ impl HardwareView { } } -/// A representation of the underlying hardware. -/// -/// This structure provides interfaces for both querying and for receiving new -/// events. -pub struct Hardware { - log: Logger, - inner: Arc>, - tx: broadcast::Sender, - _worker: JoinHandle<()>, -} - const TOFINO_SUBSYSTEM_VID: i32 = 0x1d1c; const TOFINO_SUBSYSTEM_ID: i32 = 0x100; @@ -109,30 +98,32 @@ fn poll_device_tree( // After inspecting the device tree, diff with the old view, and provide // necessary updates. - let tofino_update = { + let mut updates = vec![]; + { let mut inner = inner.lock().unwrap(); match inner.tofino { - TofinoView::Real(TofinoSnapshot { driver_loaded, .. }) => { - // Identify if we have an update to perform - let tofino_update = - match (driver_loaded, polled_hw.tofino.driver_loaded) { - (false, true) => { - Some(super::HardwareUpdate::TofinoLoaded) - } - (true, false) => { - Some(super::HardwareUpdate::TofinoUnloaded) - } - _ => None, - }; + TofinoView::Real(TofinoSnapshot { driver_loaded, exists }) => { + use super::HardwareUpdate::*; + // Identify if the Tofino device changed power states. + if exists != polled_hw.tofino.exists { + updates.push(TofinoDeviceChange); + } + + // Identify if the Tofino driver was recently loaded/unloaded. + match (driver_loaded, polled_hw.tofino.driver_loaded) { + (false, true) => updates.push(TofinoLoaded), + (true, false) => updates.push(TofinoUnloaded), + _ => (), + }; + // Update our view of the underlying hardware inner.tofino = TofinoView::Real(polled_hw.tofino); - tofino_update } - TofinoView::Stub { .. } => None, + TofinoView::Stub { .. } => (), } }; - if let Some(update) = tofino_update { + for update in updates.into_iter() { info!(log, "Update from polling device tree: {:?}", update); let _ = tx.send(update); } @@ -153,13 +144,36 @@ async fn hardware_tracking_task( } } -impl Hardware { - /// Creates a new representation of the underlying hardware, and initialize +/// A representation of the underlying hardware. +/// +/// This structure provides interfaces for both querying and for receiving new +/// events. +pub struct HardwareManager { + log: Logger, + inner: Arc>, + tx: broadcast::Sender, + _worker: JoinHandle<()>, +} + +impl HardwareManager { + /// Creates a new representation of the underlying hardware, and initializes /// a task which periodically updates that representation. + /// + /// Arguments: + /// - `stub_scrimlet`: Identifies if we should ignore the attached Tofino + /// device, and assume the device is a scrimlet (true) or gimlet (false). + /// If this argument is not supplied, we assume the device is a gimlet until + /// device scanning informs us otherwise. pub fn new( log: Logger, stub_scrimlet: Option, ) -> Result { + let log = log.new(o!("component" => "HardwareManager")); + + // The size of the broadcast channel is arbitrary, but bounded. + // If the channel fills up, old notifications will be dropped, and the + // receiver will receive a tokio::sync::broadcast::error::RecvError::Lagged + // error, indicating they should re-scan the hardware themselves. let (tx, _) = broadcast::channel(1024); let hw = match stub_scrimlet { None => HardwareView::new(), @@ -192,6 +206,16 @@ impl Hardware { } } + pub fn is_scrimlet_driver_loaded(&self) -> bool { + let inner = self.inner.lock().unwrap(); + match inner.tofino { + TofinoView::Real(TofinoSnapshot { driver_loaded, .. }) => { + driver_loaded + } + TofinoView::Stub { active } => active, + } + } + pub fn monitor(&self) -> broadcast::Receiver { info!(self.log, "Monitoring for hardware updates"); self.tx.subscribe() diff --git a/sled-agent/src/hardware/mod.rs b/sled-agent/src/hardware/mod.rs index 6fb8d27c8a..b678087a8e 100644 --- a/sled-agent/src/hardware/mod.rs +++ b/sled-agent/src/hardware/mod.rs @@ -2,44 +2,13 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use slog::Logger; -use tokio::sync::broadcast; - cfg_if::cfg_if! { if #[cfg(target_os = "illumos")] { mod illumos; - use illumos::*; + pub(crate) use illumos::*; } else { mod non_illumos; - use non_illumos::*; - } -} - -/// A platform-independent interface for interacting with the underlying hardware. -pub(crate) struct HardwareManager { - _log: Logger, - inner: Hardware, -} - -impl HardwareManager { - pub fn new( - config: &crate::config::Config, - log: Logger, - ) -> Result { - let log = log.new(o!("component" => "HardwareManager")); - Ok(Self { - _log: log.clone(), - inner: Hardware::new(log, config.stub_scrimlet)?, - }) - } - - pub fn is_scrimlet(&self) -> bool { - self.inner.is_scrimlet() - } - - // Monitors the underlying hardware for updates. - pub fn monitor(&self) -> broadcast::Receiver { - self.inner.monitor() + pub(crate) use non_illumos::*; } } @@ -52,6 +21,7 @@ impl HardwareManager { #[derive(Clone, Debug)] #[allow(dead_code)] pub enum HardwareUpdate { + TofinoDeviceChange, TofinoLoaded, TofinoUnloaded, // TODO: Notify about disks being added / removed, etc. diff --git a/sled-agent/src/hardware/non_illumos/mod.rs b/sled-agent/src/hardware/non_illumos/mod.rs index 5924bad943..18e89c4dba 100644 --- a/sled-agent/src/hardware/non_illumos/mod.rs +++ b/sled-agent/src/hardware/non_illumos/mod.rs @@ -3,39 +3,35 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use slog::Logger; -use std::sync::Mutex; use tokio::sync::broadcast; -struct HardwareInner { - stub_scrimlet: bool, -} - -/// A simulated representation of the underlying hardware. +/// An unimplemented, stub representation of the underlying hardware. /// /// This is intended for non-illumos systems to have roughly the same interface -/// as illumos systems. -pub struct Hardware { - log: Logger, - inner: Mutex, - tx: broadcast::Sender, -} +/// as illumos systems - it allows compilation to "work" on non-illumos +/// platforms, which can be handy for editor support. +/// +/// If you're actually trying to run the Sled Agent on non-illumos platforms, +/// use the simulated sled agent, which does not attempt to abstract hardware. +pub struct HardwareManager {} -impl Hardware { +impl HardwareManager { pub fn new( - log: Logger, - stub_scrimlet: Option, + _log: Logger, + _stub_scrimlet: Option, ) -> Result { - let (tx, _) = broadcast::channel(1024); - let stub_scrimlet = stub_scrimlet.unwrap_or(false); - Ok(Self { log, inner: Mutex::new(HardwareInner { stub_scrimlet }), tx }) + unimplemented!("Accessing hardware unsupported on non-illumos"); } pub fn is_scrimlet(&self) -> bool { - self.inner.lock().unwrap().stub_scrimlet + unimplemented!("Accessing hardware unsupported on non-illumos"); + } + + pub fn is_scrimlet_driver_loaded(&self) -> bool { + unimplemented!("Accessing hardware unsupported on non-illumos"); } pub fn monitor(&self) -> broadcast::Receiver { - info!(self.log, "Monitoring for hardware updates"); - self.tx.subscribe() + unimplemented!("Accessing hardware unsupported on non-illumos"); } } diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 80e1b926ca..9f9206545a 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -77,6 +77,13 @@ impl NexusRequestQueue { /// Creates a new request queue, along with a worker which executes /// any incoming tasks. pub fn new() -> Self { + // TODO(https://github.com/oxidecomputer/omicron/issues/1917): + // In the future, this should basically just be a wrapper around a + // generation number, and we shouldn't be serializing requests to Nexus. + // + // In the meanwhile, we're using an unbounded_channel for simplicity, so + // that we don't need to cope with dropped notifications / + // retransmissions. let (tx, mut rx) = mpsc::unbounded_channel(); let _worker = tokio::spawn(async move { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 08ab3f7f10..d465a8c13a 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -322,8 +322,9 @@ impl SledAgent { ..Default::default() }; - let hardware = HardwareManager::new(&config, parent_log.clone()) - .map_err(|e| Error::Hardware(e))?; + let hardware = + HardwareManager::new(parent_log.clone(), config.stub_scrimlet) + .map_err(|e| Error::Hardware(e))?; let services = ServiceManager::new( parent_log.clone(), @@ -369,25 +370,50 @@ impl SledAgent { Ok(sled_agent) } + // Observe the current hardware state manually. + // + // We use this when we're monitoring hardware for the first + // time, and if we miss notifications. + async fn full_hardware_scan(&self, log: &Logger) { + info!(log, "Performing full hardware scan"); + self.notify_nexus_about_self(log); + if self.inner.hardware.is_scrimlet_driver_loaded() { + self.ensure_scrimlet_services_active(&log).await; + } else { + self.ensure_scrimlet_services_deactive(&log).await; + } + } + async fn monitor(&self, log: Logger) { // Start monitoring the hardware for changes let mut hardware_updates = self.inner.hardware.monitor(); - // Scan the existing system for noteworthy events - // that may have happened before we started monitoring - if self.inner.hardware.is_scrimlet() { - self.ensure_scrimlet_services_active(&log).await; - } + // Scan the system manually for events we have have missed + // before we started monitoring. + self.full_hardware_scan(&log).await; // Rely on monitoring for tracking all future updates. loop { - let update = hardware_updates.recv().await.unwrap(); - match update { - crate::hardware::HardwareUpdate::TofinoLoaded => { - self.ensure_scrimlet_services_active(&log).await; + use tokio::sync::broadcast::error::RecvError; + match hardware_updates.recv().await { + Ok(update) => match update { + crate::hardware::HardwareUpdate::TofinoDeviceChange => { + self.notify_nexus_about_self(&log); + } + crate::hardware::HardwareUpdate::TofinoLoaded => { + self.ensure_scrimlet_services_active(&log).await; + } + crate::hardware::HardwareUpdate::TofinoUnloaded => { + self.ensure_scrimlet_services_deactive(&log).await; + } + }, + Err(RecvError::Lagged(count)) => { + warn!(log, "Hardware monitor missed {count} messages"); + self.full_hardware_scan(&log).await; } - crate::hardware::HardwareUpdate::TofinoUnloaded => { - self.ensure_scrimlet_services_deactive(&log).await; + Err(RecvError::Closed) => { + warn!(log, "Hardware monitor receiver closed; exiting"); + return; } } } From aa14f533e3477fba29f828c6547058b1c55d7468 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 8 Nov 2022 11:22:57 -0500 Subject: [PATCH 5/9] Simplify nexus notifications --- sled-agent/src/sled_agent.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index d465a8c13a..4cc5d65ad7 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -398,6 +398,10 @@ impl SledAgent { match hardware_updates.recv().await { Ok(update) => match update { crate::hardware::HardwareUpdate::TofinoDeviceChange => { + // Inform Nexus that we're now a scrimlet, instead of a Gimlet. + // + // This won't block on Nexus responding; it may take while before + // Nexus actually comes online. self.notify_nexus_about_self(&log); } crate::hardware::HardwareUpdate::TofinoLoaded => { @@ -420,12 +424,6 @@ impl SledAgent { } async fn ensure_scrimlet_services_active(&self, log: &Logger) { - // Inform Nexus that we're now a scrimlet, instead of a Gimlet. - // - // This won't block on Nexus responding; it may take while before - // Nexus actually comes online. - self.notify_nexus_about_self(log); - // TODO(https://github.com/oxidecomputer/omicron/issues/823): Launch the switch zone, with // Dendrite, MGS, and any other services we want to enable. warn!(log, "Activating scrimlet services not yet implemented"); From 7dcc5406c6efff641e57c7f2f0cd9b7dca36470d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 10 Nov 2022 11:28:53 -0500 Subject: [PATCH 6/9] Launch switch zone automatically when /dev/tofino detected - Expand the service manager to support initializing the switch zone as a background task. - Reserve the Switch Zone address as the second address in the sled's subnet. The first address is already reserved for the sled agent, but this ensures that any sled can "become" a scrimlet without needing to provision a new IP through Nexus. - This has some fall-out effects - we bump all the other addresses up, which are currently statically allocated. - Miscellaneous cleanups - Identify zones by a "ZoneType" enum, rather than a name. - Incorporate the portions of https://github.com/oxidecomputer/omicron/pull/1820 responsible for verifying links and devices when launching zones. Fixes https://github.com/oxidecomputer/minimum-upgradable-product/issues/16 Part of https://github.com/oxidecomputer/minimum-upgradable-product/issues/18 --- common/src/address.rs | 12 +- docs/crdb-debugging.adoc | 2 +- docs/how-to-run.adoc | 15 +- openapi/sled-agent.json | 16 +- package-manifest.toml | 14 +- sled-agent/src/bootstrap/agent.rs | 5 +- sled-agent/src/bootstrap/params.rs | 14 +- sled-agent/src/illumos/dladm.rs | 19 +- sled-agent/src/illumos/{vnic.rs => link.rs} | 56 +- sled-agent/src/illumos/mod.rs | 2 +- sled-agent/src/illumos/running_zone.rs | 20 +- sled-agent/src/instance.rs | 2 +- sled-agent/src/instance_manager.rs | 2 +- sled-agent/src/params.rs | 47 +- sled-agent/src/rack_setup/service.rs | 4 +- sled-agent/src/server.rs | 5 +- sled-agent/src/services.rs | 772 ++++++++++++-------- sled-agent/src/sled_agent.rs | 74 +- sled-agent/src/storage_manager.rs | 2 +- smf/internal-dns/manifest.xml | 6 +- smf/nexus/config-partial.toml | 2 +- smf/sled-agent/config-rss.toml | 37 +- 22 files changed, 721 insertions(+), 407 deletions(-) rename sled-agent/src/illumos/{vnic.rs => link.rs} (79%) diff --git a/common/src/address.rs b/common/src/address.rs index 5fd6954345..536fd75b0e 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -53,7 +53,7 @@ const DNS_ADDRESS_INDEX: usize = 1; const GZ_ADDRESS_INDEX: usize = 2; /// The maximum number of addresses per sled reserved for RSS. -pub const RSS_RESERVED_ADDRESSES: u16 = 10; +pub const RSS_RESERVED_ADDRESSES: u16 = 16; /// Wraps an [`Ipv6Network`] with a compile-time prefix length. #[derive( @@ -136,6 +136,7 @@ impl ReservedRackSubnet { } const SLED_AGENT_ADDRESS_INDEX: usize = 1; +const SWITCH_ZONE_ADDRESS_INDEX: usize = 2; /// Return the sled agent address for a subnet. /// @@ -146,6 +147,15 @@ pub fn get_sled_address(sled_subnet: Ipv6Subnet) -> SocketAddrV6 { SocketAddrV6::new(sled_agent_ip, SLED_AGENT_PORT, 0, 0) } +/// Return the switch zone address for a subnet. +/// +/// This address will come from the second address of the [`SLED_PREFIX`] subnet. +pub fn get_switch_zone_address( + sled_subnet: Ipv6Subnet, +) -> Ipv6Addr { + sled_subnet.net().iter().nth(SWITCH_ZONE_ADDRESS_INDEX).unwrap() +} + /// Returns a sled subnet within a rack subnet. /// /// The subnet at index == 0 is used for rack-local services. diff --git a/docs/crdb-debugging.adoc b/docs/crdb-debugging.adoc index 362cde5b49..c2d857db02 100644 --- a/docs/crdb-debugging.adoc +++ b/docs/crdb-debugging.adoc @@ -14,6 +14,6 @@ The following provides instructions for connecting to a CRDB shell on a running 1. **Find the zone running CockroachDB**. This can be accomplished by running `zoneadm list -cv`, and finding the zone with a prefix of `oxz_cockroachdb`. 2. **Log into that zone**. This can be done using `pfexec zlogin `. -3. **Read the CockroachDB log file to determine the connection instructions**. This can be done with `tail -f $(svcs -L cockroachdb)`, look for a line starting with `RPC client flags:`. As one example, this may look like `/opt/oxide/cockroachdb/bin/cockroach --host=[fd00:1122:3344:101::2]:32221 --insecure` +3. **Read the CockroachDB log file to determine the connection instructions**. This can be done with `tail -f $(svcs -L cockroachdb)`, look for a line starting with `RPC client flags:`. As one example, this may look like `/opt/oxide/cockroachdb/bin/cockroach --host=[fd00:1122:3344:101::3]:32221 --insecure` 4. Run that command, with however you want to access `cockroach`. One notable `` is `sql`, which grants access to a SQL shell. diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 28504d9d7e..81181c97f5 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -172,13 +172,14 @@ be set as a default route for the Nexus zone. | Service | Endpoint | Sled Agent: Bootstrap | Derived from MAC address of physical data link. | Sled Agent: Dropshot API | `[fd00:1122:3344:0101::1]:12345` -| Cockroach DB | `[fd00:1122:3344:0101::2]:32221` -| Nexus: Internal API | `[fd00:1122:3344:0101::3]:12221` -| Oximeter | `[fd00:1122:3344:0101::4]:12223` -| Clickhouse | `[fd00:1122:3344:0101::5]:8123` -| Crucible Downstairs 1 | `[fd00:1122:3344:0101::6]:32345` -| Crucible Downstairs 2 | `[fd00:1122:3344:0101::7]:32345` -| Crucible Downstairs 3 | `[fd00:1122:3344:0101::8]:32345` +| Switch Zone | `[fd00:1122:3344:0101::2]` +| Cockroach DB | `[fd00:1122:3344:0101::3]:32221` +| Nexus: Internal API | `[fd00:1122:3344:0101::4]:12221` +| Oximeter | `[fd00:1122:3344:0101::5]:12223` +| Clickhouse | `[fd00:1122:3344:0101::6]:8123` +| Crucible Downstairs 1 | `[fd00:1122:3344:0101::7]:32345` +| Crucible Downstairs 2 | `[fd00:1122:3344:0101::8]:32345` +| Crucible Downstairs 3 | `[fd00:1122:3344:0101::9]:32345` | Internal DNS Service | `[fd00:1122:3344:0001::1]:5353` | Nexus: External API | `192.168.1.20:80` | Internet Gateway | None, but can be set in `smf/sled-agent/config-rss.toml` diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 8b9bdc64d5..b81ebcf56d 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -1525,15 +1525,15 @@ "$ref": "#/components/schemas/ServiceType" } }, - "zone_name": { - "type": "string" + "zone_type": { + "$ref": "#/components/schemas/ZoneType" } }, "required": [ "addresses", "id", "services", - "zone_name" + "zone_type" ] }, "Slot": { @@ -1834,6 +1834,16 @@ "required": [ "rules" ] + }, + "ZoneType": { + "description": "The type of zone which may be requested from Sled Agent", + "type": "string", + "enum": [ + "internal_dns", + "nexus", + "oximeter", + "switch" + ] } } } diff --git a/package-manifest.toml b/package-manifest.toml index 0da7246d6e..fbf648d6f1 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -61,11 +61,11 @@ output.type = "zone" setup_hint = "Run `./tools/ci_download_cockroachdb` to download the necessary binaries" [package.internal-dns] -service_name = "internal-dns" +service_name = "internal_dns" source.type = "local" source.rust.binary_names = ["dnsadm", "dns-server"] source.rust.release = true -source.paths = [ { from = "smf/internal-dns", to = "/var/svc/manifest/site/internal-dns" } ] +source.paths = [ { from = "smf/internal-dns", to = "/var/svc/manifest/site/internal_dns" } ] output.type = "zone" [package.omicron-gateway] @@ -161,6 +161,10 @@ source.sha256 = "208ae10a61f834608378eb135e4b6e5993dc363019b8fba75465b6ea5506b63 output.type = "zone" output.intermediate_only = true +# To package and install the asic variant of the switch, do: +# +# $ cargo run --release -p omicron-package -- -t switch_variant=asic package +# $ pfexec ./target/release/omicron-package -t switch_variant=asic install [package.switch-asic] service_name = "switch" only_for_targets.switch_variant = "asic" @@ -168,6 +172,12 @@ source.type = "composite" source.packages = [ "omicron-gateway.tar.gz", "dendrite-asic.tar.gz" ] output.type = "zone" +# To package and install the stub variant of the switch, do the following: +# +# - Set the sled agent's configuration option "stub_scrimlet" to "true" +# - Run the following: +# $ cargo run --release -p omicron-package -- -t switch_variant=stub package +# $ pfexec ./target/release/omicron-package -t switch_variant=stub install [package.switch-stub] service_name = "switch" only_for_targets.switch_variant = "stub" diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 252b3789d5..cb0be2a82a 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -20,7 +20,7 @@ use crate::illumos::dladm::{self, Dladm, PhysicalLink}; use crate::illumos::zone::Zones; use crate::server::Server as SledServer; use crate::sp::SpHandle; -use omicron_common::address::{get_sled_address, Ipv6Subnet}; +use omicron_common::address::Ipv6Subnet; use omicron_common::api::external::{Error as ExternalError, MacAddr}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, @@ -234,7 +234,7 @@ impl Agent { ) -> Result { info!(&self.log, "Loading Sled Agent: {:?}", request); - let sled_address = get_sled_address(request.subnet); + let sled_address = request.sled_address(); let mut maybe_agent = self.sled_agent.lock().await; if let Some(server) = &*maybe_agent { @@ -276,7 +276,6 @@ impl Agent { let server = SledServer::start( &self.sled_config, self.parent_log.clone(), - sled_address, request.clone(), ) .await diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 94db44729d..1c61327ed1 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -6,13 +6,13 @@ use super::trust_quorum::SerializableShareDistribution; use macaddr::MacAddr6; -use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; +use omicron_common::address::{self, Ipv6Subnet, SLED_PREFIX}; use serde::{Deserialize, Deserializer, Serialize}; use serde_with::serde_as; use serde_with::DeserializeAs; use serde_with::PickFirst; use std::borrow::Cow; -use std::net::Ipv4Addr; +use std::net::{Ipv4Addr, Ipv6Addr, SocketAddrV6}; use uuid::Uuid; /// Information about the internet gateway used for externally-facing services. @@ -78,6 +78,16 @@ pub struct SledAgentRequest { pub subnet: Ipv6Subnet, } +impl SledAgentRequest { + pub fn sled_address(&self) -> SocketAddrV6 { + address::get_sled_address(self.subnet) + } + + pub fn switch_ip(&self) -> Ipv6Addr { + address::get_switch_zone_address(self.subnet) + } +} + // We intentionally DO NOT derive `Debug` or `Serialize`; both provide avenues // by which we may accidentally log the contents of our `share`. To serialize a // request, use `RequestEnvelope::danger_serialize_as_json()`. diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index af74298c0a..0be67cc6b5 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -5,7 +5,7 @@ //! Utilities for poking at data links. use crate::common::vlan::VlanID; -use crate::illumos::vnic::VnicKind; +use crate::illumos::link::{Link, LinkKind}; use crate::illumos::zone::IPADM; use crate::illumos::{execute, ExecutionError, PFEXEC}; use omicron_common::api::external::MacAddr; @@ -221,6 +221,21 @@ impl Dladm { Ok(()) } + /// Verify that the given link exists + pub fn verify_link(link: &str) -> Result { + let mut command = std::process::Command::new(PFEXEC); + let cmd = command.args(&[DLADM, "show-link", "-p", "-o", "LINK", link]); + let output = execute(cmd)?; + match String::from_utf8_lossy(&output.stdout) + .lines() + .next() + .map(|s| s.trim()) + { + Some(x) if x == link => Ok(Link::wrap_physical(link)), + _ => Err(FindPhysicalLinkError::NoPhysicalLinkFound), + } + } + /// Returns the name of the first observed physical data link. pub fn find_physical() -> Result { let mut command = std::process::Command::new(PFEXEC); @@ -322,7 +337,7 @@ impl Dladm { .filter_map(|name| { // Ensure this is a kind of VNIC that the sled agent could be // responsible for. - match VnicKind::from_name(name) { + match LinkKind::from_name(name) { Some(_) => Some(name.to_owned()), None => None, } diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/link.rs similarity index 79% rename from sled-agent/src/illumos/vnic.rs rename to sled-agent/src/illumos/link.rs index 3fbc5b77b2..acd2b421c1 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/link.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! API for controlling a single instance. +//! API for allocating and managing data links. use crate::illumos::dladm::{ CreateVnicError, DeleteVnicError, VnicSource, VNIC_PREFIX, @@ -54,13 +54,13 @@ impl VnicAllocator
{ pub fn new_control( &self, mac: Option, - ) -> Result { + ) -> Result { let allocator = self.new_superscope("Control"); let name = allocator.next(); debug_assert!(name.starts_with(VNIC_PREFIX)); debug_assert!(name.starts_with(VNIC_PREFIX_CONTROL)); Dladm::create_vnic(&self.data_link, &name, mac, None)?; - Ok(Vnic { name, deleted: false, kind: VnicKind::OxideControl }) + Ok(Link { name, deleted: false, kind: LinkKind::OxideControlVnic }) } fn new_superscope>(&self, scope: S) -> Self { @@ -82,22 +82,23 @@ impl VnicAllocator
{ } } -/// Represents the kind of a VNIC, such as whether it's for guest networking or +/// Represents the kind of a Link, such as whether it's for guest networking or /// communicating with Oxide services. #[derive(Debug, Clone, Copy, PartialEq)] -pub enum VnicKind { - OxideControl, - Guest, +pub enum LinkKind { + Physical, + OxideControlVnic, + GuestVnic, } -impl VnicKind { +impl LinkKind { /// Infer the kind from a VNIC's name, if this one the sled agent can /// manage, and `None` otherwise. pub fn from_name(name: &str) -> Option { if name.starts_with(VNIC_PREFIX) { - Some(VnicKind::OxideControl) + Some(LinkKind::OxideControlVnic) } else if name.starts_with(VNIC_PREFIX_GUEST) { - Some(VnicKind::Guest) + Some(LinkKind::GuestVnic) } else { None } @@ -106,7 +107,7 @@ impl VnicKind { #[derive(thiserror::Error, Debug)] #[error("VNIC with name '{0}' is not valid for sled agent management")] -pub struct InvalidVnicKind(String); +pub struct InvalidLinkKind(String); /// Represents an allocated VNIC on the system. /// The VNIC is de-allocated when it goes out of scope. @@ -115,30 +116,41 @@ pub struct InvalidVnicKind(String); /// another process in the global zone could also modify / destroy /// the VNIC while this object is alive. #[derive(Debug)] -pub struct Vnic { +pub struct Link { name: String, deleted: bool, - kind: VnicKind, + kind: LinkKind, } -impl Vnic { +impl Link { /// Takes ownership of an existing VNIC. pub fn wrap_existing>( name: S, - ) -> Result { - match VnicKind::from_name(name.as_ref()) { - Some(kind) => Ok(Vnic { + ) -> Result { + match LinkKind::from_name(name.as_ref()) { + Some(kind) => Ok(Self { name: name.as_ref().to_owned(), deleted: false, kind, }), - None => Err(InvalidVnicKind(name.as_ref().to_owned())), + None => Err(InvalidLinkKind(name.as_ref().to_owned())), + } + } + + /// Wraps a physical nic in a Link structure. + /// + /// It is the caller's responsibility to ensure this is a physical link. + pub fn wrap_physical>(name: S) -> Self { + Link { + name: name.as_ref().to_owned(), + deleted: false, + kind: LinkKind::Physical, } } /// Deletes a NIC (if it has not already been deleted). pub fn delete(&mut self) -> Result<(), DeleteVnicError> { - if self.deleted { + if self.deleted || self.kind == LinkKind::Physical { Ok(()) } else { self.deleted = true; @@ -150,16 +162,16 @@ impl Vnic { &self.name } - pub fn kind(&self) -> VnicKind { + pub fn kind(&self) -> LinkKind { self.kind } } -impl Drop for Vnic { +impl Drop for Link { fn drop(&mut self) { let r = self.delete(); if let Err(e) = r { - eprintln!("Failed to delete VNIC: {}", e); + eprintln!("Failed to delete Link: {}", e); } } } diff --git a/sled-agent/src/illumos/mod.rs b/sled-agent/src/illumos/mod.rs index 3aead10208..ad7200c360 100644 --- a/sled-agent/src/illumos/mod.rs +++ b/sled-agent/src/illumos/mod.rs @@ -8,9 +8,9 @@ use cfg_if::cfg_if; pub mod addrobj; pub mod dladm; +pub mod link; pub mod running_zone; pub mod svc; -pub mod vnic; pub mod zfs; pub mod zone; pub mod zpool; diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index de50d2b5d4..dd38cd7dfa 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -6,8 +6,8 @@ use crate::illumos::addrobj::AddrObject; use crate::illumos::dladm::Etherstub; +use crate::illumos::link::{Link, VnicAllocator}; use crate::illumos::svc::wait_for_service; -use crate::illumos::vnic::{Vnic, VnicAllocator}; use crate::illumos::zone::{AddressRequest, ZONE_PREFIX}; use crate::opte::Port; use ipnetwork::IpNetwork; @@ -190,7 +190,8 @@ impl RunningZone { ) -> Result { info!(self.inner.log, "Adding address: {:?}", addrtype); let addrobj = AddrObject::new( - self.inner.physical_nic + self.inner + .link .as_ref() .expect("Cannot allocate external address on zone without physical NIC") .name(), @@ -284,7 +285,7 @@ impl RunningZone { }, )?; - let control_vnic = Vnic::wrap_existing(vnic_name) + let control_vnic = Link::wrap_existing(vnic_name) .expect("Failed to wrap valid control VNIC"); Ok(Self { @@ -296,7 +297,7 @@ impl RunningZone { // // Re-initialize guest_vnic state by inspecting the zone. opte_ports: vec![], - physical_nic: None, + link: None, }, }) } @@ -346,14 +347,13 @@ pub struct InstalledZone { name: String, // NIC used for control plane communication. - control_vnic: Vnic, + control_vnic: Link, // OPTE devices for the guest network interfaces opte_ports: Vec, // Physical NIC possibly provisioned to the zone. - // TODO: Remove once Nexus traffic is transmitted over OPTE. - physical_nic: Option, + link: Option, } impl InstalledZone { @@ -384,7 +384,7 @@ impl InstalledZone { datasets: &[zone::Dataset], devices: &[zone::Device], opte_ports: Vec, - physical_nic: Option, + link: Option, ) -> Result { let control_vnic = vnic_allocator.new_control(None).map_err(|err| { InstallZoneError::CreateVnic { zone: zone_name.to_string(), err } @@ -398,7 +398,7 @@ impl InstalledZone { .iter() .map(|port| port.vnic_name().to_string()) .chain(std::iter::once(control_vnic.name().to_string())) - .chain(physical_nic.as_ref().map(|vnic| vnic.name().to_string())) + .chain(link.as_ref().map(|vnic| vnic.name().to_string())) .collect(); Zones::install_omicron_zone( @@ -420,7 +420,7 @@ impl InstalledZone { name: full_zone_name, control_vnic, opte_ports, - physical_nic, + link, }) } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index b73f1a3924..62b2043317 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -6,11 +6,11 @@ use crate::common::instance::{Action as InstanceAction, InstanceStates}; use crate::illumos::dladm::Etherstub; +use crate::illumos::link::VnicAllocator; use crate::illumos::running_zone::{ InstalledZone, RunCommandError, RunningZone, }; use crate::illumos::svc::wait_for_service; -use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::{AddressRequest, PROPOLIS_ZONE_PREFIX}; use crate::instance_manager::InstanceTicket; use crate::nexus::LazyNexusClient; diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 3af21061cc..4ff923983f 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -5,7 +5,7 @@ //! API for controlling multiple instances on a sled. use crate::illumos::dladm::Etherstub; -use crate::illumos::vnic::VnicAllocator; +use crate::illumos::link::VnicAllocator; use crate::nexus::LazyNexusClient; use crate::opte::PortManager; use crate::params::{ diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 6692f1209a..c08224f79b 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -380,7 +380,7 @@ impl std::fmt::Display for ServiceType { fn fmt(&self, f: &mut Formatter<'_>) -> FormatResult { match self { ServiceType::Nexus { .. } => write!(f, "nexus"), - ServiceType::InternalDns { .. } => write!(f, "internal-dns"), + ServiceType::InternalDns { .. } => write!(f, "internal_dns"), ServiceType::Oximeter => write!(f, "oximeter"), ServiceType::Dendrite { .. } => write!(f, "dendrite"), ServiceType::Tfport { .. } => write!(f, "tfport"), @@ -410,6 +410,45 @@ impl From for sled_agent_client::types::ServiceType { } } +/// The type of zone which may be requested from Sled Agent +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub enum ZoneType { + #[serde(rename = "internal_dns")] + InternalDNS, + #[serde(rename = "nexus")] + Nexus, + #[serde(rename = "oximeter")] + Oximeter, + #[serde(rename = "switch")] + Switch, +} + +impl From for sled_agent_client::types::ZoneType { + fn from(zt: ZoneType) -> Self { + match zt { + ZoneType::InternalDNS => Self::InternalDns, + ZoneType::Nexus => Self::Nexus, + ZoneType::Oximeter => Self::Oximeter, + ZoneType::Switch => Self::Switch, + } + } +} + +impl std::fmt::Display for ZoneType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use ZoneType::*; + let name = match self { + InternalDNS => "internal_dns", + Nexus => "nexus", + Oximeter => "oximeter", + Switch => "switch", + }; + write!(f, "{name}") + } +} + /// Describes a request to create a zone running one or more services. #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, @@ -417,8 +456,8 @@ impl From for sled_agent_client::types::ServiceType { pub struct ServiceZoneRequest { // The UUID of the zone to be initialized. pub id: Uuid, - // The name of the zone to be created. - pub zone_name: String, + // The type of the zone to be created. + pub zone_type: ZoneType, // The addresses on which the service should listen for requests. pub addresses: Vec, // The addresses in the global zone which should be created, if necessary @@ -480,7 +519,7 @@ impl From for sled_agent_client::types::ServiceZoneRequest { Self { id: s.id, - zone_name: s.zone_name.to_string(), + zone_type: s.zone_type.into(), addresses: s.addresses, gz_addresses: s.gz_addresses, services, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 4997ce7f2f..ee0ab30678 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -10,7 +10,7 @@ use crate::bootstrap::ddm_admin_client::{DdmAdminClient, DdmError}; use crate::bootstrap::params::SledAgentRequest; use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::bootstrap::trust_quorum::{RackSecret, ShareDistribution}; -use crate::params::{ServiceType, ServiceZoneRequest}; +use crate::params::{ServiceType, ServiceZoneRequest, ZoneType}; use internal_dns_client::multiclient::{DnsError, Updater as DnsUpdater}; use omicron_common::address::{ get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, @@ -316,7 +316,7 @@ impl ServiceInner { let dns_addr = dns_subnet.dns_address().ip(); request.dns_services.push(ServiceZoneRequest { id: Uuid::new_v4(), - zone_name: "internal-dns".to_string(), + zone_type: ZoneType::InternalDNS, addresses: vec![dns_addr], gz_addresses: vec![dns_subnet.gz_address().ip()], services: vec![ServiceType::InternalDns { diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 4aa9078890..c5b9c8aff4 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -10,7 +10,7 @@ use super::sled_agent::SledAgent; use crate::bootstrap::params::SledAgentRequest; use crate::nexus::LazyNexusClient; use slog::Logger; -use std::net::{SocketAddr, SocketAddrV6}; +use std::net::SocketAddr; use uuid::Uuid; /// Packages up a [`SledAgent`], running the sled agent API under a Dropshot @@ -33,13 +33,13 @@ impl Server { pub async fn start( config: &Config, log: Logger, - addr: SocketAddrV6, request: SledAgentRequest, ) -> Result { info!(log, "setting up sled agent server"); let client_log = log.new(o!("component" => "NexusClient")); + let addr = request.sled_address(); let lazy_nexus_client = LazyNexusClient::new(client_log, *addr.ip()) .map_err(|e| e.to_string())?; @@ -47,7 +47,6 @@ impl Server { &config, log.clone(), lazy_nexus_client.clone(), - addr, request, ) .await diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 5eef765435..c6a801ffe8 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2,16 +2,39 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Support for miscellaneous services managed by the sled. +//! Sled-local service management. +//! +//! For controlling zone-based storage services, refer to +//! [crate::storage_manager::StorageManager]. +//! +//! For controlling virtual machine instances, refer to +//! [crate::instance_manager::InstanceManager]. +//! +//! The [ServiceManager] provides separate mechanisms for services where the +//! "source-of-truth" is Nexus, compared with services where the +//! "source-of-truth" is the Sled Agent itself. Although we generally prefer to +//! delegate the decision of "which services run where" to Nexus, there are +//! situations where the Sled Agent must be capable of autonomously ensuring +//! that zones execute. For example, the "switch zone" contains services which +//! should automatically start when a Tofino device is detected, independently +//! of what other services Nexus wants to have executing on the sled. +//! +//! To accomplish this, the following interfaces are exposed: +//! - [ServiceManager::ensure_persistent] exposes an API to request a set of +//! services that should persist beyond reboot. +//! - [ServiceManager::ensure_switch] exposes an API to specifically enable +//! or disable the switch zone. use crate::bootstrap::ddm_admin_client::{DdmAdminClient, DdmError}; use crate::common::underlay; -use crate::illumos::dladm::{Etherstub, EtherstubVnic, PhysicalLink}; +use crate::illumos::dladm::{Dladm, Etherstub, EtherstubVnic, PhysicalLink}; +use crate::illumos::link::{Link, VnicAllocator}; use crate::illumos::running_zone::{InstalledZone, RunningZone}; -use crate::illumos::vnic::VnicAllocator; use crate::illumos::zfs::ZONE_ZFS_DATASET_MOUNTPOINT; use crate::illumos::zone::AddressRequest; -use crate::params::{ServiceEnsureBody, ServiceType, ServiceZoneRequest}; +use crate::params::{ + DendriteAsic, ServiceEnsureBody, ServiceType, ServiceZoneRequest, +}; use crate::zone::Zones; use omicron_common::address::Ipv6Subnet; use omicron_common::address::DENDRITE_PORT; @@ -29,8 +52,10 @@ use std::iter::FromIterator; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; use std::str::FromStr; +use std::sync::Arc; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; +use tokio::task::JoinHandle; use uuid::Uuid; // The filename of ServiceManager's internal storage. @@ -53,6 +78,9 @@ pub enum Error { #[error("I/O Error accessing {path}: {err}")] Io { path: PathBuf, err: std::io::Error }, + #[error("Failed to find device {device}")] + MissingDevice { device: String }, + #[error("Failed to do '{intent}' by running command in zone: {err}")] ZoneCommand { intent: String, @@ -224,10 +252,27 @@ impl<'t> SmfHelper<'t> { } } +// Describes the Switch Zone state. +enum SwitchZone { + // The switch zone is not currently running. + Disabled, + // The Zone is still initializing - it may be awaiting the initialization + // of certain links. + Initializing { + // The request for the zone + request: ServiceZoneRequest, + // A task repeatedly trying to initialize the zone + initializer: JoinHandle<()>, + }, + // The Zone is currently running. + Running(RunningZone), +} + /// Manages miscellaneous Sled-local services. -pub struct ServiceManager { +pub struct ServiceManagerInner { log: Logger, config: Config, + switch_zone: Mutex, zones: Mutex>, vnic_allocator: VnicAllocator, physical_link_vnic_allocator: VnicAllocator, @@ -238,6 +283,11 @@ pub struct ServiceManager { advertised_prefixes: Mutex>>, } +#[derive(Clone)] +pub struct ServiceManager { + inner: Arc, +} + impl ServiceManager { /// Creates a service manager, which returns once all requested services /// have been started. @@ -261,28 +311,32 @@ impl ServiceManager { debug!(log, "Creating new ServiceManager"); let log = log.new(o!("component" => "ServiceManager")); let mgr = Self { - log: log.clone(), - config, - zones: Mutex::new(vec![]), - vnic_allocator: VnicAllocator::new("Service", etherstub), - physical_link_vnic_allocator: VnicAllocator::new( - "Public", - // NOTE: Right now, we only use a connection to one of the Chelsio - // links. Longer-term, when we we use OPTE, we'll be able to use both - // connections. - physical_link, - ), - underlay_vnic, - underlay_address, - rack_id, - ddmd_client: DdmAdminClient::new(log)?, - advertised_prefixes: Mutex::new(HashSet::new()), + inner: Arc::new(ServiceManagerInner { + log: log.clone(), + config, + // TODO: Load the switch zone if it already exists? + switch_zone: Mutex::new(SwitchZone::Disabled), + zones: Mutex::new(vec![]), + vnic_allocator: VnicAllocator::new("Service", etherstub), + physical_link_vnic_allocator: VnicAllocator::new( + "Public", + // NOTE: Right now, we only use a connection to one of the Chelsio + // links. Longer-term, when we we use OPTE, we'll be able to use both + // connections. + physical_link, + ), + underlay_vnic, + underlay_address, + rack_id, + ddmd_client: DdmAdminClient::new(log)?, + advertised_prefixes: Mutex::new(HashSet::new()), + }), }; let config_path = mgr.services_config_path(); if config_path.exists() { info!( - &mgr.log, + &mgr.inner.log, "Sled services found at {}; loading", config_path.to_string_lossy() ); @@ -295,12 +349,12 @@ impl ServiceManager { path: config_path.clone(), err, })?; - let mut existing_zones = mgr.zones.lock().await; + let mut existing_zones = mgr.inner.zones.lock().await; mgr.initialize_services_locked(&mut existing_zones, &cfg.services) .await?; } else { info!( - &mgr.log, + &mgr.inner.log, "No sled services found at {}", config_path.to_string_lossy() ); @@ -312,7 +366,7 @@ impl ServiceManager { // Returns either the path to the explicitly provided config path, or // chooses the default one. fn services_config_path(&self) -> PathBuf { - self.config.all_svcs_config_path.clone() + self.inner.config.all_svcs_config_path.clone() } // Advertise the /64 prefix of `address`, unless we already have. @@ -322,188 +376,223 @@ impl ServiceManager { // prefix is spawned onto a background task. async fn advertise_prefix_of_address(&self, address: Ipv6Addr) { let subnet = Ipv6Subnet::new(address); - if self.advertised_prefixes.lock().await.insert(subnet) { - self.ddmd_client.advertise_prefix(subnet); + if self.inner.advertised_prefixes.lock().await.insert(subnet) { + self.inner.ddmd_client.advertise_prefix(subnet); } } - // Populates `existing_zones` according to the requests in `services`. - // - // At the point this function is invoked, IP addresses have already been - // allocated (by either RSS or Nexus). However, this function explicitly - // assigns such addresses to interfaces within zones. - async fn initialize_services_locked( + // Check the services intended to run in the zone to determine whether any + // physical devices need to be mapped into the zone when it is created. + fn devices_needed( &self, - existing_zones: &mut Vec, - requests: &Vec, - ) -> Result<(), Error> { - // TODO(https://github.com/oxidecomputer/omicron/issues/726): - // As long as we ensure the requests don't overlap, we could - // parallelize this request. - for req in requests { - info!( - self.log, - "Ensuring service zone is initialized: {:?}", req.zone_name - ); - // Before we bother allocating anything for this request, check if - // this service has already been created. - let expected_zone_name = - InstalledZone::get_zone_name(&req.zone_name, None); - if existing_zones.iter().any(|z| z.name() == expected_zone_name) { - info!( - self.log, - "Service zone {} already exists", req.zone_name - ); - continue; - } else { - info!( - self.log, - "Service zone {} does not yet exist", req.zone_name - ); + req: &ServiceZoneRequest, + ) -> Result, Error> { + let mut devices = vec![]; + for svc in &req.services { + match svc { + ServiceType::Dendrite { asic: DendriteAsic::TofinoAsic } => { + // When running on a real sidecar, we need the /dev/tofino + // device to talk to the tofino ASIC. + devices.push("/dev/tofino".to_string()); + } + _ => (), } + } - // TODO: Remove once Nexus traffic is transmitted over OPTE. - let physical_nic = match req - .services - .iter() - .any(|service| matches!(service, ServiceType::Nexus { .. })) - { - true => { - let vnic = self + for dev in &devices { + if !Path::new(dev).exists() { + return Err(Error::MissingDevice { device: dev.to_string() }); + } + } + Ok(devices) + } + + // Check the services intended to run in the zone to determine whether any + // physical links or vnics need to be mapped into the zone when it is created. + // + // NOTE: This function is implemented to return the first link found, under + // the assumption that "at most one" would be necessary. + fn link_needed( + &self, + req: &ServiceZoneRequest, + ) -> Result, Error> { + for svc in &req.services { + match svc { + ServiceType::Tfport { pkt_source } => { + // The tfport service requires a MAC device to/from which sidecar + // packets may be multiplexed. If the link isn't present, don't + // bother trying to start the zone. + match Dladm::verify_link(pkt_source) { + Ok(link) => { + return Ok(Some(link)); + } + Err(_) => { + return Err(Error::MissingDevice { + device: pkt_source.to_string(), + }); + } + } + } + ServiceType::Nexus { .. } => { + // TODO: Remove once Nexus traffic is transmitted over OPTE. + match self + .inner .physical_link_vnic_allocator .new_control(None) - .map_err(|e| Error::NexusVnicCreation(e))?; - Some(vnic) + { + Ok(n) => { + return Ok(Some(n)); + } + Err(e) => { + return Err(Error::NexusVnicCreation(e)); + } + } } - false => None, - }; - - let installed_zone = InstalledZone::install( - &self.log, - &self.vnic_allocator, - &req.zone_name, - // unique_name= - None, - // dataset= - &[], - // devices= - &[], - // opte_ports= - vec![], - // physical_nic= - physical_nic, - ) - .await?; - - let running_zone = RunningZone::boot(installed_zone).await?; - - for addr in &req.addresses { - info!(self.log, "Ensuring address {} exists", addr.to_string()); - let addr_request = - AddressRequest::new_static(IpAddr::V6(*addr), None); - running_zone.ensure_address(addr_request).await?; - info!( - self.log, - "Ensuring address {} exists - OK", - addr.to_string() - ); + _ => (), } + } + Ok(None) + } - info!(self.log, "GZ addresses: {:#?}", req.gz_addresses); - for &addr in &req.gz_addresses { - info!( - self.log, - "Ensuring GZ address {} exists", - addr.to_string() - ); + async fn initialize_zone( + &self, + request: &ServiceZoneRequest, + ) -> Result { + let device_names = self.devices_needed(request)?; + let link = self.link_needed(request)?; + + let devices: Vec = device_names + .iter() + .map(|d| zone::Device { name: d.to_string() }) + .collect(); + + let installed_zone = InstalledZone::install( + &self.inner.log, + &self.inner.vnic_allocator, + &request.zone_type.to_string(), + // unique_name= + None, + // dataset= + &[], + &devices, + // opte_ports= + vec![], + link, + ) + .await?; - let addr_name = req.zone_name.replace(&['-', '_'][..], ""); - Zones::ensure_has_global_zone_v6_address( - self.underlay_vnic.clone(), - addr, - &addr_name, - ) - .map_err(|err| Error::GzAddress { - message: format!( - "adding address on behalf of service zone '{}'", - req.zone_name - ), - err, - })?; + let running_zone = RunningZone::boot(installed_zone).await?; - // If this address is in a new ipv6 prefix, notify maghemite so - // it can advertise it to other sleds. - self.advertise_prefix_of_address(addr).await; - } + for addr in &request.addresses { + info!( + self.inner.log, + "Ensuring address {} exists", + addr.to_string() + ); + let addr_request = + AddressRequest::new_static(IpAddr::V6(*addr), None); + running_zone.ensure_address(addr_request).await?; + info!( + self.inner.log, + "Ensuring address {} exists - OK", + addr.to_string() + ); + } - let gateway = if !req.gz_addresses.is_empty() { - // If this service supplies its own GZ address, add a route. - // - // This is currently being used for the DNS service. - // - // TODO: consider limiting the number of GZ addresses which - // can be supplied - now that we're actively using it, we - // aren't really handling the "many GZ addresses" case, and it - // doesn't seem necessary now. - req.gz_addresses[0] - } else { - self.underlay_address - }; + info!(self.inner.log, "GZ addresses: {:#?}", request.gz_addresses); + for &addr in &request.gz_addresses { + info!( + self.inner.log, + "Ensuring GZ address {} exists", + addr.to_string() + ); - running_zone.add_default_route(gateway).await.map_err(|err| { - Error::ZoneCommand { intent: "Adding Route".to_string(), err } + let addr_name = + request.zone_type.to_string().replace(&['-', '_'][..], ""); + Zones::ensure_has_global_zone_v6_address( + self.inner.underlay_vnic.clone(), + addr, + &addr_name, + ) + .map_err(|err| Error::GzAddress { + message: format!( + "adding address on behalf of service zone '{}'", + request.zone_type + ), + err, })?; - for service in &req.services { - // TODO: Related to - // https://github.com/oxidecomputer/omicron/pull/1124 , should we - // avoid importing this manifest? - debug!(self.log, "importing manifest"); - - let smfh = SmfHelper::new(&running_zone, service); - smfh.import_manifest()?; - - match &service { - ServiceType::Nexus { internal_ip, external_ip } => { - info!(self.log, "Setting up Nexus service"); - - // The address of Nexus' external interface is a special - // case; it may be an IPv4 address. - let addr_request = - AddressRequest::new_static(*external_ip, None); - running_zone - .ensure_external_address_with_name( - addr_request, - "public", - ) - .await?; - - if let IpAddr::V4(_public_addr4) = *external_ip { - // If requested, create a default route back through - // the internet gateway. - if let Some(ref gateway) = - self.config.gateway_address - { - running_zone - .add_default_route4(*gateway) - .await - .map_err(|err| Error::ZoneCommand { - intent: "Adding Route".to_string(), - err, - })?; - } + // If this address is in a new ipv6 prefix, notify maghemite so + // it can advertise it to other sleds. + self.advertise_prefix_of_address(addr).await; + } + + let gateway = if !request.gz_addresses.is_empty() { + // If this service supplies its own GZ address, add a route. + // + // This is currently being used for the DNS service. + // + // TODO: consider limiting the number of GZ addresses which + // can be supplied - now that we're actively using it, we + // aren't really handling the "many GZ addresses" case, and it + // doesn't seem necessary now. + request.gz_addresses[0] + } else { + self.inner.underlay_address + }; + + running_zone.add_default_route(gateway).await.map_err(|err| { + Error::ZoneCommand { intent: "Adding Route".to_string(), err } + })?; + + for service in &request.services { + // TODO: Related to + // https://github.com/oxidecomputer/omicron/pull/1124 , should we + // avoid importing this manifest? + debug!(self.inner.log, "importing manifest"); + + let smfh = SmfHelper::new(&running_zone, service); + smfh.import_manifest()?; + + match &service { + ServiceType::Nexus { internal_ip, external_ip } => { + info!(self.inner.log, "Setting up Nexus service"); + + // The address of Nexus' external interface is a special + // case; it may be an IPv4 address. + let addr_request = + AddressRequest::new_static(*external_ip, None); + running_zone + .ensure_external_address_with_name( + addr_request, + "public", + ) + .await?; + + if let IpAddr::V4(_public_addr4) = *external_ip { + // If requested, create a default route back through + // the internet gateway. + if let Some(ref gateway) = + self.inner.config.gateway_address + { + running_zone + .add_default_route4(*gateway) + .await + .map_err(|err| Error::ZoneCommand { + intent: "Adding Route".to_string(), + err, + })?; } + } - let cert_file = - PathBuf::from("/var/nexus/certs/cert.pem"); - let key_file = - PathBuf::from("/var/nexus/certs/key.pem"); + let cert_file = PathBuf::from("/var/nexus/certs/cert.pem"); + let key_file = PathBuf::from("/var/nexus/certs/key.pem"); - // Nexus takes a separate config file for parameters which - // cannot be known at packaging time. - let deployment_config = NexusDeploymentConfig { - id: req.id, - rack_id: self.rack_id, + // Nexus takes a separate config file for parameters which + // cannot be known at packaging time. + let deployment_config = NexusDeploymentConfig { + id: request.id, + rack_id: self.inner.rack_id, // Request two dropshot servers: One for HTTP (port 80), // one for HTTPS (port 443). @@ -525,117 +614,147 @@ impl ServiceManager { ..Default::default() }, subnet: Ipv6Subnet::::new( - self.underlay_address, + self.inner.underlay_address, ), // TODO: Switch to inferring this URL by DNS. database: nexus_config::Database::FromUrl { url: PostgresConfigWithUrl::from_str( - "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" + "postgresql://root@[fd00:1122:3344:0101::3]:32221/omicron?sslmode=disable" ).unwrap(), } }; - // Copy the partial config file to the expected location. - let config_dir = (self.config.get_svc_config_dir)( - running_zone.name(), - &req.zone_name, - ); - let partial_config_path = - config_dir.join(PARTIAL_CONFIG_FILENAME); - let config_path = - config_dir.join(COMPLETE_CONFIG_FILENAME); - tokio::fs::copy(partial_config_path, &config_path) - .await - .map_err(|err| Error::Io { - path: config_path.clone(), - err, - })?; - - // Serialize the configuration and append it into the file. - let serialized_cfg = - toml::Value::try_from(&deployment_config) - .expect("Cannot serialize config"); - let mut map = toml::map::Map::new(); - map.insert("deployment".to_string(), serialized_cfg); - let config_str = - toml::to_string(&map).map_err(|err| { - Error::TomlSerialize { - path: config_path.clone(), - err, - } - })?; - let mut file = tokio::fs::OpenOptions::new() - .append(true) - .open(&config_path) - .await - .map_err(|err| Error::Io { - path: config_path.clone(), - err, - })?; - file.write_all(config_str.as_bytes()).await.map_err( - |err| Error::Io { path: config_path.clone(), err }, - )?; - } - ServiceType::InternalDns { - server_address, - dns_address, - } => { - info!(self.log, "Setting up internal-dns service"); - smfh.setprop( - "config/server_address", - format!( - "[{}]:{}", - server_address.ip(), - server_address.port(), - ), - )?; - smfh.setprop( - "config/dns_address", - &format!( - "[{}]:{}", - dns_address.ip(), - dns_address.port(), - ), - )?; - - // Refresh the manifest with the new properties we set, - // so they become "effective" properties when the service is enabled. - smfh.refresh()?; - } - ServiceType::Oximeter => { - info!(self.log, "Setting up oximeter service"); - - let address = req.addresses[0]; - smfh.setprop("config/id", req.id)?; - smfh.setprop( - "config/address", - &format!("[{}]:{}", address, OXIMETER_PORT), - )?; - smfh.refresh()?; - } - ServiceType::Dendrite { asic } => { - info!(self.log, "Setting up dendrite service"); - - let address = req.addresses[0]; - smfh.setprop("config/asic", asic)?; - smfh.setprop( - "config/address", - &format!("[{}]:{}", address, DENDRITE_PORT,), - )?; - smfh.refresh()?; - } - ServiceType::Tfport { pkt_source } => { - info!(self.log, "Setting up tfport service"); + // Copy the partial config file to the expected location. + let config_dir = (self.inner.config.get_svc_config_dir)( + running_zone.name(), + &request.zone_type.to_string(), + ); + let partial_config_path = + config_dir.join(PARTIAL_CONFIG_FILENAME); + let config_path = config_dir.join(COMPLETE_CONFIG_FILENAME); + tokio::fs::copy(partial_config_path, &config_path) + .await + .map_err(|err| Error::Io { + path: config_path.clone(), + err, + })?; + + // Serialize the configuration and append it into the file. + let serialized_cfg = + toml::Value::try_from(&deployment_config) + .expect("Cannot serialize config"); + let mut map = toml::map::Map::new(); + map.insert("deployment".to_string(), serialized_cfg); + let config_str = toml::to_string(&map).map_err(|err| { + Error::TomlSerialize { path: config_path.clone(), err } + })?; + let mut file = tokio::fs::OpenOptions::new() + .append(true) + .open(&config_path) + .await + .map_err(|err| Error::Io { + path: config_path.clone(), + err, + })?; + file.write_all(config_str.as_bytes()).await.map_err( + |err| Error::Io { path: config_path.clone(), err }, + )?; + } + ServiceType::InternalDns { server_address, dns_address } => { + info!(self.inner.log, "Setting up internal-dns service"); + smfh.setprop( + "config/server_address", + format!( + "[{}]:{}", + server_address.ip(), + server_address.port(), + ), + )?; + smfh.setprop( + "config/dns_address", + &format!( + "[{}]:{}", + dns_address.ip(), + dns_address.port(), + ), + )?; - smfh.setprop("config/pkt_source", pkt_source)?; - smfh.refresh()?; - } + // Refresh the manifest with the new properties we set, + // so they become "effective" properties when the service is enabled. + smfh.refresh()?; + } + ServiceType::Oximeter => { + info!(self.inner.log, "Setting up oximeter service"); + + let address = request.addresses[0]; + smfh.setprop("config/id", request.id)?; + smfh.setprop( + "config/address", + &format!("[{}]:{}", address, OXIMETER_PORT), + )?; + smfh.refresh()?; + } + ServiceType::Dendrite { asic } => { + info!(self.inner.log, "Setting up dendrite service"); + + let address = request.addresses[0]; + smfh.setprop("config/asic", asic)?; + smfh.setprop( + "config/address", + &format!("[{}]:{}", address, DENDRITE_PORT,), + )?; + smfh.refresh()?; } + ServiceType::Tfport { pkt_source } => { + info!(self.inner.log, "Setting up tfport service"); - debug!(self.log, "enabling service"); - smfh.enable()?; + smfh.setprop("config/pkt_source", pkt_source)?; + smfh.refresh()?; + } + } + + debug!(self.inner.log, "enabling service"); + smfh.enable()?; + } + Ok(running_zone) + } + + // Populates `existing_zones` according to the requests in `services`. + // + // At the point this function is invoked, IP addresses have already been + // allocated (by either RSS or Nexus). However, this function explicitly + // assigns such addresses to interfaces within zones. + async fn initialize_services_locked( + &self, + existing_zones: &mut Vec, + requests: &Vec, + ) -> Result<(), Error> { + // TODO(https://github.com/oxidecomputer/omicron/issues/726): + // As long as we ensure the requests don't overlap, we could + // parallelize this request. + for req in requests { + info!( + self.inner.log, + "Ensuring service zone is initialized: {:?}", req.zone_type + ); + // Before we bother allocating anything for this request, check if + // this service has already been created. + let expected_zone_name = + InstalledZone::get_zone_name(&req.zone_type.to_string(), None); + if existing_zones.iter().any(|z| z.name() == expected_zone_name) { + info!( + self.inner.log, + "Service zone {} already exists", req.zone_type + ); + continue; + } else { + info!( + self.inner.log, + "Service zone {} does not yet exist", req.zone_type + ); } + let running_zone = self.initialize_zone(req).await?; existing_zones.push(running_zone); } Ok(()) @@ -645,11 +764,11 @@ impl ServiceManager { /// /// These services will be instantiated by this function, will be recorded /// to a local file to ensure they start automatically on next boot. - pub async fn ensure( + pub async fn ensure_persistent( &self, request: ServiceEnsureBody, ) -> Result<(), Error> { - let mut existing_zones = self.zones.lock().await; + let mut existing_zones = self.inner.zones.lock().await; let config_path = self.services_config_path(); let services_to_initialize = { @@ -676,7 +795,7 @@ impl ServiceManager { // the case of changing configurations, rather than just doing // that removal implicitly. warn!( - self.log, + self.inner.log, "Cannot request services on this sled, differing configurations: {:#?}", known_set.symmetric_difference(&requested_set) ); @@ -709,6 +828,71 @@ impl ServiceManager { Ok(()) } + + pub async fn ensure_switch(&self, request: Option) { + let log = &self.inner.log; + let mut switch_zone = self.inner.switch_zone.lock().await; + + match (&*switch_zone, request) { + (SwitchZone::Disabled, Some(request)) => { + info!(log, "Enabling switch zone (new)"); + let mgr = self.clone(); + *switch_zone = SwitchZone::Initializing { + request, + initializer: tokio::task::spawn(async move { + mgr.initialize_switch_zone().await + }), + }; + } + (SwitchZone::Initializing { .. }, Some(_)) => { + info!(log, "Enabling switch zone (already underway)"); + } + (SwitchZone::Running(_), Some(_)) => { + info!(log, "Enabling switch zone (already complete)"); + } + (SwitchZone::Disabled, None) => { + info!(log, "Disabling switch zone (already complete)"); + } + (SwitchZone::Initializing { initializer, .. }, None) => { + info!(log, "Disabling switch zone (was initializing)"); + initializer.abort(); + *switch_zone = SwitchZone::Disabled; + } + (SwitchZone::Running(_), None) => { + info!(log, "Disabling switch zone (was running)"); + *switch_zone = SwitchZone::Disabled; + } + } + } + + async fn initialize_switch_zone(&self) { + loop { + { + let mut switch_zone = self.inner.switch_zone.lock().await; + match &*switch_zone { + SwitchZone::Initializing { request, .. } => { + match self.initialize_zone(&request).await { + Ok(zone) => { + *switch_zone = SwitchZone::Running(zone); + return; + } + Err(err) => { + warn!( + self.inner.log, + "Failed to initialize switch zone: {err}" + ); + } + } + } + _ => return, + } + } + + // Poll for the device every second - this timeout is somewhat + // arbitrary, but we probably don't want to use backoff here. + tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; + } + } } #[cfg(test)] @@ -719,12 +903,12 @@ mod test { svc, zone::MockZones, }; + use crate::params::ZoneType; use std::net::Ipv6Addr; use std::os::unix::process::ExitStatusExt; use uuid::Uuid; - const SVC_NAME: &str = "my_svc"; - const EXPECTED_ZONE_NAME: &str = "oxz_my_svc"; + const EXPECTED_ZONE_NAME: &str = "oxz_oximeter"; // Returns the expectations for a new service to be created. fn expect_new_service() -> Vec> { @@ -783,10 +967,10 @@ mod test { async fn ensure_new_service(mgr: &ServiceManager, id: Uuid) { let _expectations = expect_new_service(); - mgr.ensure(ServiceEnsureBody { + mgr.ensure_persistent(ServiceEnsureBody { services: vec![ServiceZoneRequest { id, - zone_name: SVC_NAME.to_string(), + zone_type: ZoneType::Oximeter, addresses: vec![Ipv6Addr::LOCALHOST], gz_addresses: vec![], services: vec![ServiceType::Oximeter], @@ -799,10 +983,10 @@ mod test { // Prepare to call "ensure" for a service which already exists. We should // return the service without actually installing a new zone. async fn ensure_existing_service(mgr: &ServiceManager, id: Uuid) { - mgr.ensure(ServiceEnsureBody { + mgr.ensure_persistent(ServiceEnsureBody { services: vec![ServiceZoneRequest { id, - zone_name: SVC_NAME.to_string(), + zone_type: ZoneType::Oximeter, addresses: vec![Ipv6Addr::LOCALHOST], gz_addresses: vec![], services: vec![ServiceType::Oximeter], diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 4cc5d65ad7..b18fb1a27c 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -7,7 +7,7 @@ use crate::bootstrap::params::SledAgentRequest; use crate::config::Config; use crate::hardware::HardwareManager; -use crate::illumos::vnic::VnicKind; +use crate::illumos::link::LinkKind; use crate::illumos::zfs::{ Mountpoint, ZONE_ZFS_DATASET, ZONE_ZFS_DATASET_MOUNTPOINT, }; @@ -16,14 +16,18 @@ use crate::illumos::{execute, PFEXEC}; use crate::instance_manager::InstanceManager; use crate::nexus::{LazyNexusClient, NexusRequestQueue}; use crate::params::{ - DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, - InstanceRuntimeStateRequested, InstanceSerialConsoleData, - ServiceEnsureBody, VpcFirewallRule, + DatasetKind, DendriteAsic, DiskStateRequested, InstanceHardware, + InstanceMigrateParams, InstanceRuntimeStateRequested, + InstanceSerialConsoleData, ServiceEnsureBody, ServiceType, + ServiceZoneRequest, VpcFirewallRule, ZoneType, }; use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; use dropshot::HttpError; use futures::stream::{self, StreamExt, TryStreamExt}; +use omicron_common::address::{ + get_sled_address, get_switch_zone_address, Ipv6Subnet, SLED_PREFIX, +}; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, internal::nexus::UpdateArtifact, @@ -32,7 +36,7 @@ use omicron_common::backoff::{ internal_service_policy_with_max, retry_notify, BackoffError, }; use slog::Logger; -use std::net::SocketAddrV6; +use std::net::{Ipv6Addr, SocketAddrV6}; use std::process::Command; use std::sync::Arc; use uuid::Uuid; @@ -152,8 +156,13 @@ struct SledAgentInner { // ID of the Sled id: Uuid, - // Sled underlay address - addr: SocketAddrV6, + // The sled's initial configuration + config: Config, + + // Subnet of the Sled's underlay. + // + // The Sled Agent's address can be derived from this value. + subnet: Ipv6Subnet, // Component of Sled Agent responsible for storage and dataset management. storage: StorageManager, @@ -174,6 +183,16 @@ struct SledAgentInner { nexus_request_queue: NexusRequestQueue, } +impl SledAgentInner { + fn sled_address(&self) -> SocketAddrV6 { + get_sled_address(self.subnet) + } + + fn switch_ip(&self) -> Ipv6Addr { + get_switch_zone_address(self.subnet) + } +} + #[derive(Clone)] pub struct SledAgent { inner: Arc, @@ -185,7 +204,6 @@ impl SledAgent { config: &Config, log: Logger, lazy_nexus_client: LazyNexusClient, - sled_address: SocketAddrV6, request: SledAgentRequest, ) -> Result { let id = config.id; @@ -223,6 +241,7 @@ impl SledAgent { // should be removed once the Sled Agent is initialized with a // RSS-provided IP address. In the meantime, we use one from the // configuration file. + let sled_address = request.sled_address(); Zones::ensure_has_global_zone_v6_address( etherstub_vnic.clone(), *sled_address.ip(), @@ -340,7 +359,8 @@ impl SledAgent { let sled_agent = SledAgent { inner: Arc::new(SledAgentInner { id, - addr: sled_address, + config: config.clone(), + subnet: request.subnet, storage, instances, hardware, @@ -424,14 +444,34 @@ impl SledAgent { } async fn ensure_scrimlet_services_active(&self, log: &Logger) { - // TODO(https://github.com/oxidecomputer/omicron/issues/823): Launch the switch zone, with - // Dendrite, MGS, and any other services we want to enable. - warn!(log, "Activating scrimlet services not yet implemented"); + info!(log, "Ensuring scrimlet services (enabling services)"); + + let services = match self.inner.config.stub_scrimlet { + Some(_) => { + vec![ServiceType::Dendrite { asic: DendriteAsic::TofinoStub }] + } + None => { + vec![ + ServiceType::Dendrite { asic: DendriteAsic::TofinoAsic }, + ServiceType::Tfport { pkt_source: "tfpkt0".to_string() }, + ] + } + }; + + let request = ServiceZoneRequest { + id: Uuid::new_v4(), + zone_type: ZoneType::Switch, + addresses: vec![self.inner.switch_ip()], + gz_addresses: vec![], + services, + }; + + self.inner.services.ensure_switch(Some(request)).await; } async fn ensure_scrimlet_services_deactive(&self, log: &Logger) { - // TODO(https://github.com/oxidecomputer/omicron/issues/823): Terminate the switch zone. - warn!(log, "Deactivating scrimlet services not yet implemented"); + info!(log, "Ensuring scrimlet services (disabling services)"); + self.inner.services.ensure_switch(None).await; } pub fn id(&self) -> Uuid { @@ -442,7 +482,7 @@ impl SledAgent { fn notify_nexus_about_self(&self, log: &Logger) { let sled_id = self.inner.id; let lazy_nexus_client = self.inner.lazy_nexus_client.clone(); - let sled_address = self.inner.addr; + let sled_address = self.inner.sled_address(); let is_scrimlet = self.inner.hardware.is_scrimlet(); let log = log.clone(); let fut = async move { @@ -511,7 +551,7 @@ impl SledAgent { &self, requested_services: ServiceEnsureBody, ) -> Result<(), Error> { - self.inner.services.ensure(requested_services).await?; + self.inner.services.ensure_persistent(requested_services).await?; Ok(()) } @@ -692,7 +732,7 @@ async fn delete_omicron_vnics(log: &Logger) -> Result<(), Error> { log, "Deleting existing VNIC"; "vnic_name" => &vnic, - "vnic_kind" => ?VnicKind::from_name(&vnic).unwrap(), + "vnic_kind" => ?LinkKind::from_name(&vnic).unwrap(), ); Dladm::delete_vnic(&vnic) }) diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 492c26fbbe..70badc34b5 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -5,8 +5,8 @@ //! Management of sled-local storage. use crate::illumos::dladm::Etherstub; +use crate::illumos::link::VnicAllocator; use crate::illumos::running_zone::{InstalledZone, RunningZone}; -use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::AddressRequest; use crate::illumos::zpool::ZpoolName; use crate::illumos::{zfs::Mountpoint, zone::ZONE_PREFIX, zpool::ZpoolInfo}; diff --git a/smf/internal-dns/manifest.xml b/smf/internal-dns/manifest.xml index 96384d8dc0..b8e7b7f790 100644 --- a/smf/internal-dns/manifest.xml +++ b/smf/internal-dns/manifest.xml @@ -1,9 +1,9 @@ - + - + @@ -13,7 +13,7 @@ diff --git a/smf/nexus/config-partial.toml b/smf/nexus/config-partial.toml index c2ba69e538..2dedbc4162 100644 --- a/smf/nexus/config-partial.toml +++ b/smf/nexus/config-partial.toml @@ -22,4 +22,4 @@ if_exists = "append" # Configuration for interacting with the timeseries database [timeseries_db] -address = "[fd00:1122:3344:0101::5]:8123" +address = "[fd00:1122:3344:0101::6]:8123" diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index efc066260d..c641cdf4c7 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -34,44 +34,44 @@ mac = "00:0d:b9:54:fe:e4" [[request.dataset]] id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" +address = "[fd00:1122:3344:0101::7]:32345" dataset_kind.type = "crucible" [[request.dataset]] id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" +address = "[fd00:1122:3344:0101::8]:32345" dataset_kind.type = "crucible" [[request.dataset]] id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" +address = "[fd00:1122:3344:0101::9]:32345" dataset_kind.type = "crucible" [[request.dataset]] id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::2]:32221" +address = "[fd00:1122:3344:0101::3]:32221" dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] +dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::3]:32221" ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. [[request.dataset]] id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" +address = "[fd00:1122:3344:0101::6]:8123" dataset_kind.type = "clickhouse" [[request.service_zone]] id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -zone_name = "nexus" -addresses = [ "fd00:1122:3344:0101::3" ] +zone_type = "nexus" +addresses = [ "fd00:1122:3344:0101::4" ] gz_addresses = [] [[request.service_zone.services]] type = "nexus" -internal_ip = "fd00:1122:3344:0101::3" +internal_ip = "fd00:1122:3344:0101::4" # NOTE: In the lab, use "172.20.15.226" external_ip = "192.168.1.20" @@ -79,23 +79,8 @@ external_ip = "192.168.1.20" # should allocate Oximeter services. [[request.service_zone]] id = "1da65e5b-210c-4859-a7d7-200c1e659972" -zone_name = "oximeter" -addresses = [ "fd00:1122:3344:0101::4" ] +zone_type = "oximeter" +addresses = [ "fd00:1122:3344:0101::5" ] gz_addresses = [] [[request.service_zone.services]] type = "oximeter" - -[[request.service_zone]] -id = "a0fe5ebc-9261-6f77-acc1-972481755789" -zone_name = "switch" -addresses = [ "fd00:1122:3344:0101::9" ] -gz_addresses = [] -[[request.service_zone.services]] -type = "dendrite" -asic = "tofino_stub" -#[[request.service_zone.services]] -# The tfport service will not work with the tofino_stub asic, -# as there is no network traffic to multiplex or network device -# to layer over. -#type = "tfport" -#pkt_source = "tfpkt0" From 51aae10200cfa04868ea7d1b2717fdda9333b33e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 10 Nov 2022 13:27:11 -0500 Subject: [PATCH 7/9] Fix docs --- sled-agent/src/illumos/dladm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index 0be67cc6b5..d9bead4880 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -158,7 +158,7 @@ impl Dladm { /// Creates a VNIC on top of the etherstub. /// - /// This VNIC is not tracked like [`crate::illumos::vnic::Vnic`], because + /// This VNIC is not tracked like [`crate::illumos::link::Link`], because /// it is expected to exist for the lifetime of the sled. pub fn ensure_etherstub_vnic( source: &Etherstub, From dbbb13ee81116847c60080c51c2019042398c880 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 10 Nov 2022 16:37:41 -0500 Subject: [PATCH 8/9] Link to bug --- sled-agent/src/services.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index c6a801ffe8..4e870ddcaf 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -314,7 +314,8 @@ impl ServiceManager { inner: Arc::new(ServiceManagerInner { log: log.clone(), config, - // TODO: Load the switch zone if it already exists? + // TODO(https://github.com/oxidecomputer/omicron/issues/725): + // Load the switch zone if it already exists? switch_zone: Mutex::new(SwitchZone::Disabled), zones: Mutex::new(vec![]), vnic_allocator: VnicAllocator::new("Service", etherstub), From 282baee453752275a1ed70b58fb1e7c6cb78fa3c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 11 Nov 2022 15:27:12 -0500 Subject: [PATCH 9/9] Cleanup background task without abort --- sled-agent/src/services.rs | 49 +++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 4e870ddcaf..a503d86f0d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -54,6 +54,7 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; use tokio::io::AsyncWriteExt; +use tokio::sync::oneshot; use tokio::sync::Mutex; use tokio::task::JoinHandle; use uuid::Uuid; @@ -252,6 +253,13 @@ impl<'t> SmfHelper<'t> { } } +struct Task { + // A signal for the initializer task to terminate + exit_tx: oneshot::Sender<()>, + // A task repeatedly trying to initialize the zone + initializer: JoinHandle<()>, +} + // Describes the Switch Zone state. enum SwitchZone { // The switch zone is not currently running. @@ -261,8 +269,8 @@ enum SwitchZone { Initializing { // The request for the zone request: ServiceZoneRequest, - // A task repeatedly trying to initialize the zone - initializer: JoinHandle<()>, + // A background task + worker: Option, }, // The Zone is currently running. Running(RunningZone), @@ -834,14 +842,18 @@ impl ServiceManager { let log = &self.inner.log; let mut switch_zone = self.inner.switch_zone.lock().await; - match (&*switch_zone, request) { + match (&mut *switch_zone, request) { (SwitchZone::Disabled, Some(request)) => { info!(log, "Enabling switch zone (new)"); let mgr = self.clone(); + let (exit_tx, exit_rx) = oneshot::channel(); *switch_zone = SwitchZone::Initializing { request, - initializer: tokio::task::spawn(async move { - mgr.initialize_switch_zone().await + worker: Some(Task { + exit_tx, + initializer: tokio::task::spawn(async move { + mgr.initialize_switch_zone(exit_rx).await + }), }), }; } @@ -854,9 +866,19 @@ impl ServiceManager { (SwitchZone::Disabled, None) => { info!(log, "Disabling switch zone (already complete)"); } - (SwitchZone::Initializing { initializer, .. }, None) => { + (SwitchZone::Initializing { worker, .. }, None) => { info!(log, "Disabling switch zone (was initializing)"); - initializer.abort(); + let worker = worker + .take() + .expect("Initializing without background task"); + // If this succeeds, we told the background task to exit + // successfully. If it fails, the background task already + // exited. + let _ = worker.exit_tx.send(()); + worker + .initializer + .await + .expect("Switch initializer task panicked"); *switch_zone = SwitchZone::Disabled; } (SwitchZone::Running(_), None) => { @@ -866,7 +888,7 @@ impl ServiceManager { } } - async fn initialize_switch_zone(&self) { + async fn initialize_switch_zone(&self, mut exit_rx: oneshot::Receiver<()>) { loop { { let mut switch_zone = self.inner.switch_zone.lock().await; @@ -889,9 +911,14 @@ impl ServiceManager { } } - // Poll for the device every second - this timeout is somewhat - // arbitrary, but we probably don't want to use backoff here. - tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; + tokio::select! { + // If we've been told to stop trying, bail. + _ = &mut exit_rx => return, + + // Poll for the device every second - this timeout is somewhat + // arbitrary, but we probably don't want to use backoff here. + _ = tokio::time::sleep(tokio::time::Duration::from_secs(1)) => (), + }; } } }