diff --git a/sled-agent/src/bootstrap/http_entrypoints.rs b/sled-agent/src/bootstrap/http_entrypoints.rs index c69bdeb0ce..7c32bf48a5 100644 --- a/sled-agent/src/bootstrap/http_entrypoints.rs +++ b/sled-agent/src/bootstrap/http_entrypoints.rs @@ -12,7 +12,6 @@ use super::BootstrapError; use super::RssAccessError; use crate::bootstrap::params::RackInitializeRequest; use crate::bootstrap::rack_ops::{RackInitId, RackResetId}; -use crate::storage_manager::StorageResources; use crate::updates::ConfigUpdates; use crate::updates::{Component, UpdateManager}; use bootstore::schemes::v0 as bootstore; @@ -25,6 +24,7 @@ use omicron_common::api::external::Error; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::Baseboard; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::Ipv6Addr; use tokio::sync::mpsc::error::TrySendError; @@ -33,7 +33,7 @@ use tokio::sync::{mpsc, oneshot}; pub(crate) struct BootstrapServerContext { pub(crate) base_log: Logger, pub(crate) global_zone_bootstrap_ip: Ipv6Addr, - pub(crate) storage_resources: StorageResources, + pub(crate) storage_manager: StorageHandle, pub(crate) bootstore_node_handle: bootstore::NodeHandle, pub(crate) baseboard: Baseboard, pub(crate) rss_access: RssAccess, @@ -50,7 +50,7 @@ impl BootstrapServerContext { self.rss_access.start_initializing( &self.base_log, self.global_zone_bootstrap_ip, - &self.storage_resources, + &self.storage_manager, &self.bootstore_node_handle, request, ) diff --git a/sled-agent/src/bootstrap/rack_ops.rs b/sled-agent/src/bootstrap/rack_ops.rs index b8721f8332..5cfd0b074a 100644 --- a/sled-agent/src/bootstrap/rack_ops.rs +++ b/sled-agent/src/bootstrap/rack_ops.rs @@ -8,11 +8,11 @@ use crate::bootstrap::http_entrypoints::RackOperationStatus; use crate::bootstrap::params::RackInitializeRequest; use crate::bootstrap::rss_handle::RssHandle; use crate::rack_setup::service::SetupServiceError; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::mem; use std::net::Ipv6Addr; @@ -171,7 +171,7 @@ impl RssAccess { &self, parent_log: &Logger, global_zone_bootstrap_ip: Ipv6Addr, - storage_resources: &StorageResources, + storage_manager: &StorageHandle, bootstore_node_handle: &bootstore::NodeHandle, request: RackInitializeRequest, ) -> Result { @@ -207,14 +207,14 @@ impl RssAccess { mem::drop(status); let parent_log = parent_log.clone(); - let storage_resources = storage_resources.clone(); + let storage_manager = storage_manager.clone(); let bootstore_node_handle = bootstore_node_handle.clone(); let status = Arc::clone(&self.status); tokio::spawn(async move { let result = rack_initialize( &parent_log, global_zone_bootstrap_ip, - storage_resources, + storage_manager, bootstore_node_handle, request, ) @@ -342,7 +342,7 @@ enum RssStatus { async fn rack_initialize( parent_log: &Logger, global_zone_bootstrap_ip: Ipv6Addr, - storage_resources: StorageResources, + storage_manager: StorageHandle, bootstore_node_handle: bootstore::NodeHandle, request: RackInitializeRequest, ) -> Result<(), SetupServiceError> { @@ -350,7 +350,7 @@ async fn rack_initialize( parent_log, request, global_zone_bootstrap_ip, - storage_resources, + storage_manager, bootstore_node_handle, ) .await diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index c82873d91d..5d9c01e7f2 100644 --- 
a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -9,7 +9,6 @@ use super::params::StartSledAgentRequest; use crate::rack_setup::config::SetupServiceConfig; use crate::rack_setup::service::RackSetupService; use crate::rack_setup::service::SetupServiceError; -use crate::storage_manager::StorageResources; use ::bootstrap_agent_client::Client as BootstrapAgentClient; use bootstore::schemes::v0 as bootstore; use futures::stream::FuturesUnordered; @@ -17,6 +16,7 @@ use futures::StreamExt; use omicron_common::backoff::retry_notify; use omicron_common::backoff::retry_policy_local; use omicron_common::backoff::BackoffError; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::Ipv6Addr; use std::net::SocketAddrV6; @@ -46,7 +46,7 @@ impl RssHandle { log: &Logger, config: SetupServiceConfig, our_bootstrap_address: Ipv6Addr, - storage_resources: StorageResources, + storage_manager: StorageHandle, bootstore: bootstore::NodeHandle, ) -> Result<(), SetupServiceError> { let (tx, rx) = rss_channel(our_bootstrap_address); @@ -54,7 +54,7 @@ impl RssHandle { let rss = RackSetupService::new( log.new(o!("component" => "RSS")), config, - storage_resources, + storage_manager, tx, bootstore, ); diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 4e07ee03b7..94c326eef5 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -8,7 +8,6 @@ use super::config::BOOTSTRAP_AGENT_HTTP_PORT; use super::http_entrypoints; use super::params::RackInitializeRequest; use super::params::StartSledAgentRequest; -use super::pre_server::BootstrapManagers; use super::rack_ops::RackInitId; use super::views::SledAgentResponse; use super::BootstrapError; @@ -23,15 +22,15 @@ use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; use crate::bootstrap::sprockets_server::SprocketsServer; use crate::config::Config as SledConfig; use crate::config::ConfigError; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::server::Server as SledAgentServer; +use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; use ddm_admin_client::DdmError; use dropshot::HttpServer; -use futures::Future; use futures::StreamExt; use illumos_utils::dladm; use illumos_utils::zfs; @@ -44,7 +43,6 @@ use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; use sled_hardware::underlay; -use sled_hardware::HardwareUpdate; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; @@ -52,7 +50,6 @@ use std::borrow::Cow; use std::io; use std::net::SocketAddr; use std::net::SocketAddrV6; -use tokio::sync::broadcast; use tokio::sync::mpsc; use tokio::sync::oneshot; use tokio::task::JoinHandle; @@ -206,9 +203,9 @@ impl Server { let bootstrap_context = BootstrapServerContext { base_log: base_log.clone(), global_zone_bootstrap_ip, - storage_resources: storage_resources.clone(), - bootstore_node_handle: bootstore_handles.node_handle.clone(), - baseboard: managers.hardware.baseboard(), + storage_manager: long_running_task_handles.storage_manager.clone(), + bootstore_node_handle: long_running_task_handles.bootstore.clone(), + baseboard: long_running_task_handles.hardware_manager.baseboard(), rss_access, updates: config.updates.clone(), sled_reset_tx, @@ -240,52 +237,31 @@ impl Server { // Do we have 
a persistent sled-agent request that we need to restore? let state = if let Some(ledger) = maybe_ledger { let sled_request = ledger.data(); - let sled_agent_server = wait_while_handling_hardware_updates( - start_sled_agent( - &config, - &sled_request.request, - &bootstore_handles.node_handle, - &managers, - &ddm_admin_localhost_client, - &base_log, - &startup_log, - ), - &mut hardware_monitor, - &managers, - None, // No underlay network yet + let sled_agent_server = start_sled_agent( + &config, + &sled_request.request, + long_running_task_handles.clone(), + service_manager, + &ddm_admin_localhost_client, + &base_log, &startup_log, - "restoring sled-agent (cold boot)", ) .await?; - let sled_agent = sled_agent_server.sled_agent(); - // We've created sled-agent; we need to (possibly) reconfigure the // switch zone, if we're a scrimlet, to give it our underlay network // information. - let underlay_network_info = sled_agent.switch_zone_underlay_info(); - info!( - startup_log, "Sled Agent started; rescanning hardware"; - "underlay_network_info" => ?underlay_network_info, - ); - managers - .check_latest_hardware_snapshot(Some(&sled_agent), &startup_log) + let sled_agent = sled_agent_server.sled_agent(); + long_running_task_handles + .hardware_monitor + .sled_agent_started(sled_agent.clone()) .await; // For cold boot specifically, we now need to load the services // we're responsible for, while continuing to handle hardware // notifications. This cannot fail: we retry indefinitely until // we're done loading services. - wait_while_handling_hardware_updates( - sled_agent.cold_boot_load_services(), - &mut hardware_monitor, - &managers, - Some(&sled_agent), - &startup_log, - "restoring sled-agent services (cold boot)", - ) - .await; - + sled_agent.cold_boot_load_services().await; SledAgentState::ServerStarted(sled_agent_server) } else { SledAgentState::Bootstrapping @@ -296,15 +272,13 @@ impl Server { // agent state. let inner = Inner { config, - hardware_monitor, state, sled_init_rx, sled_reset_rx, - managers, ddm_admin_localhost_client, - bootstore_handles, + long_running_task_handles, + service_manager, _sprockets_server_handle: sprockets_server_handle, - _key_manager_handle: key_manager_handle, base_log, }; let inner_task = tokio::spawn(inner.run()); @@ -378,8 +352,8 @@ impl From for StartError { async fn start_sled_agent( config: &SledConfig, request: &StartSledAgentRequest, - bootstore: &bootstore::NodeHandle, - managers: &BootstrapManagers, + long_running_task_handles: LongRunningTaskHandles, + service_manager: ServiceManager, ddmd_client: &DdmAdminClient, base_log: &Logger, log: &Logger, @@ -394,14 +368,17 @@ async fn start_sled_agent( if request.use_trust_quorum { info!(log, "KeyManager: using lrtq secret retriever"); let salt = request.hash_rack_id(); - LrtqOrHardcodedSecretRetriever::init_lrtq(salt, bootstore.clone()) + LrtqOrHardcodedSecretRetriever::init_lrtq( + salt, + long_running_task_handles.bootstore.clone(), + ) } else { info!(log, "KeyManager: using hardcoded secret retriever"); LrtqOrHardcodedSecretRetriever::init_hardcoded(); } // Inform the storage service that the key manager is available - managers.storage.key_manager_ready().await; + long_running_task_handles.storage_manager.key_manager_ready().await; // Start trying to notify ddmd of our sled prefix so it can // advertise it to other sleds. 
@@ -421,9 +398,8 @@ async fn start_sled_agent( config, base_log.clone(), request.clone(), - managers.service.clone(), - managers.storage.clone(), - bootstore.clone(), + long_running_task_handles.clone(), + service_manager, ) .await .map_err(SledAgentServerStartError::FailedStartingServer)?; @@ -432,7 +408,8 @@ async fn start_sled_agent( // Record this request so the sled agent can be automatically // initialized on the next boot. - let paths = sled_config_paths(managers.storage.resources()).await?; + let paths = + sled_config_paths(&long_running_task_handles.storage_manager).await?; let mut ledger = Ledger::new_with( &log, @@ -505,41 +482,6 @@ async fn sled_config_paths( Ok(paths) } -// Helper function to wait for `fut` while handling any updates about hardware. -async fn wait_while_handling_hardware_updates, T>( - fut: F, - hardware_monitor: &mut broadcast::Receiver, - managers: &BootstrapManagers, - sled_agent: Option<&SledAgent>, - log: &Logger, - log_phase: &str, -) -> T { - tokio::pin!(fut); - loop { - tokio::select! { - // Cancel-safe per the docs on `broadcast::Receiver::recv()`. - hardware_update = hardware_monitor.recv() => { - info!( - log, - "Handling hardware update message"; - "phase" => log_phase, - "update" => ?hardware_update, - ); - - managers.handle_hardware_update( - hardware_update, - sled_agent, - log, - ).await; - } - - // Cancel-safe: we're using a `&mut Future`; dropping the - // reference does not cancel the underlying future. - result = &mut fut => return result, - } - } -} - #[derive(Clone, Serialize, Deserialize, PartialEq, JsonSchema)] struct PersistentSledAgentRequest<'a> { request: Cow<'a, StartSledAgentRequest>, @@ -565,18 +507,16 @@ pub fn run_openapi() -> Result<(), String> { struct Inner { config: SledConfig, - hardware_monitor: broadcast::Receiver, state: SledAgentState, sled_init_rx: mpsc::Receiver<( StartSledAgentRequest, oneshot::Sender>, )>, sled_reset_rx: mpsc::Receiver>>, - managers: BootstrapManagers, ddm_admin_localhost_client: DdmAdminClient, - bootstore_handles: BootstoreHandles, + service_manager: ServiceManager, + long_running_task_handles: LongRunningTaskHandles, _sprockets_server_handle: JoinHandle<()>, - _key_manager_handle: JoinHandle<()>, base_log: Logger, } @@ -584,14 +524,7 @@ impl Inner { async fn run(mut self) { let log = self.base_log.new(o!("component" => "SledAgentMain")); loop { - // TODO-correctness We pause handling hardware update messages while - // we handle sled init/reset requests - is that okay? tokio::select! { - // Cancel-safe per the docs on `broadcast::Receiver::recv()`. - hardware_update = self.hardware_monitor.recv() => { - self.handle_hardware_update(hardware_update, &log).await; - } - // Cancel-safe per the docs on `mpsc::Receiver::recv()`. 
Some((request, response_tx)) = self.sled_init_rx.recv() => { self.handle_start_sled_agent_request( @@ -619,27 +552,6 @@ impl Inner { } } - async fn handle_hardware_update( - &self, - hardware_update: Result, - log: &Logger, - ) { - info!( - log, - "Handling hardware update message"; - "phase" => "bootstore-steady-state", - "update" => ?hardware_update, - ); - - self.managers - .handle_hardware_update( - hardware_update, - self.state.sled_agent(), - &log, - ) - .await; - } - async fn handle_start_sled_agent_request( &mut self, request: StartSledAgentRequest, @@ -651,8 +563,8 @@ impl Inner { let response = match start_sled_agent( &self.config, &request, - &self.bootstore_handles.node_handle, - &self.managers, + self.long_running_task_handles.clone(), + self.service_manager.clone(), &self.ddm_admin_localhost_client, &self.base_log, &log, @@ -663,11 +575,9 @@ impl Inner { // We've created sled-agent; we need to (possibly) // reconfigure the switch zone, if we're a scrimlet, to // give it our underlay network information. - self.managers - .check_latest_hardware_snapshot( - Some(server.sled_agent()), - log, - ) + self.long_running_task_handles + .hardware_monitor + .sled_agent_started(server.sled_agent().clone()) .await; self.state = SledAgentState::ServerStarted(server); @@ -725,11 +635,11 @@ impl Inner { async fn uninstall_sled_local_config(&self) -> Result<(), BootstrapError> { let config_dirs = self - .managers - .storage - .resources() - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + .long_running_task_handles + .storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter(); for dir in config_dirs { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index baf92af28a..88ece3e3b0 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -17,7 +17,6 @@ use crate::params::{ InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule, }; use crate::profile::*; -use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; @@ -40,7 +39,8 @@ use omicron_common::backoff; use propolis_client::Client as PropolisClient; use rand::prelude::SliceRandom; use rand::SeedableRng; -use sled_hardware::disk::ZONE_DATASET; +use sled_storage::dataset::ZONE_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::IpAddr; use std::net::{SocketAddr, SocketAddrV6}; @@ -243,7 +243,7 @@ struct InstanceInner { nexus_client: NexusClientWithResolver, // Storage resources - storage: StorageResources, + storage: StorageHandle, // Object used to collect zone bundles from this instance when terminated. zone_bundler: ZoneBundler, @@ -622,7 +622,7 @@ impl Instance { vnic_allocator: VnicAllocator, port_manager: PortManager, nexus_client: NexusClientWithResolver, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ) -> Result { info!(log, "Instance::new w/initial HW: {:?}", initial); @@ -889,8 +889,9 @@ impl Instance { let mut rng = rand::rngs::StdRng::from_entropy(); let root = inner .storage - .all_u2_mountpoints(ZONE_DATASET) + .get_latest_resources() .await + .all_u2_mountpoints(ZONE_DATASET) .choose(&mut rng) .ok_or_else(|| Error::U2NotFound)? 
.clone(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index bdd29e4d1f..c6310d28f2 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -12,7 +12,6 @@ use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, InstanceUnregisterResponse, }; -use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use illumos_utils::dladm::Etherstub; @@ -21,6 +20,7 @@ use illumos_utils::opte::PortManager; use illumos_utils::vmm_reservoir; use omicron_common::api::external::ByteCount; use omicron_common::api::internal::nexus::InstanceRuntimeState; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::sync::{Arc, Mutex}; @@ -62,7 +62,7 @@ struct InstanceManagerInternal { vnic_allocator: VnicAllocator, port_manager: PortManager, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, } @@ -78,7 +78,7 @@ impl InstanceManager { nexus_client: NexusClientWithResolver, etherstub: Etherstub, port_manager: PortManager, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ) -> Result { Ok(InstanceManager { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index d0fa2fbe4d..e736793298 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -18,6 +18,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::Baseboard; pub use sled_hardware::DendriteAsic; +use sled_storage::dataset::DatasetName; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::time::Duration; @@ -210,64 +211,6 @@ pub struct Zpool { pub disk_type: DiskType, } -/// The type of a dataset, and an auxiliary information necessary -/// to successfully launch a zone managing the associated data. -#[derive( - Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, -)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum DatasetKind { - CockroachDb, - Crucible, - Clickhouse, - ClickhouseKeeper, - ExternalDns, - InternalDns, -} - -impl From for sled_agent_client::types::DatasetKind { - fn from(k: DatasetKind) -> Self { - use DatasetKind::*; - match k { - CockroachDb => Self::CockroachDb, - Crucible => Self::Crucible, - Clickhouse => Self::Clickhouse, - ClickhouseKeeper => Self::ClickhouseKeeper, - ExternalDns => Self::ExternalDns, - InternalDns => Self::InternalDns, - } - } -} - -impl From for nexus_client::types::DatasetKind { - fn from(k: DatasetKind) -> Self { - use DatasetKind::*; - match k { - CockroachDb => Self::Cockroach, - Crucible => Self::Crucible, - Clickhouse => Self::Clickhouse, - ClickhouseKeeper => Self::ClickhouseKeeper, - ExternalDns => Self::ExternalDns, - InternalDns => Self::InternalDns, - } - } -} - -impl std::fmt::Display for DatasetKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use DatasetKind::*; - let s = match self { - Crucible => "crucible", - CockroachDb { .. } => "cockroachdb", - Clickhouse => "clickhouse", - ClickhouseKeeper => "clickhouse_keeper", - ExternalDns { .. } => "external_dns", - InternalDns { .. } => "internal_dns", - }; - write!(f, "{}", s) - } -} - /// Describes service-specific parameters. 
#[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, @@ -577,7 +520,7 @@ impl std::fmt::Display for ZoneType { )] pub struct DatasetRequest { pub id: Uuid, - pub name: crate::storage::dataset::DatasetName, + pub name: DatasetName, pub service_address: SocketAddrV6, } diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 2183aa7b63..01fababa4d 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -11,7 +11,6 @@ use crate::params::{ }; use crate::rack_setup::config::SetupServiceConfig as Config; use crate::storage::dataset::DatasetName; -use crate::storage_manager::StorageResources; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; @@ -35,6 +34,8 @@ use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::{BTreeSet, HashMap, HashSet}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6}; @@ -124,11 +125,12 @@ const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan.json"; impl Plan { pub async fn load( log: &Logger, - storage: &StorageResources, + storage_manager: &StorageHandle, ) -> Result, PlanError> { - let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SERVICE_PLAN_FILENAME)) .collect(); @@ -236,7 +238,7 @@ impl Plan { pub async fn create( log: &Logger, config: &Config, - storage: &StorageResources, + storage_manager: &StorageHandle, sleds: &HashMap, ) -> Result { let mut dns_builder = internal_dns::DnsConfigBuilder::new(); @@ -724,9 +726,10 @@ impl Plan { let plan = Self { services, dns_config }; // Once we've constructed a plan, write it down to durable storage. - let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SERVICE_PLAN_FILENAME)) .collect(); diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index ea12f0db32..189216fd9b 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -13,6 +13,8 @@ use camino::Utf8PathBuf; use omicron_common::ledger::{self, Ledger, Ledgerable}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddrV6}; @@ -77,7 +79,7 @@ impl Plan { pub async fn create( log: &Logger, config: &Config, - storage: &StorageResources, + storage_manager: &StorageHandle, bootstrap_addrs: HashSet, use_trust_quorum: bool, ) -> Result { @@ -119,9 +121,10 @@ impl Plan { let plan = Self { rack_id, sleds, config: config.clone() }; // Once we've constructed a plan, write it down to durable storage. 
- let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SLED_PLAN_FILENAME)) .collect(); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 805c889295..42290f5ce8 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -73,7 +73,6 @@ use crate::rack_setup::plan::service::{ use crate::rack_setup::plan::sled::{ Plan as SledPlan, PlanError as SledPlanError, }; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use ddm_admin_client::{Client as DdmAdminClient, DdmError}; @@ -93,6 +92,8 @@ use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; use sled_hardware::underlay::BootstrapInterface; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeSet; use std::collections::{HashMap, HashSet}; @@ -186,7 +187,7 @@ impl RackSetupService { pub(crate) fn new( log: Logger, config: Config, - storage_resources: StorageResources, + storage_manager: StorageHandle, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, ) -> Self { @@ -195,7 +196,7 @@ impl RackSetupService { if let Err(e) = svc .run( &config, - &storage_resources, + &storage_manager, local_bootstrap_agent, bootstore, ) @@ -741,7 +742,7 @@ impl ServiceInner { async fn run( &self, config: &Config, - storage_resources: &StorageResources, + storage_manager: &StorageHandle, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, ) -> Result<(), SetupServiceError> { @@ -752,9 +753,10 @@ impl ServiceInner { config.az_subnet(), )?; - let marker_paths: Vec = storage_resources - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let marker_paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_COMPLETED_FILENAME)) .collect(); @@ -775,7 +777,7 @@ impl ServiceInner { "RSS configuration looks like it has already been applied", ); - let sled_plan = SledPlan::load(&self.log, storage_resources) + let sled_plan = SledPlan::load(&self.log, storage_manager) .await? .expect("Sled plan should exist if completed marker exists"); if &sled_plan.config != config { @@ -783,7 +785,7 @@ impl ServiceInner { "Configuration changed".to_string(), )); } - let service_plan = ServicePlan::load(&self.log, storage_resources) + let service_plan = ServicePlan::load(&self.log, storage_manager) .await? .expect("Service plan should exist if completed marker exists"); @@ -817,7 +819,7 @@ impl ServiceInner { BootstrapAddressDiscovery::OnlyThese { addrs } => addrs.clone(), }; let maybe_sled_plan = - SledPlan::load(&self.log, storage_resources).await?; + SledPlan::load(&self.log, storage_manager).await?; if let Some(plan) = &maybe_sled_plan { let stored_peers: HashSet = plan.sleds.keys().map(|a| *a.ip()).collect(); @@ -849,7 +851,7 @@ impl ServiceInner { SledPlan::create( &self.log, config, - &storage_resources, + &storage_manager, bootstrap_addrs, config.trust_quorum_peers.is_some(), ) @@ -902,14 +904,14 @@ impl ServiceInner { }) .collect(); let service_plan = if let Some(plan) = - ServicePlan::load(&self.log, storage_resources).await? + ServicePlan::load(&self.log, storage_manager).await? 
{ plan } else { ServicePlan::create( &self.log, &config, - &storage_resources, + &storage_manager, &plan.sleds, ) .await? diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 156547627c..c9828e7542 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -8,10 +8,9 @@ use super::config::Config; use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; use crate::bootstrap::params::StartSledAgentRequest; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::nexus::NexusClientWithResolver; use crate::services::ServiceManager; -use crate::storage_manager::StorageManager; -use bootstore::schemes::v0 as bootstore; use internal_dns::resolver::Resolver; use slog::Logger; use std::net::SocketAddr; @@ -39,9 +38,8 @@ impl Server { config: &Config, log: Logger, request: StartSledAgentRequest, + long_running_tasks_handles: LongRunningTaskHandles, services: ServiceManager, - storage: StorageManager, - bootstore: bootstore::NodeHandle, ) -> Result { info!(log, "setting up sled agent server"); @@ -63,8 +61,7 @@ impl Server { nexus_client, request, services, - storage, - bootstore, + long_running_tasks_handles, ) .await .map_err(|e| e.to_string())?; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index dc130524f6..475cbf8018 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -10,6 +10,7 @@ use crate::bootstrap::early_networking::{ use crate::bootstrap::params::StartSledAgentRequest; use crate::config::Config; use crate::instance_manager::InstanceManager; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::nexus::{NexusClientWithResolver, NexusRequestQueue}; use crate::params::{ DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, @@ -18,11 +19,9 @@ use crate::params::{ VpcFirewallRule, ZoneBundleMetadata, Zpool, }; use crate::services::{self, ServiceManager}; -use crate::storage_manager::{self, StorageManager}; use crate::updates::{ConfigUpdates, UpdateManager}; use crate::zone_bundle; use crate::zone_bundle::BundleError; -use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use dropshot::HttpError; use illumos_utils::opte::params::{ @@ -46,6 +45,8 @@ use omicron_common::backoff::{ }; use sled_hardware::underlay; use sled_hardware::HardwareManager; +use sled_storage::dataset::DatasetName; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddrV6}; @@ -200,7 +201,7 @@ struct SledAgentInner { subnet: Ipv6Subnet, // Component of Sled Agent responsible for storage and dataset management. - storage: StorageManager, + storage: StorageHandle, // Component of Sled Agent responsible for managing Propolis instances. instances: InstanceManager, @@ -254,8 +255,7 @@ impl SledAgent { nexus_client: NexusClientWithResolver, request: StartSledAgentRequest, services: ServiceManager, - storage: StorageManager, - bootstore: bootstore::NodeHandle, + long_running_task_handles: LongRunningTaskHandles, ) -> Result { // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. @@ -268,12 +268,17 @@ impl SledAgent { )); info!(&log, "SledAgent::new(..) starting"); + let storage_manager = &long_running_task_handles.storage_manager; + // Configure a swap device of the configured size before other system setup. 
match config.swap_device_size_gb { Some(sz) if sz > 0 => { info!(log, "Requested swap device of size {} GiB", sz); - let boot_disk = - storage.resources().boot_disk().await.ok_or_else(|| { + let boot_disk = storage_manager + .get_latest_resources() + .await + .boot_disk() + .ok_or_else(|| { crate::swap_device::SwapDeviceError::BootDiskNotFound })?; crate::swap_device::ensure_swap_device( @@ -324,28 +329,13 @@ impl SledAgent { *sled_address.ip(), ); - storage - .setup_underlay_access(storage_manager::UnderlayAccess { - nexus_client: nexus_client.clone(), - sled_id: request.id, - }) - .await?; - - // TODO-correctness The bootstrap agent _also_ has a `HardwareManager`. - // We only use it for reading properties, but it's not `Clone`able - // because it's holding an inner task handle. Could we add a way to get - // a read-only handle to it, and have bootstrap agent give us that - // instead of creating a new full one ourselves? - let hardware = HardwareManager::new(&parent_log, services.sled_mode()) - .map_err(|e| Error::Hardware(e))?; - let instances = InstanceManager::new( parent_log.clone(), nexus_client.clone(), etherstub.clone(), port_manager.clone(), - storage.resources().clone(), - storage.zone_bundler().clone(), + storage_manager.clone(), + long_running_task_handles.zone_bundler.clone(), )?; match config.vmm_reservoir_percentage { @@ -378,7 +368,8 @@ impl SledAgent { // until we have this, as we need to know which switches have uplinks to // correctly set up services. let get_network_config = || async { - let serialized_config = bootstore + let serialized_config = long_running_task_handles + .bootstore .get_network_config() .await .map_err(|err| BackoffError::transient(err.to_string()))? @@ -421,14 +412,13 @@ impl SledAgent { rack_network_config.clone(), )?; - let zone_bundler = storage.zone_bundler().clone(); let sled_agent = SledAgent { inner: Arc::new(SledAgentInner { id: request.id, subnet: request.subnet, - storage, + storage: long_running_task_handles.storage_manager.clone(), instances, - hardware, + hardware: long_running_task_handles.hardware_manager.clone(), updates, port_manager, services, @@ -442,7 +432,7 @@ impl SledAgent { // request queue? nexus_request_queue: NexusRequestQueue::new(), rack_network_config, - zone_bundler, + zone_bundler: long_running_task_handles.zone_bundler.clone(), }), log: log.clone(), }; @@ -462,6 +452,7 @@ impl SledAgent { /// Blocks until all services have started, retrying indefinitely on /// failure. pub(crate) async fn cold_boot_load_services(&self) { + info!(self.log, "Loading cold boot services"); retry_notify( retry_policy_internal_service_aggressive(), || async { diff --git a/sled-agent/src/storage/dataset.rs b/sled-agent/src/storage/dataset.rs deleted file mode 100644 index 4efc0f320a..0000000000 --- a/sled-agent/src/storage/dataset.rs +++ /dev/null @@ -1,63 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use crate::params::DatasetKind; -use illumos_utils::zpool::ZpoolName; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use std::str::FromStr; - -#[derive( - Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, -)] -pub struct DatasetName { - // A unique identifier for the Zpool on which the dataset is stored. - pool_name: ZpoolName, - // A name for the dataset within the Zpool. 
- kind: DatasetKind, -} - -impl DatasetName { - pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self { - Self { pool_name, kind } - } - - pub fn pool(&self) -> &ZpoolName { - &self.pool_name - } - - pub fn dataset(&self) -> &DatasetKind { - &self.kind - } - - pub fn full(&self) -> String { - format!("{}/{}", self.pool_name, self.kind) - } -} - -impl From for sled_agent_client::types::DatasetName { - fn from(n: DatasetName) -> Self { - Self { - pool_name: sled_agent_client::types::ZpoolName::from_str( - &n.pool().to_string(), - ) - .unwrap(), - kind: n.dataset().clone().into(), - } - } -} - -#[cfg(test)] -mod test { - use super::*; - use uuid::Uuid; - - #[test] - fn serialize_dataset_name() { - let pool = ZpoolName::new_internal(Uuid::new_v4()); - let kind = DatasetKind::Crucible; - let name = DatasetName::new(pool, kind); - toml::to_string(&name).unwrap(); - } -} diff --git a/sled-agent/src/storage/dump_setup.rs b/sled-agent/src/storage/dump_setup.rs index 9b5edc0a7e..ea60998955 100644 --- a/sled-agent/src/storage/dump_setup.rs +++ b/sled-agent/src/storage/dump_setup.rs @@ -1,4 +1,3 @@ -use crate::storage_manager::DiskWrapper; use camino::Utf8PathBuf; use derive_more::{AsRef, Deref, From}; use illumos_utils::dumpadm::DumpAdmError; @@ -6,6 +5,8 @@ use illumos_utils::zone::{AdmError, Zones}; use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; use omicron_common::disk::DiskIdentity; use sled_hardware::DiskVariant; +use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET}; +use sled_storage::disk::Disk; use slog::Logger; use std::collections::{HashMap, HashSet}; use std::ffi::OsString; @@ -70,11 +71,11 @@ trait GetMountpoint: std::ops::Deref { } impl GetMountpoint for DebugZpool { type NewType = DebugDataset; - const MOUNTPOINT: &'static str = sled_hardware::disk::DUMP_DATASET; + const MOUNTPOINT: &'static str = DUMP_DATASET; } impl GetMountpoint for CoreZpool { type NewType = CoreDataset; - const MOUNTPOINT: &'static str = sled_hardware::disk::CRASH_DATASET; + const MOUNTPOINT: &'static str = CRASH_DATASET; } struct DumpSetupWorker { @@ -99,50 +100,51 @@ const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300); impl DumpSetup { pub(crate) async fn update_dumpdev_setup( &self, - disks: &mut MutexGuard<'_, HashMap>, + disks: &mut MutexGuard<'_, HashMap>, ) { let log = &self.log; let mut m2_dump_slices = Vec::new(); let mut u2_debug_datasets = Vec::new(); let mut m2_core_datasets = Vec::new(); - for (_id, disk_wrapper) in disks.iter() { - match disk_wrapper { - DiskWrapper::Real { disk, .. 
} => match disk.variant() { - DiskVariant::M2 => { - match disk.dump_device_devfs_path(false) { - Ok(path) => { - m2_dump_slices.push(DumpSlicePath(path)) - } - Err(err) => { - warn!(log, "Error getting dump device devfs path: {err:?}"); - } + for (_id, disk) in disks.iter() { + if disk.is_synthetic() { + // We only setup dump devices on real disks + continue; + } + match disk.variant() { + DiskVariant::M2 => { + match disk.dump_device_devfs_path(false) { + Ok(path) => m2_dump_slices.push(DumpSlicePath(path)), + Err(err) => { + warn!( + log, + "Error getting dump device devfs path: {err:?}" + ); } - let name = disk.zpool_name(); - if let Ok(info) = illumos_utils::zpool::Zpool::get_info( - &name.to_string(), - ) { - if info.health() == ZpoolHealth::Online { - m2_core_datasets.push(CoreZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); - } + } + let name = disk.zpool_name(); + if let Ok(info) = + illumos_utils::zpool::Zpool::get_info(&name.to_string()) + { + if info.health() == ZpoolHealth::Online { + m2_core_datasets.push(CoreZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); } } - DiskVariant::U2 => { - let name = disk.zpool_name(); - if let Ok(info) = illumos_utils::zpool::Zpool::get_info( - &name.to_string(), - ) { - if info.health() == ZpoolHealth::Online { - u2_debug_datasets - .push(DebugZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); - } + } + DiskVariant::U2 => { + let name = disk.zpool_name(); + if let Ok(info) = + illumos_utils::zpool::Zpool::get_info(&name.to_string()) + { + if info.health() == ZpoolHealth::Online { + u2_debug_datasets.push(DebugZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); } } - }, - DiskWrapper::Synthetic { .. } => {} + } } } diff --git a/sled-agent/src/storage/mod.rs b/sled-agent/src/storage/mod.rs index 74bd59a151..663ebe8274 100644 --- a/sled-agent/src/storage/mod.rs +++ b/sled-agent/src/storage/mod.rs @@ -4,5 +4,4 @@ //! Management of local storage -pub(crate) mod dataset; pub(crate) mod dump_setup; diff --git a/sled-storage/src/dump_setup.rs b/sled-storage/src/dump_setup.rs deleted file mode 100644 index 39c6aa2995..0000000000 --- a/sled-storage/src/dump_setup.rs +++ /dev/null @@ -1,803 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! 
Dump dataset setup - -use crate::dataset::{CRASH_DATASET, DUMP_DATASET}; -use crate::disk::Disk; -use camino::Utf8PathBuf; -use derive_more::{AsRef, Deref, From}; -use illumos_utils::dumpadm::DumpAdmError; -use illumos_utils::zone::{AdmError, Zones}; -use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; -use omicron_common::disk::DiskIdentity; -use sled_hardware::DiskVariant; -use slog::{debug, error, info, o, warn, Logger}; -use std::collections::{HashMap, HashSet}; -use std::ffi::OsString; -use std::path::{Path, PathBuf}; -use std::sync::{Arc, Weak}; -use std::time::{Duration, SystemTime, SystemTimeError, UNIX_EPOCH}; -use tokio::sync::MutexGuard; - -pub struct DumpSetup { - worker: Arc>, - _poller: std::thread::JoinHandle<()>, - log: Logger, -} - -impl DumpSetup { - pub fn new(log: &Logger) -> Self { - let worker = Arc::new(std::sync::Mutex::new(DumpSetupWorker::new( - log.new(o!("component" => "DumpSetup-worker")), - ))); - let worker_weak = Arc::downgrade(&worker); - let log_poll = log.new(o!("component" => "DumpSetup-archival")); - let _poller = std::thread::spawn(move || { - Self::poll_file_archival(worker_weak, log_poll) - }); - let log = log.new(o!("component" => "DumpSetup")); - Self { worker, _poller, log } - } -} - -// we sure are passing a lot of Utf8PathBufs around, let's be careful about it -#[derive( - AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd, -)] -struct DumpSlicePath(Utf8PathBuf); -#[derive( - AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd, -)] -struct DebugDataset(Utf8PathBuf); -#[derive( - AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd, -)] -struct CoreDataset(Utf8PathBuf); - -#[derive(Deref)] -struct CoreZpool(ZpoolName); -#[derive(Deref)] -struct DebugZpool(ZpoolName); - -// only want to access these directories after they're mounted! -trait GetMountpoint: std::ops::Deref { - type NewType: From; - const MOUNTPOINT: &'static str; - fn mountpoint(&self) -> Result, ZfsGetError> { - if zfs_get_prop(self.to_string(), "mounted")? 
== "yes" { - Ok(Some(Self::NewType::from( - self.dataset_mountpoint(Self::MOUNTPOINT), - ))) - } else { - Ok(None) - } - } -} -impl GetMountpoint for DebugZpool { - type NewType = DebugDataset; - const MOUNTPOINT: &'static str = DUMP_DATASET; -} -impl GetMountpoint for CoreZpool { - type NewType = CoreDataset; - const MOUNTPOINT: &'static str = CRASH_DATASET; -} - -struct DumpSetupWorker { - core_dataset_names: Vec, - debug_dataset_names: Vec, - - chosen_dump_slice: Option, - chosen_debug_dir: Option, - chosen_core_dir: Option, - - known_dump_slices: Vec, - known_debug_dirs: Vec, - known_core_dirs: Vec, - - savecored_slices: HashSet, - - log: Logger, -} - -const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300); - -impl DumpSetup { - pub(crate) async fn update_dumpdev_setup( - &self, - disks: &mut MutexGuard<'_, HashMap>, - ) { - let log = &self.log; - let mut m2_dump_slices = Vec::new(); - let mut u2_debug_datasets = Vec::new(); - let mut m2_core_datasets = Vec::new(); - for (_id, disk) in disks.iter() { - if disk.is_synthetic() { - // We only setup dump devices on real disks - continue; - } - match disk.variant() { - DiskVariant::M2 => { - match disk.dump_device_devfs_path(false) { - Ok(path) => m2_dump_slices.push(DumpSlicePath(path)), - Err(err) => { - warn!( - log, - "Error getting dump device devfs path: {err:?}" - ); - } - } - let name = disk.zpool_name(); - if let Ok(info) = - illumos_utils::zpool::Zpool::get_info(&name.to_string()) - { - if info.health() == ZpoolHealth::Online { - m2_core_datasets.push(CoreZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); - } - } - } - DiskVariant::U2 => { - let name = disk.zpool_name(); - if let Ok(info) = - illumos_utils::zpool::Zpool::get_info(&name.to_string()) - { - if info.health() == ZpoolHealth::Online { - u2_debug_datasets.push(DebugZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); - } - } - } - } - } - - let savecore_lock = self.worker.clone(); - let log_tmp = log.new(o!("component" => "DumpSetup-mutex")); - tokio::task::spawn_blocking(move || match savecore_lock.lock() { - Ok(mut guard) => { - guard.update_disk_loadout( - m2_dump_slices, - u2_debug_datasets, - m2_core_datasets, - ); - } - Err(err) => { - error!(log_tmp, "DumpSetup mutex poisoned: {err:?}"); - } - }); - } - - fn poll_file_archival( - worker: Weak>, - log: Logger, - ) { - info!(log, "DumpSetup poll loop started."); - loop { - if let Some(mutex) = worker.upgrade() { - match mutex.lock() { - Ok(mut guard) => { - guard.reevaluate_choices(); - if let Err(err) = guard.archive_files() { - error!( - log, - "Failed to archive debug/dump files: {err:?}" - ); - } - } - Err(err) => { - error!( - log, - "DumpSetup mutex poisoned in poll thread: {err:?}" - ); - break; - } - } - } else { - info!( - log, - "DumpSetup weak pointer dropped, leaving poll loop." 
- ); - break; - } - std::thread::sleep(ARCHIVAL_INTERVAL); - } - } -} - -#[derive(Debug, thiserror::Error)] -enum ZfsGetError { - #[error("Error executing 'zfs get' command: {0}")] - IoError(#[from] std::io::Error), - #[error("Output of 'zfs get' was not only not an integer string, it wasn't even UTF-8: {0}")] - Utf8(#[from] std::string::FromUtf8Error), - #[error("Error parsing output of 'zfs get' command as integer: {0}")] - Parse(#[from] std::num::ParseIntError), -} - -const ZFS_PROP_USED: &str = "used"; -const ZFS_PROP_AVAILABLE: &str = "available"; - -fn zfs_get_integer( - mountpoint_or_name: impl AsRef, - property: &str, -) -> Result { - zfs_get_prop(mountpoint_or_name, property)?.parse().map_err(Into::into) -} - -fn zfs_get_prop( - mountpoint_or_name: impl AsRef + Sized, - property: &str, -) -> Result { - let mountpoint = mountpoint_or_name.as_ref(); - let mut cmd = std::process::Command::new(illumos_utils::zfs::ZFS); - cmd.arg("get").arg("-Hpo").arg("value"); - cmd.arg(property); - cmd.arg(mountpoint); - let output = cmd.output()?; - Ok(String::from_utf8(output.stdout)?.trim().to_string()) -} - -const DATASET_USAGE_PERCENT_CHOICE: u64 = 70; -const DATASET_USAGE_PERCENT_CLEANUP: u64 = 80; - -fn below_thresh( - mountpoint: &Utf8PathBuf, - percent: u64, -) -> Result<(bool, u64), ZfsGetError> { - let used = zfs_get_integer(mountpoint, ZFS_PROP_USED)?; - let available = zfs_get_integer(mountpoint, ZFS_PROP_AVAILABLE)?; - let capacity = used + available; - let below = (used * 100) / capacity < percent; - Ok((below, used)) -} - -impl DumpSetupWorker { - fn new(log: Logger) -> Self { - Self { - core_dataset_names: vec![], - debug_dataset_names: vec![], - chosen_dump_slice: None, - chosen_debug_dir: None, - chosen_core_dir: None, - known_dump_slices: vec![], - known_debug_dirs: vec![], - known_core_dirs: vec![], - savecored_slices: Default::default(), - log, - } - } - - fn update_disk_loadout( - &mut self, - dump_slices: Vec, - debug_datasets: Vec, - core_datasets: Vec, - ) { - self.core_dataset_names = core_datasets; - self.debug_dataset_names = debug_datasets; - - self.known_dump_slices = dump_slices; - - self.reevaluate_choices(); - } - - // only allow mounted zfs datasets into 'known_*_dirs', - // such that we don't render them non-auto-mountable by zfs - fn update_mounted_dirs(&mut self) { - self.known_debug_dirs = self - .debug_dataset_names - .iter() - .flat_map(|ds| ds.mountpoint()) - .flatten() - .collect(); - self.known_core_dirs = self - .core_dataset_names - .iter() - .flat_map(|ds| ds.mountpoint()) - .flatten() - .collect(); - } - - fn reevaluate_choices(&mut self) { - self.update_mounted_dirs(); - - self.known_dump_slices.sort(); - // sort key: prefer to choose a dataset where there's already other - // dumps so we don't shotgun them across every U.2, but only if they're - // below a certain usage threshold. - self.known_debug_dirs.sort_by_cached_key( - |mountpoint: &DebugDataset| { - match below_thresh(mountpoint.as_ref(), DATASET_USAGE_PERCENT_CHOICE) { - Ok((below, used)) => { - let priority = if below { 0 } else { 1 }; - (priority, used, mountpoint.clone()) - } - Err(err) => { - error!(self.log, "Could not query zfs properties of debug dump dir: {err:?}"); - // deprioritize anything we get errors querying. 
- (usize::MAX, u64::MAX, mountpoint.clone()) - } - } - }, - ); - self.known_core_dirs.sort_by_cached_key(|mnt| { - // these get archived periodically anyway, pick one with room - let available = zfs_get_integer(&**mnt, "available").unwrap_or(0); - (u64::MAX - available, mnt.clone()) - }); - - if let Some(x) = &self.chosen_debug_dir { - if !self.known_debug_dirs.contains(x) { - warn!(self.log, "Previously-chosen debug/dump dir {x:?} no longer exists in our view of reality"); - self.chosen_debug_dir = None; - } else { - match below_thresh(x.as_ref(), DATASET_USAGE_PERCENT_CLEANUP) { - Ok((true, _)) => {} - Ok((false, _)) => { - if self.known_debug_dirs.iter().any(|x| { - below_thresh( - x.as_ref(), - DATASET_USAGE_PERCENT_CHOICE, - ) - .unwrap_or((false, 0)) - .0 - }) { - info!(self.log, "Previously-chosen debug/dump dir {x:?} is over usage threshold, choosing a more vacant disk"); - self.chosen_debug_dir = None; - } else { - warn!(self.log, "All candidate debug/dump dirs are over usage threshold, removing older archived files"); - if let Err(err) = self.cleanup() { - error!(self.log, "Couldn't clean up any debug/dump dirs, may hit dataset quota in {x:?}: {err:?}"); - } else { - self.chosen_debug_dir = None; - } - } - } - Err(err) => { - error!(self.log, "Previously-chosen debug/dump dir {x:?} couldn't be queried for zfs properties! Choosing another. {err:?}"); - self.chosen_debug_dir = None; - } - } - } - } - if let Some(x) = &self.chosen_dump_slice { - if !self.known_dump_slices.contains(x) { - warn!(self.log, "Previously-chosen dump slice {x:?} no longer exists in our view of reality"); - self.chosen_dump_slice = None; - } - } - if let Some(x) = &self.chosen_core_dir { - if !self.known_core_dirs.contains(x) { - warn!(self.log, "Previously-chosen core dir {x:?} no longer exists in our view of reality"); - self.chosen_core_dir = None; - } - } - - if self.chosen_debug_dir.is_none() { - self.chosen_debug_dir = self.known_debug_dirs.first().cloned(); - } - - if self.chosen_core_dir.is_none() { - for core_dir in &self.known_core_dirs { - // tell the system to write *userspace process* cores here. - match illumos_utils::coreadm::coreadm(core_dir) { - Ok(()) => { - self.chosen_core_dir = Some(core_dir.clone()); - info!( - self.log, - "Set process core dump directory to {core_dir:?}" - ); - break; - } - Err(err) => { - error!(self.log, "Couldn't configure process core dump directory to {core_dir:?}: {err:?}"); - } - } - } - } - - if self.chosen_dump_slice.is_none() { - if self.chosen_debug_dir.is_some() { - for dump_slice in self.known_dump_slices.clone() { - // Let's try to see if it appears to have a kernel dump already - match illumos_utils::dumpadm::dump_flag_is_valid( - &dump_slice, - ) { - Ok(true) => { - debug!(self.log, "Dump slice {dump_slice:?} appears to have a valid header; will attempt to savecore"); - } - Ok(false) => { - info!(self.log, "Dump slice {dump_slice:?} appears to have already been saved"); - } - Err(err) => { - debug!(self.log, "Dump slice {dump_slice:?} appears to be unused: {err:?}"); - } - } - if let Ok(saved) = self.dumpadm_and_savecore(&dump_slice) { - if let Some(out) = saved { - info!(self.log, "Previous dump on slice {dump_slice:?} saved, configured slice as target for new dumps. {out:?}"); - } - self.chosen_dump_slice = Some(dump_slice); - break; - } - } - } else { - // Don't risk overwriting an existing kernel dump if there's - // already one there until we can attempt to savecore(8) - // it away and clear the flag to make room. 
- for dump_slice in &self.known_dump_slices { - match illumos_utils::dumpadm::dump_flag_is_valid(dump_slice) - { - Ok(false) => { - // Have dumpadm write the config for crash dumps to be - // on this slice, at least, until a U.2 comes along. - match illumos_utils::dumpadm::dumpadm( - dump_slice, None, - ) { - Ok(_) => { - info!(self.log, "Using dump device {dump_slice:?} with no savecore destination (no U.2 debug zvol yet)"); - self.chosen_dump_slice = - Some(dump_slice.clone()); - break; - } - Err(err) => { - warn!(self.log, "Could not configure {dump_slice:?} as dump device: {err:?}"); - } - } - } - Ok(true) => { - warn!(self.log, "Not configuring {dump_slice:?} as it appears to contain a dump we cannot yet send to a U.2 debug zvol"); - } - Err(err) => { - debug!( - self.log, - "Dump slice {dump_slice:?} appears to be unused : {err:?}", - ); - } - } - } - } - } - - if let Some(debug_dir) = self.chosen_debug_dir.clone() { - let mut changed_slice = false; - for dump_slice in self.known_dump_slices.clone() { - if !self.savecored_slices.contains(&dump_slice) { - changed_slice = true; - // temporarily changes the system's dump slice so savecore(8) - // can update the header in the slice when it finishes... - match self.dumpadm_and_savecore(&dump_slice) { - Ok(saved) => { - if let Some(stdout) = &saved { - info!( - self.log, - "Saved dump from {dump_slice:?} to {debug_dir:?}: {stdout:?}" - ); - } else { - info!( - self.log, - "Set {dump_slice:?} as system dump slice", - ); - } - } - Err(err) => { - warn!(self.log, "Could not configure {dump_slice:?} as dump device with {debug_dir:?} as savecore destination: {err:?}"); - } - } - } - } - - // ...so then we restore the chosen dump slice for the system to use - // in the event of a kernel crash - if changed_slice { - if let Some(dump_slice) = &self.chosen_dump_slice { - if let Err(err) = - illumos_utils::dumpadm::dumpadm(dump_slice, None) - { - error!(self.log, "Could not restore dump slice to {dump_slice:?}: {err:?}"); - } - } - } - } - } - - fn archive_files(&self) -> std::io::Result<()> { - if let Some(debug_dir) = &self.chosen_debug_dir { - if self.known_core_dirs.is_empty() { - info!(self.log, "No core dump locations yet known."); - } - for core_dir in &self.known_core_dirs { - if let Ok(dir) = core_dir.read_dir() { - for entry in dir.flatten() { - if let Some(path) = entry.file_name().to_str() { - let dest = debug_dir.join(path); - - if let Err(err) = - Self::copy_sync_and_remove(&entry.path(), &dest) - { - error!( - self.log, - "Failed to archive {entry:?}: {err:?}" - ); - } else { - info!( - self.log, - "Relocated {entry:?} to {dest:?}" - ); - } - } else { - error!(self.log, "Non-UTF8 path found while archiving core dumps: {entry:?}"); - } - } - } - } - } else { - info!( - self.log, - "No archival destination for crash dumps yet chosen." 
- ); - } - - if let Err(err) = self.archive_logs() { - if !matches!(err, ArchiveLogsError::NoDebugDirYet) { - error!( - self.log, - "Failure while trying to archive logs to debug dataset: {err:?}" - ); - } - } - - Ok(()) - } - - fn copy_sync_and_remove( - source: impl AsRef, - dest: impl AsRef, - ) -> std::io::Result<()> { - let source = source.as_ref(); - let dest = dest.as_ref(); - let mut dest_f = std::fs::File::create(&dest)?; - let mut src_f = std::fs::File::open(&source)?; - - std::io::copy(&mut src_f, &mut dest_f)?; - - dest_f.sync_all()?; - - drop(src_f); - drop(dest_f); - - std::fs::remove_file(source)?; - Ok(()) - } - - fn archive_logs(&self) -> Result<(), ArchiveLogsError> { - let debug_dir = self - .chosen_debug_dir - .as_ref() - .ok_or(ArchiveLogsError::NoDebugDirYet)?; - // zone crate's 'deprecated' functions collide if you try to enable - // its 'sync' and 'async' features simultaneously :( - let rt = - tokio::runtime::Runtime::new().map_err(ArchiveLogsError::Tokio)?; - let oxz_zones = rt.block_on(Zones::get())?; - self.archive_logs_inner( - debug_dir, - PathBuf::from("/var/svc/log"), - "global", - )?; - for zone in oxz_zones { - let logdir = zone.path().join("root/var/svc/log"); - let zone_name = zone.name(); - self.archive_logs_inner(debug_dir, logdir, zone_name)?; - } - Ok(()) - } - - fn archive_logs_inner( - &self, - debug_dir: &DebugDataset, - logdir: PathBuf, - zone_name: &str, - ) -> Result<(), ArchiveLogsError> { - let mut rotated_log_files = Vec::new(); - // patterns matching archived logs, e.g. foo.log.3 - // keep checking for greater numbers of digits until we don't find any - for n in 1..9 { - let pattern = logdir - .join(format!("*.log.{}", "[0-9]".repeat(n))) - .to_str() - .ok_or_else(|| ArchiveLogsError::Utf8(zone_name.to_string()))? - .to_string(); - rotated_log_files.extend(glob::glob(&pattern)?.flatten()); - } - let dest_dir = debug_dir.join(zone_name).into_std_path_buf(); - if !rotated_log_files.is_empty() { - std::fs::create_dir_all(&dest_dir)?; - let count = rotated_log_files.len(); - info!( - self.log, - "Archiving {count} log files from {zone_name} zone" - ); - } - for entry in rotated_log_files { - let src_name = entry.file_name().unwrap(); - // as we archive them, logadm will keep resetting to .log.0, - // so we need to maintain our own numbering in the dest dataset. - // we'll use the modified date of the rotated log file, or try - // falling back to the time of archival if that fails, and - // falling back to counting up from 0 if *that* somehow fails. - let mut n = entry - .metadata() - .and_then(|m| m.modified()) - .unwrap_or_else(|_| SystemTime::now()) - .duration_since(UNIX_EPOCH) - .map(|d| d.as_secs()) - .unwrap_or(0); - let mut dest; - loop { - dest = dest_dir.join(src_name).with_extension(format!("{n}")); - if dest.exists() { - n += 1; - } else { - break; - } - } - if let Err(err) = Self::copy_sync_and_remove(&entry, dest) { - warn!(self.log, "Failed to archive {entry:?}: {err:?}"); - } - } - Ok(()) - } - - // Have dumpadm write the config for crash dumps to be - // on this slice, and then invoke savecore(8) to save any - // dump that's already present there. - // - // NOTE: because of the need to have dumpadm change the global - // state of which slice the system is using for dumps in order - // for savecore to behave the way we want (i.e. clear the flag - // after succeeding), we could hypothetically miss a dump if - // the kernel crashes again while savecore is still running. 
- fn dumpadm_and_savecore( - &mut self, - dump_slice: &DumpSlicePath, - ) -> Result, DumpAdmError> { - // TODO: untangle savecore from illumos_utils::dumpadm - assert!(self.chosen_debug_dir.is_some()); - - let savecore_dir = self.chosen_debug_dir.clone().unwrap().0; - - match illumos_utils::dumpadm::dumpadm(&dump_slice, Some(&savecore_dir)) - { - Ok(saved) => { - self.savecored_slices.insert(dump_slice.clone()); - Ok(saved) - } - Err(err) => Err(err), - } - } - - fn cleanup(&self) -> Result<(), CleanupError> { - let mut dir_info = Vec::new(); - for dir in &self.known_debug_dirs { - match Self::scope_dir_for_cleanup(dir) { - Ok(info) => { - dir_info.push((info, dir)); - } - Err(err) => { - error!(self.log, "Could not analyze {dir:?} for debug dataset cleanup task: {err:?}"); - } - } - } - if dir_info.is_empty() { - return Err(CleanupError::NoDatasetsToClean); - } - // find dir with oldest average time of files that must be deleted - // to achieve desired threshold, and reclaim that space. - dir_info.sort(); - 'outer: for (dir_info, dir) in dir_info { - let CleanupDirInfo { average_time: _, num_to_delete, file_list } = - dir_info; - for (_time, _bytes, path) in &file_list[..num_to_delete as usize] { - // if we are unable to remove a file, we cannot guarantee - // that we will reach our target size threshold, and suspect - // the i/o error *may* be an issue with the underlying disk, so - // we continue to the dataset with the next-oldest average age - // of files-to-delete in the sorted list. - if let Err(err) = std::fs::remove_file(&path) { - error!(self.log, "Couldn't delete {path:?} from debug dataset, skipping {dir:?}. {err:?}"); - continue 'outer; - } - } - // we made it through all the files we planned to remove, thereby - // freeing up enough space on one of the debug datasets for it to - // be chosen when reevaluating targets. - break; - } - Ok(()) - } - - fn scope_dir_for_cleanup( - debug_dir: &DebugDataset, - ) -> Result { - let used = zfs_get_integer(&**debug_dir, ZFS_PROP_USED)?; - let available = zfs_get_integer(&**debug_dir, ZFS_PROP_AVAILABLE)?; - let capacity = used + available; - - let target_used = capacity * DATASET_USAGE_PERCENT_CHOICE / 100; - - let mut file_list = Vec::new(); - // find all files in the debug dataset and sort by modified time - for path in glob::glob(debug_dir.join("**/*").as_str())?.flatten() { - let meta = std::fs::metadata(&path)?; - // we need this to be a Duration rather than SystemTime so we can - // do math to it later. - let time = meta.modified()?.duration_since(UNIX_EPOCH)?; - let size = meta.len(); - - file_list.push((time, size, path)) - } - file_list.sort(); - - // find how many old files must be deleted to get the dataset under - // the limit, and what the average age of that set is. 
- let mut possible_bytes = 0; - let mut total_time = Duration::ZERO; - let mut num_to_delete = 0; - for (time, size, _path) in &file_list { - if used - possible_bytes < target_used { - break; - } else { - total_time += *time; - num_to_delete += 1; - possible_bytes += size; - } - } - let average_time = - total_time.checked_div(num_to_delete).unwrap_or(Duration::MAX); - - Ok(CleanupDirInfo { average_time, num_to_delete, file_list }) - } -} - -#[derive(thiserror::Error, Debug)] -enum ArchiveLogsError { - #[error("Couldn't make an async runtime to get zone info: {0}")] - Tokio(std::io::Error), - #[error("I/O error: {0}")] - IoError(#[from] std::io::Error), - #[error("Error calling zoneadm: {0}")] - Zoneadm(#[from] AdmError), - #[error("Non-UTF8 zone path for zone {0}")] - Utf8(String), - #[error("Glob pattern invalid: {0}")] - Glob(#[from] glob::PatternError), - #[error( - "No debug dir into which we should archive logs has yet been chosen" - )] - NoDebugDirYet, -} - -#[derive(thiserror::Error, Debug)] -enum CleanupError { - #[error("No debug datasets were successfully evaluated for cleanup")] - NoDatasetsToClean, - #[error("Failed to query ZFS properties: {0}")] - ZfsError(#[from] ZfsGetError), - #[error("I/O error: {0}")] - IoError(#[from] std::io::Error), - #[error("Glob pattern invalid: {0}")] - Glob(#[from] glob::PatternError), - #[error("A file's observed modified time was before the Unix epoch: {0}")] - TimelineWentSideways(#[from] SystemTimeError), -} - -#[derive(Ord, PartialOrd, Eq, PartialEq)] -struct CleanupDirInfo { - average_time: Duration, - num_to_delete: u32, - file_list: Vec<(Duration, u64, PathBuf)>, -} diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs index 0c1b383d7f..fc08579d77 100644 --- a/sled-storage/src/lib.rs +++ b/sled-storage/src/lib.rs @@ -10,7 +10,6 @@ pub mod dataset; pub mod disk; -pub(crate) mod dump_setup; pub mod error; pub(crate) mod keyfile; pub mod manager;
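
For reference, a minimal sketch (not part of the patch) of the call-site pattern this change converges on: callers hold a cloneable StorageHandle from the new sled-storage crate instead of a StorageResources snapshot, and ask the long-running storage task for its latest view before querying mountpoints. The helper name below is hypothetical, and the Vec<Utf8PathBuf> return type of all_m2_mountpoints is inferred from the surrounding call sites; error handling is elided.

use camino::Utf8PathBuf;
use sled_storage::dataset::CONFIG_DATASET;
use sled_storage::manager::StorageHandle;

// Hypothetical helper mirroring the pattern used by the ledger and RSS plan
// code in this diff: fetch the latest storage resources from the long-running
// storage task, then list the M.2 mountpoints for the config dataset.
async fn config_dataset_mountpoints(
    storage_manager: &StorageHandle,
) -> Vec<Utf8PathBuf> {
    storage_manager
        .get_latest_resources()
        .await
        .all_m2_mountpoints(CONFIG_DATASET)
}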