Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewjstone committed Oct 6, 2023
1 parent e8afd42 commit f540c08
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 92 deletions.
79 changes: 27 additions & 52 deletions sled-agent/src/bootstrap/pre_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
#![allow(clippy::result_large_err)]

use super::maghemite;
use super::secret_retriever::LrtqOrHardcodedSecretRetriever;
use super::server::StartError;
use crate::config::Config;
use crate::long_running_tasks::{
spawn_all_longrunning_tasks, LongRunningTaskHandles,
};
use crate::services::ServiceManager;
use crate::sled_agent::SledAgent;
use crate::storage_manager::StorageManager;
Expand All @@ -29,15 +31,15 @@ use illumos_utils::zfs;
use illumos_utils::zfs::Zfs;
use illumos_utils::zone;
use illumos_utils::zone::Zones;
use key_manager::KeyManager;
use key_manager::StorageKeyRequester;
use omicron_common::address::Ipv6Subnet;
use omicron_common::FileKv;
use sled_hardware::underlay;
use sled_hardware::DendriteAsic;
use sled_hardware::HardwareManager;
use sled_hardware::HardwareUpdate;
use sled_hardware::SledMode;
use sled_storage::disk::SyntheticDisk;
use sled_storage::manager::StorageHandle;
use slog::Drain;
use slog::Logger;
use std::net::IpAddr;
Expand Down Expand Up @@ -200,36 +202,24 @@ impl BootstrapAgentStartup {
// This should be a no-op if already enabled.
BootstrapNetworking::enable_ipv6_forwarding().await?;

// Spawn the `KeyManager` which is needed by the StorageManager to
// retrieve encryption keys.
let (storage_key_requester, key_manager_handle) =
spawn_key_manager_task(&base_log);

let sled_mode = sled_mode_from_config(&config)?;

// Start monitoring hardware. This is blocking so we use
// `spawn_blocking`; similar to above, we move some things in and (on
// success) it gives them back.
let (base_log, log, hardware_manager) = {
tokio::task::spawn_blocking(move || {
info!(
log, "Starting hardware monitor";
"sled_mode" => ?sled_mode,
);
let hardware_manager =
HardwareManager::new(&base_log, sled_mode)
.map_err(StartError::StartHardwareManager)?;
Ok::<_, StartError>((base_log, log, hardware_manager))
})
.await
.unwrap()?
};
// Spawn all important long running tasks that live for the lifetime of
// the process and are used by both the bootstrap agent and sled agent
let long_running_task_handles = spawn_all_longrunning_tasks(
&base_log,
sled_mode,
startup_networking.global_zone_bootstrap_ip,
)
.await;

// Create a `StorageManager` and (possibly) synthetic disks.
let storage_manager =
StorageManager::new(&base_log, storage_key_requester).await;
upsert_synthetic_zpools_if_needed(&log, &storage_manager, &config)
.await;
// Add some synthetic disks if necessary.
upsert_synthetic_zpools_if_needed(
&log,
&long_running_task_handles.storage_manager,
&config,
)
.await;

let global_zone_bootstrap_ip =
startup_networking.global_zone_bootstrap_ip;
Expand All @@ -242,7 +232,7 @@ impl BootstrapAgentStartup {
config.skip_timesync,
config.sidecar_revision.clone(),
config.switch_zone_maghemite_links.clone(),
storage_manager.resources().clone(),
long_running_task_handles.storage_manager.clone(),
storage_manager.zone_bundler().clone(),
);

Expand Down Expand Up @@ -357,13 +347,10 @@ fn ensure_zfs_key_directory_exists(log: &Logger) -> Result<(), StartError> {
// to create and mount encrypted datasets.
info!(
log, "Ensuring zfs key directory exists";
"path" => sled_hardware::disk::KEYPATH_ROOT,
"path" => zfs::KEYPATH_ROOT,
);
std::fs::create_dir_all(sled_hardware::disk::KEYPATH_ROOT).map_err(|err| {
StartError::CreateZfsKeyDirectory {
dir: sled_hardware::disk::KEYPATH_ROOT,
err,
}
std::fs::create_dir_all(zfs::KEYPATH_ROOT).map_err(|err| {
StartError::CreateZfsKeyDirectory { dir: zfs::KEYPATH_ROOT, err }
})
}

Expand All @@ -387,7 +374,7 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> {

async fn upsert_synthetic_zpools_if_needed(
log: &Logger,
storage_manager: &StorageManager,
storage_manager: &StorageHandle,
config: &Config,
) {
if let Some(pools) = &config.zpools {
Expand All @@ -397,7 +384,8 @@ async fn upsert_synthetic_zpools_if_needed(
"Upserting synthetic zpool to Storage Manager: {}",
pool.to_string()
);
storage_manager.upsert_synthetic_disk(pool.clone()).await;
let disk = SyntheticDisk::new(pool.clone()).into();
storage_manager.upsert_disk(disk).await;
}
}
}
Expand Down Expand Up @@ -435,19 +423,6 @@ fn sled_mode_from_config(config: &Config) -> Result<SledMode, StartError> {
Ok(sled_mode)
}

/// Start the [`KeyManager`] on its own tokio task.
///
/// The manager is built from `log` and a fresh
/// [`LrtqOrHardcodedSecretRetriever`]. Returns the [`StorageKeyRequester`]
/// produced alongside the manager, together with the `JoinHandle` of the
/// spawned task that drives `KeyManager::run`.
fn spawn_key_manager_task(
    log: &Logger,
) -> (StorageKeyRequester, JoinHandle<()>) {
    let (mut manager, requester) =
        KeyManager::new(log, LrtqOrHardcodedSecretRetriever::new());

    // The manager is moved into the task; only the requester and the task
    // handle are handed back to the caller.
    let task = tokio::spawn(async move {
        manager.run().await
    });

    (requester, task)
}

#[derive(Debug, Clone)]
pub(crate) struct BootstrapNetworking {
pub(crate) bootstrap_etherstub: dladm::Etherstub,
Expand Down
1 change: 0 additions & 1 deletion sled-agent/src/bootstrap/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use super::rack_ops::RackInitId;
use super::views::SledAgentResponse;
use super::BootstrapError;
use super::RssAccessError;
use crate::bootstrap::bootstore::BootstoreHandles;
use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT;
use crate::bootstrap::http_entrypoints::api as http_api;
use crate::bootstrap::http_entrypoints::BootstrapServerContext;
Expand Down
30 changes: 26 additions & 4 deletions sled-agent/src/long_running_tasks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ use crate::bootstrap::bootstore::{
new_bootstore_config, poll_ddmd_for_bootstore_peer_update,
};
use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever;
use crate::zone_bundle::{CleanupContext, ZoneBundler};
use bootstore::schemes::v0 as bootstore;
use key_manager::{KeyManager, StorageKeyRequester};
use sled_agent_client::types::CleanupContext;
use sled_hardware::{HardwareManager, SledMode};
use sled_storage::manager::{StorageHandle, StorageManager};
use slog::{info, Logger};
Expand All @@ -35,24 +37,29 @@ pub struct LongRunningTaskHandles {

/// A mechanism for talking to the [`StorageManager`] which is responsible
/// for establishing zpools on disks and managing their datasets.
pub storage_handle: StorageHandle,
pub storage_manager: StorageHandle,

/// A mechanism for interacting with the hardware device tree
pub hardware_manager: HardwareManager,

/// A handle for interacting with the bootstore
pub bootstore: bootstore::NodeHandle,

/// A reference to the object used to manage zone bundles
pub zone_bundler: ZoneBundler,
}

/// Spawn all long running tasks
pub async fn spawn_all(
pub async fn spawn_all_longrunning_tasks(
log: &Logger,
sled_mode: SledMode,
global_zone_bootstrap_ip: Ipv6Addr,
) -> LongRunningTaskHandles {
let storage_key_requester = spawn_key_manager(log);
let mut storage_handle =
let mut storage_manager =
spawn_storage_manager(log, storage_key_requester.clone());

// TODO: Does this need to run inside tokio::task::spawn_blocking?
let hardware_manager = spawn_hardware_manager(log, sled_mode);

// Wait for the boot disk so that we can work with any ledgers,
Expand All @@ -67,9 +74,11 @@ pub async fn spawn_all(
)
.await;

let zone_bundler = spawn_zone_bundler_tasks(log, &mut storage_handle);

LongRunningTaskHandles {
storage_key_requester,
storage_handle,
storage_manager,
hardware_manager,
bootstore,
}
Expand Down Expand Up @@ -140,3 +149,16 @@ async fn spawn_bootstore_tasks(

node_handle
}

// `ZoneBundler::new` spawns a periodic cleanup task that runs indefinitely
fn spawn_zone_bundler_tasks(
log: &Logger,
storage_handle: &mut StorageHandle,
) -> ZoneBundler {
let log = log.new(o!("component" => "ZoneBundler"));
let zone_bundler = ZoneBundler::new(
log,
storage_handle.clone(),
CleanupContext::default(),
);
}
18 changes: 9 additions & 9 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//! Sled-local service management.
//!
//! For controlling zone-based storage services, refer to
//! [crate::storage_manager::StorageManager].
//! [sled_storage::manager::StorageManager].
//!
//! For controlling virtual machine instances, refer to
//! [crate::instance_manager::InstanceManager].
Expand Down Expand Up @@ -38,7 +38,6 @@ use crate::params::{
use crate::profile::*;
use crate::smf_helper::Service;
use crate::smf_helper::SmfHelper;
use crate::storage_manager::StorageResources;
use crate::zone_bundle::BundleError;
use crate::zone_bundle::ZoneBundler;
use anyhow::anyhow;
Expand Down Expand Up @@ -88,12 +87,14 @@ use omicron_common::nexus_config::{
use once_cell::sync::OnceCell;
use rand::prelude::SliceRandom;
use rand::SeedableRng;
use sled_hardware::disk::ZONE_DATASET;
use sled_hardware::is_gimlet;
use sled_hardware::underlay;
use sled_hardware::underlay::BOOTSTRAP_PREFIX;
use sled_hardware::Baseboard;
use sled_hardware::SledMode;
use sled_storage::dataset::{CONFIG_DATASET, ZONE_DATASET};
use sled_storage::manager::StorageHandle;
use sled_storage::resources::StorageResources;
use slog::Logger;
use std::collections::HashSet;
use std::collections::{BTreeMap, HashMap};
Expand Down Expand Up @@ -370,7 +371,7 @@ pub struct ServiceManagerInner {
advertised_prefixes: Mutex<HashSet<Ipv6Subnet<SLED_PREFIX>>>,
sled_info: OnceCell<SledAgentInfo>,
switch_zone_bootstrap_address: Ipv6Addr,
storage: StorageResources,
storage: StorageHandle,
zone_bundler: ZoneBundler,
ledger_directory_override: OnceCell<Utf8PathBuf>,
image_directory_override: OnceCell<Utf8PathBuf>,
Expand Down Expand Up @@ -415,7 +416,7 @@ impl ServiceManager {
skip_timesync: Option<bool>,
sidecar_revision: SidecarRevision,
switch_zone_maghemite_links: Vec<PhysicalLink>,
storage: StorageResources,
storage: StorageHandle,
zone_bundler: ZoneBundler,
) -> Self {
let log = log.new(o!("component" => "ServiceManager"));
Expand Down Expand Up @@ -470,13 +471,12 @@ impl ServiceManager {
}

async fn all_service_ledgers(&self) -> Vec<Utf8PathBuf> {
let resources = self.inner.storage.get_latest_resources().await;
if let Some(dir) = self.inner.ledger_directory_override.get() {
return vec![dir.join(SERVICES_LEDGER_FILENAME)];
}
self.inner
.storage
.all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET)
.await
resources
.all_m2_mountpoints(CONFIG_DATASET)
.into_iter()
.map(|p| p.join(SERVICES_LEDGER_FILENAME))
.collect()
Expand Down
2 changes: 1 addition & 1 deletion sled-agent/src/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ pub enum Error {
Instance(#[from] crate::instance_manager::Error),

#[error("Error managing storage: {0}")]
Storage(#[from] crate::storage_manager::Error),
Storage(#[from] sled_storage::error::Error),

#[error("Error updating: {0}")]
Download(#[from] crate::updates::Error),
Expand Down
Loading

0 comments on commit f540c08

Please sign in to comment.