Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewjstone committed Oct 11, 2023
1 parent 1a67b04 commit 1e61ea9
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 127 deletions.
2 changes: 1 addition & 1 deletion sled-agent/src/bootstrap/secret_retriever.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ impl LrtqOrHardcodedSecretRetriever {
///
/// The local retriever only returns keys for epoch 0
#[derive(Debug)]
struct HardcodedSecretRetriever {}
pub struct HardcodedSecretRetriever {}

#[async_trait]
impl SecretRetriever for HardcodedSecretRetriever {
Expand Down
8 changes: 4 additions & 4 deletions sled-agent/src/dump_setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ use omicron_common::disk::DiskIdentity;
use sled_hardware::DiskVariant;
use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET};
use sled_storage::disk::Disk;
use sled_storage::pool::Pool;
use slog::Logger;
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeMap, HashSet};
use std::ffi::OsString;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Weak};
use std::time::{Duration, SystemTime, SystemTimeError, UNIX_EPOCH};
use tokio::sync::MutexGuard;

pub struct DumpSetup {
worker: Arc<std::sync::Mutex<DumpSetupWorker>>,
Expand Down Expand Up @@ -100,13 +100,13 @@ const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300);
impl DumpSetup {
pub(crate) async fn update_dumpdev_setup(
&self,
disks: &mut MutexGuard<'_, HashMap<DiskIdentity, Disk>>,
disks: &Arc<BTreeMap<DiskIdentity, (Disk, Pool)>>,
) {
let log = &self.log;
let mut m2_dump_slices = Vec::new();
let mut u2_debug_datasets = Vec::new();
let mut m2_core_datasets = Vec::new();
for (_id, disk) in disks.iter() {
for (_id, (disk, _)) in disks.iter() {
if disk.is_synthetic() {
// We only setup dump devices on real disks
continue;
Expand Down
22 changes: 22 additions & 0 deletions sled-agent/src/long_running_tasks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use crate::bootstrap::bootstore::{
};
use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever;
use crate::hardware_monitor::{HardwareMonitor, HardwareMonitorHandle};
use crate::storage_monitor::{StorageMonitor, StorageMonitorHandle};
use crate::zone_bundle::{CleanupContext, ZoneBundler};
use bootstore::schemes::v0 as bootstore;
use key_manager::{KeyManager, StorageKeyRequester};
Expand All @@ -39,6 +40,11 @@ pub struct LongRunningTaskHandles {
/// for establishing zpools on disks and managing their datasets.
pub storage_manager: StorageHandle,

/// A task which monitors for updates from the `StorageManager` and takes
/// actions based on those updates, such as informing Nexus and setting
/// up dump locations.
pub storage_monitor: StorageMonitorHandle,

/// A mechanism for interacting with the hardware device tree
pub hardware_manager: HardwareManager,

Expand All @@ -63,6 +69,8 @@ pub async fn spawn_all_longrunning_tasks(
let mut storage_manager =
spawn_storage_manager(log, storage_key_requester.clone());

let storage_monitor = spawn_storage_monitor(log, storage_manager.clone());

// TODO: Does this need to run inside tokio::task::spawn_blocking?
let hardware_manager = spawn_hardware_manager(log, sled_mode);

Expand All @@ -87,6 +95,7 @@ pub async fn spawn_all_longrunning_tasks(
LongRunningTaskHandles {
storage_key_requester,
storage_manager,
storage_monitor,
hardware_manager,
hardware_monitor,
bootstore,
Expand Down Expand Up @@ -115,6 +124,19 @@ fn spawn_storage_manager(
handle
}

/// Spawn the long-running `StorageMonitor` task.
///
/// Builds the monitor together with its channel-backed handle, detaches the
/// monitor's `run` loop onto the tokio runtime, and hands the caller the
/// handle used to communicate with the task.
fn spawn_storage_monitor(
    log: &Logger,
    storage_handle: StorageHandle,
) -> StorageMonitorHandle {
    info!(log, "Starting StorageMonitor");
    let (mut monitor, monitor_handle) =
        StorageMonitor::new(log, storage_handle);
    tokio::spawn(async move { monitor.run().await });
    monitor_handle
}

fn spawn_hardware_manager(
log: &Logger,
sled_mode: SledMode,
Expand Down
80 changes: 39 additions & 41 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//! Sled-local service management.
//!
//! For controlling zone-based storage services, refer to
//! [sled_hardware:manager::StorageManager].
//! [sled_storage:manager::StorageManager].
//!
//! For controlling virtual machine instances, refer to
//! [crate::instance_manager::InstanceManager].
Expand Down Expand Up @@ -2935,8 +2935,8 @@ impl ServiceManager {
#[cfg(test)]
mod test {
use super::*;
use crate::bootstrap::secret_retriever::HardcodedSecretRetriever;
use crate::params::{ServiceZoneService, ZoneType};
use async_trait::async_trait;
use illumos_utils::{
dladm::{
Etherstub, MockDladm, BOOTSTRAP_ETHERSTUB_NAME,
Expand All @@ -2945,10 +2945,9 @@ mod test {
svc,
zone::MockZones,
};
use key_manager::{
SecretRetriever, SecretRetrieverError, SecretState, VersionedIkm,
};
use key_manager::KeyManager;
use omicron_common::address::OXIMETER_PORT;
use sled_storage::manager::{StorageHandle, StorageManager};
use std::net::{Ipv6Addr, SocketAddrV6};
use std::os::unix::process::ExitStatusExt;
use uuid::Uuid;
Expand Down Expand Up @@ -3141,29 +3140,28 @@ mod test {
}
}

pub struct TestSecretRetriever {}

#[async_trait]
impl SecretRetriever for TestSecretRetriever {
async fn get_latest(
&self,
) -> Result<VersionedIkm, SecretRetrieverError> {
let epoch = 0;
let salt = [0u8; 32];
let secret = [0x1d; 32];

Ok(VersionedIkm::new(epoch, salt, &secret))
}
// Spawn storage related tasks and return a handle to pass to both the `ServiceManager`
// and `ZoneBundler`. However, it is expected that this handle is not actually used
// as there are no provisioned zones or datasets. This is consistent with the use of
// `test_config.override_paths` below.
async fn setup_storage(log: &Logger) -> StorageHandle {
let (mut key_manager, key_requester) =
KeyManager::new(log, HardcodedSecretRetriever {});
let (mut manager, handle) = StorageManager::new(log, key_requester);

// Spawn the key_manager so that it will respond to requests for encryption keys
tokio::spawn(async move { key_manager.run().await });

// Spawn the storage manager as done by sled-agent
tokio::spawn(async move {
manager.run().await;
});

async fn get(
&self,
epoch: u64,
) -> Result<SecretState, SecretRetrieverError> {
if epoch != 0 {
return Err(SecretRetrieverError::NoSuchEpoch(epoch));
}
Ok(SecretState::Current(self.get_latest().await?))
}
// Inform the storage manager that the secret retriever is ready. We
// are using the HardcodedSecretRetriever, so no need to wait for RSS
// or anything to setup the LRTQ
handle.key_manager_ready().await;
handle
}

#[tokio::test]
Expand All @@ -3174,10 +3172,10 @@ mod test {
let log = logctx.log.clone();
let test_config = TestConfig::new().await;

let resources = StorageResources::new_for_test();
let storage_handle = setup_storage(&log).await;
let zone_bundler = ZoneBundler::new(
log.clone(),
resources.clone(),
storage_handle.clone(),
Default::default(),
);
let mgr = ServiceManager::new(
Expand All @@ -3188,7 +3186,7 @@ mod test {
Some(true),
SidecarRevision::Physical("rev-test".to_string()),
vec![],
resources,
storage_handle,
zone_bundler,
);
test_config.override_paths(&mgr);
Expand Down Expand Up @@ -3222,10 +3220,10 @@ mod test {
let log = logctx.log.clone();
let test_config = TestConfig::new().await;

let resources = StorageResources::new_for_test();
let storage_handle = setup_storage(&log).await;
let zone_bundler = ZoneBundler::new(
log.clone(),
resources.clone(),
storage_handle.clone(),
Default::default(),
);
let mgr = ServiceManager::new(
Expand All @@ -3236,7 +3234,7 @@ mod test {
Some(true),
SidecarRevision::Physical("rev-test".to_string()),
vec![],
resources,
storage_handle,
zone_bundler,
);
test_config.override_paths(&mgr);
Expand Down Expand Up @@ -3275,10 +3273,10 @@ mod test {

// First, spin up a ServiceManager, create a new service, and tear it
// down.
let resources = StorageResources::new_for_test();
let storage_handle = setup_storage(&log).await;
let zone_bundler = ZoneBundler::new(
log.clone(),
resources.clone(),
storage_handle.clone(),
Default::default(),
);
let mgr = ServiceManager::new(
Expand All @@ -3289,7 +3287,7 @@ mod test {
Some(true),
SidecarRevision::Physical("rev-test".to_string()),
vec![],
resources.clone(),
storage_handle.clone(),
zone_bundler.clone(),
);
test_config.override_paths(&mgr);
Expand Down Expand Up @@ -3322,7 +3320,7 @@ mod test {
Some(true),
SidecarRevision::Physical("rev-test".to_string()),
vec![],
resources.clone(),
storage_handle.clone(),
zone_bundler.clone(),
);
test_config.override_paths(&mgr);
Expand Down Expand Up @@ -3358,10 +3356,10 @@ mod test {

// First, spin up a ServiceManager, create a new service, and tear it
// down.
let resources = StorageResources::new_for_test();
let storage_handle = setup_storage(&log).await;
let zone_bundler = ZoneBundler::new(
log.clone(),
resources.clone(),
storage_handle.clone(),
Default::default(),
);
let mgr = ServiceManager::new(
Expand All @@ -3372,7 +3370,7 @@ mod test {
Some(true),
SidecarRevision::Physical("rev-test".to_string()),
vec![],
resources.clone(),
storage_handle.clone(),
zone_bundler.clone(),
);
test_config.override_paths(&mgr);
Expand Down Expand Up @@ -3410,7 +3408,7 @@ mod test {
Some(true),
SidecarRevision::Physical("rev-test".to_string()),
vec![],
resources.clone(),
storage_handle,
zone_bundler.clone(),
);
test_config.override_paths(&mgr);
Expand Down
25 changes: 21 additions & 4 deletions sled-agent/src/storage_monitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,16 @@
//! and dispatches them to other parts of the bootstrap agent and sled agent
//! code.
use crate::dump_setup::DumpSetup;
use crate::nexus::NexusClientWithResolver;
use derive_more::From;
use futures::stream::FuturesOrdered;
use futures::FutureExt;
use nexus_client::types::PhysicalDiskDeleteRequest;
use nexus_client::types::PhysicalDiskKind;
use nexus_client::types::PhysicalDiskPutRequest;
use nexus_client::types::ZpoolPutRequest;
use omicron_common::api::external::ByteCount;
use omicron_common::backoff;
use sled_storage::disk::Disk;
use sled_storage::manager::StorageHandle;
use sled_storage::pool::Pool;
use sled_storage::resources::StorageResources;
Expand Down Expand Up @@ -74,6 +73,9 @@ pub struct StorageMonitor {

// A queue for sending nexus notifications in order
nexus_notifications: FuturesOrdered<NotifyFut>,

// Invokes dumpadm(8) and savecore(8) when new disks are encountered
dump_setup: DumpSetup,
}

impl StorageMonitor {
Expand All @@ -83,6 +85,7 @@ impl StorageMonitor {
) -> (StorageMonitor, StorageMonitorHandle) {
let (handle_tx, handle_rx) = mpsc::channel(QUEUE_SIZE);
let storage_resources = StorageResources::default();
let dump_setup = DumpSetup::new(&log);
let log = log.new(o!("component" => "StorageMonitor"));
(
StorageMonitor {
Expand All @@ -92,6 +95,7 @@ impl StorageMonitor {
storage_resources,
underlay: None,
nexus_notifications: FuturesOrdered::new(),
dump_setup,
},
StorageMonitorHandle { tx: handle_tx },
)
Expand Down Expand Up @@ -129,6 +133,9 @@ impl StorageMonitor {
let sled_id = underlay.sled_id;
self.underlay = Some(underlay);
self.notify_nexus_about_existing_resources(sled_id).await;
self.dump_setup
.update_dumpdev_setup(&self.storage_resources.disks)
.await;
}
}
}
Expand Down Expand Up @@ -162,6 +169,12 @@ impl StorageMonitor {
&self.storage_resources,
&updated_resources,
);
if nexus_updates.has_disk_updates() {
self.dump_setup
.update_dumpdev_setup(&self.storage_resources.disks)
.await;
}

for put in nexus_updates.disk_puts {
self.physical_disk_notify(put.into()).await;
}
Expand All @@ -171,8 +184,6 @@ impl StorageMonitor {
for (pool, put) in nexus_updates.zpool_puts {
self.add_zpool_notify(pool, put).await;
}

// TODO: Update Dump Setup if any diffs
}
// Save the updated `StorageResources`
self.storage_resources = updated_resources;
Expand Down Expand Up @@ -291,6 +302,12 @@ struct NexusUpdates {
zpool_puts: Vec<(Pool, ZpoolPutRequest)>,
}

impl NexusUpdates {
    /// Returns `true` when there is at least one physical-disk addition or
    /// deletion pending; changes to `zpool_puts` alone do not count as disk
    /// updates.
    fn has_disk_updates(&self) -> bool {
        let no_disk_changes =
            self.disk_puts.is_empty() && self.disk_deletes.is_empty();
        !no_disk_changes
    }
}

fn compute_resource_diffs(
log: &Logger,
sled_id: &Uuid,
Expand Down
Loading

0 comments on commit 1e61ea9

Please sign in to comment.