Skip to content

Commit

Permalink
Revert "[sled-agent] PUT zones doesn't create datasets (#7006)" (#7157)
Browse files Browse the repository at this point in the history
This reverts commit 81c808c.

We cannot yet rely on `omicron-datasets.json` existing, because old
systems need new blueprints to receive that
configuration file. Unfortunately, systems exist (e.g. dogfood) which
have not had blueprints created that contain
necessary datasets - as a result, the original PR would prevent cold
boot from functioning properly, as the lack of `omicron-datasets.json`
would prevent zones from being initialized.

By reverting this PR, zones should be able to launch during cold boot
**without** an `omicron-datasets.json` file once more.
  • Loading branch information
smklein authored Nov 23, 2024
1 parent adaa2ec commit 1cf9437
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 104 deletions.
1 change: 0 additions & 1 deletion common/src/ledger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ impl<T: Ledgerable> Ledger<T> {
warn!(self.log, "Failed to write ledger"; "path" => ?path, "err" => ?e);
failed_paths.push((path.to_path_buf(), e));
} else {
info!(self.log, "Successfully wrote to ledger"; "path" => ?path);
one_successful_write = true;
}
}
Expand Down
18 changes: 1 addition & 17 deletions nexus-sled-agent-shared/src/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use omicron_common::{
external::{ByteCount, Generation},
internal::shared::{NetworkInterface, SourceNatConfig},
},
disk::{DatasetKind, DatasetName, DiskVariant},
disk::DiskVariant,
zpool_name::ZpoolName,
};
use omicron_uuid_kinds::{DatasetUuid, OmicronZoneUuid};
Expand Down Expand Up @@ -168,22 +168,6 @@ impl OmicronZoneConfig {
pub fn underlay_ip(&self) -> Ipv6Addr {
self.zone_type.underlay_ip()
}

/// If this zone has a transient filesystem, return the dataset's name.
/// Otherwise, return `None`.
pub fn transient_zone_dataset_name(&self) -> Option<DatasetName> {
self.filesystem_pool.as_ref().map(|pool| {
DatasetName::new(
pool.clone(),
DatasetKind::TransientZone {
name: illumos_utils::zone::zone_name(
self.zone_type.kind().zone_prefix(),
Some(self.id),
),
},
)
})
}
}

/// Describes a persistent ZFS dataset associated with an Omicron zone
Expand Down
70 changes: 0 additions & 70 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ use sled_storage::dataset::{CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET};
use sled_storage::manager::StorageHandle;
use slog::Logger;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::HashSet;
use std::net::{IpAddr, Ipv6Addr, SocketAddr};
use std::str::FromStr;
Expand Down Expand Up @@ -276,12 +275,6 @@ pub enum Error {
#[error("Error querying simnet devices")]
Simnet(#[from] GetSimnetError),

#[error("Failed to access storage")]
Storage(#[from] sled_storage::error::Error),

#[error("Missing datasets: {datasets:?}")]
MissingDatasets { datasets: BTreeSet<DatasetName> },

#[error(
"Requested generation ({requested}) is older than current ({current})"
)]
Expand Down Expand Up @@ -3478,54 +3471,6 @@ impl ServiceManager {
Ok(())
}

// Compares the incoming set of zones with the datasets this sled is
// configured to use.
//
// If the requested zones contain any datasets not configured on this sled,
// an error is returned.
//
// In this check the configured datasets are those intended to exist as
// written in a ledger. The datasets themselves may not actually physically
// exist yet, or they may have failed.
async fn check_requested_zone_datasets_configured(
&self,
request: &OmicronZonesConfig,
) -> Result<(), Error> {
// Before we provision these zones, inspect the request, and
// check all the datasets we're trying to make.
let mut requested_datasets = BTreeSet::new();
for zone in &request.zones {
if let Some(dataset) = zone.dataset_name() {
requested_datasets.insert(dataset);
}
if let Some(dataset) = zone.transient_zone_dataset_name() {
requested_datasets.insert(dataset);
}
}

// These datasets are configured to be part of the control plane.
let datasets_config = self.inner.storage.datasets_config_list().await?;
let existing_datasets: BTreeSet<_> = datasets_config
.datasets
.into_values()
.map(|config| config.name)
.collect();

// Check that the request is no larger than the configured set.
//
// (It's fine to have "existing_datasets" not contained in the
// request set).
let missing_datasets: BTreeSet<_> = requested_datasets
.difference(&existing_datasets)
.cloned()
.collect();
if !missing_datasets.is_empty() {
warn!(self.inner.log, "Missing datasets: {missing_datasets:?}");
return Err(Error::MissingDatasets { datasets: missing_datasets });
}
Ok(())
}

// Ensures that only the following Omicron zones are running.
//
// This method strives to be idempotent.
Expand All @@ -3549,8 +3494,6 @@ impl ServiceManager {
new_request: OmicronZonesConfig,
fake_install_dir: Option<&String>,
) -> Result<(), Error> {
self.check_requested_zone_datasets_configured(&new_request).await?;

// Do some data-normalization to ensure we can compare the "requested
// set" vs the "existing set" as HashSets.
let old_zone_configs: HashSet<OmicronZoneConfig> = existing_zones
Expand Down Expand Up @@ -4752,7 +4695,6 @@ mod illumos_tests {
zone::MockZones,
};

use omicron_common::disk::DatasetsConfig;
use omicron_uuid_kinds::OmicronZoneUuid;
use sled_storage::manager_test_harness::StorageManagerTestHarness;
use std::os::unix::process::ExitStatusExt;
Expand Down Expand Up @@ -5098,18 +5040,6 @@ mod illumos_tests {
.await
.expect("Failed to ensure disks");
assert!(!result.has_error(), "{:?}", result);
let result = harness
.handle()
.datasets_ensure(DatasetsConfig {
generation: Generation::new(),
datasets: BTreeMap::new(),
})
.await
.unwrap();
assert!(
!result.has_error(),
"Failed to ensure empty dataset ledger: {result:?}"
);
harness
}

Expand Down
19 changes: 19 additions & 0 deletions sled-agent/src/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use crate::metrics::MetricsManager;
use crate::nexus::{
NexusClient, NexusNotifierHandle, NexusNotifierInput, NexusNotifierTask,
};
use crate::params::OmicronZoneTypeExt;
use crate::probe_manager::ProbeManager;
use crate::services::{self, ServiceManager};
use crate::storage_monitor::StorageMonitorHandle;
Expand Down Expand Up @@ -942,6 +943,24 @@ impl SledAgent {
&self,
requested_zones: OmicronZonesConfig,
) -> Result<(), Error> {
// TODO(https://github.com/oxidecomputer/omicron/issues/6043):
// - If these are the set of filesystems, we should also consider
// removing the ones which are not listed here.
// - It's probably worth sending a bulk request to the storage system,
// rather than requesting individual datasets.
for zone in &requested_zones.zones {
let Some(dataset_name) = zone.dataset_name() else {
continue;
};

// First, ensure the dataset exists
let dataset_id = zone.id.into_untyped_uuid();
self.inner
.storage
.upsert_filesystem(dataset_id, dataset_name)
.await?;
}

self.inner
.services
.ensure_all_omicron_zones_persistent(requested_zones, None)
Expand Down
25 changes: 9 additions & 16 deletions sled-storage/src/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,6 @@ impl StorageManager {
let ledger_paths = self.all_omicron_dataset_ledgers().await;
let maybe_ledger =
Ledger::<DatasetsConfig>::new(&log, ledger_paths.clone()).await;
let had_old_ledger = maybe_ledger.is_some();

let mut ledger = match maybe_ledger {
Some(ledger) => {
Expand Down Expand Up @@ -915,14 +914,12 @@ impl StorageManager {
let result = self.datasets_ensure_internal(&log, &config).await;

let ledger_data = ledger.data_mut();

// Commit the ledger to storage if either:
// - No ledger exists in storage, or
// - The ledger has changed
if !had_old_ledger || *ledger_data != config {
*ledger_data = config;
ledger.commit().await?;
if *ledger_data == config {
return Ok(result);
}
*ledger_data = config;
ledger.commit().await?;

Ok(result)
}

Expand Down Expand Up @@ -1145,7 +1142,6 @@ impl StorageManager {
ledger_paths.clone(),
)
.await;
let had_old_ledger = maybe_ledger.is_some();

let mut ledger = match maybe_ledger {
Some(ledger) => {
Expand Down Expand Up @@ -1185,14 +1181,11 @@ impl StorageManager {
self.omicron_physical_disks_ensure_internal(&log, &config).await?;

let ledger_data = ledger.data_mut();

// Commit the ledger to storage if either:
// - No ledger exists in storage, or
// - The ledger has changed
if !had_old_ledger || *ledger_data != config {
*ledger_data = config;
ledger.commit().await?;
if *ledger_data == config {
return Ok(result);
}
*ledger_data = config;
ledger.commit().await?;

Ok(result)
}
Expand Down

0 comments on commit 1cf9437

Please sign in to comment.