Skip to content

Commit

Permalink
[sled-agent][nexus][RSS] Implement RFD 278: RSS to Nexus Handoff (#1954)
Browse files Browse the repository at this point in the history
## Overview

- Implements https://rfd.shared.oxide.computer/rfd/0278 
- This PR moves much of the service configuration from the hard-coded
`config-rss.toml` file to RSS itself.
- In the future (See: #732) many of these services will be initialized
by Nexus. Decoupling their provisioning from the hard-coded versions is
the first step in this process.

### What Changed in the Sled Agent

- Sled Agent
- A new `get_zpools` endpoint is exposed from the Sled Agent. This is
invoked by RSS when figuring out where to provision datasets.
- The UUID for the sled agent is removed from the config file (it's
dynamic, and should not be shared among sleds)

### What Changed in RSS

- `HardcodedSledRequest` (and the corresponding entries in
`config-rss.toml`) has been removed
- A `plan` module was added, where plans for sled generation ("What
sleds should get what addresses?") and service generation ("What
services should run where?") are generated.
- Refactor service and dataset initialization to insert entries into DNS
- Invoke the `handoff_to_nexus`, informing it of all
previously-owned-by-RSS services.

### What Changed in Nexus

- Expand `RackInitializationRequest` to consider both services and
datasets
- `dataset_put` API removed -- beyond the initialization request, Nexus
should be responsible for provisioning new datasets, not the sled agent.

Fixes #1148
Part of #732
Part of #824
  • Loading branch information
smklein authored Dec 2, 2022
1 parent 3257446 commit 27dfb62
Show file tree
Hide file tree
Showing 46 changed files with 1,637 additions and 934 deletions.
2 changes: 1 addition & 1 deletion .github/buildomat/jobs/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ pfexec ipadm create-addr -T static -a 192.168.1.199/24 igb0/sidehatch
# address for upstream connectivity.
#
tar xf out/omicron-sled-agent.tar pkg/config-rss.toml
sed -e '/\[gateway\]/,/\[request\]/ s/^.*address =.*$/address = "192.168.1.199"/' \
sed -e 's/^# address =.*$/address = "192.168.1.199"/' \
-e "s/^mac =.*$/mac = \"$(dladm show-phys -m -p -o ADDRESS | head -n 1)\"/" \
-i pkg/config-rss.toml
tar rf out/omicron-sled-agent.tar pkg/config-rss.toml
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions common/src/address.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub const SLED_AGENT_PORT: u16 = 12345;
/// The port propolis-server listens on inside the propolis zone.
pub const PROPOLIS_PORT: u16 = 12400;
pub const COCKROACH_PORT: u16 = 32221;
pub const CRUCIBLE_PORT: u16 = 32345;
pub const CLICKHOUSE_PORT: u16 = 8123;
pub const OXIMETER_PORT: u16 = 12223;
pub const DENDRITE_PORT: u16 = 12224;
Expand Down
11 changes: 1 addition & 10 deletions end-to-end-tests/src/helpers/ctx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,16 +100,7 @@ pub fn nexus_addr() -> SocketAddr {
.join("../smf/sled-agent/config-rss.toml");
if rss_config_path.exists() {
if let Ok(config) = SetupServiceConfig::from_file(rss_config_path) {
for request in config.requests {
for zone in request.service_zones {
for service in zone.services {
if let ServiceType::Nexus { external_ip, .. } = service
{
return (external_ip, 80).into();
}
}
}
}
return (config.nexus_external_address, 80).into();
}
}

Expand Down
1 change: 1 addition & 0 deletions nexus/db-model/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ anyhow = "1.0"
chrono = { version = "0.4", features = ["serde"] }
diesel = { version = "2.0.2", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] }
hex = "0.4.3"
internal-dns-client = { path = "../../internal-dns-client" }
ipnetwork = "0.20"
macaddr = { version = "1.0.1", features = [ "serde_std" ]}
newtype_derive = "0.1.6"
Expand Down
31 changes: 24 additions & 7 deletions nexus/db-model/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@

use super::{DatasetKind, Generation, Region, SqlU16};
use crate::collection::DatastoreCollectionConfig;
use crate::ipv6;
use crate::schema::{dataset, region};
use chrono::{DateTime, Utc};
use db_macros::Asset;
use internal_dns_client::names::{BackendName, ServiceName, AAAA, SRV};
use nexus_types::identity::Asset;
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
use std::net::{Ipv6Addr, SocketAddrV6};
use uuid::Uuid;

/// Database representation of a Dataset.
Expand All @@ -35,18 +38,18 @@ pub struct Dataset {

pub pool_id: Uuid,

ip: ipnetwork::IpNetwork,
ip: ipv6::Ipv6Addr,
port: SqlU16,

kind: DatasetKind,
pub kind: DatasetKind,
pub size_used: Option<i64>,
}

impl Dataset {
pub fn new(
id: Uuid,
pool_id: Uuid,
addr: SocketAddr,
addr: SocketAddrV6,
kind: DatasetKind,
) -> Self {
let size_used = match kind {
Expand All @@ -65,12 +68,26 @@ impl Dataset {
}
}

pub fn address(&self) -> SocketAddr {
pub fn address(&self) -> SocketAddrV6 {
self.address_with_port(self.port.into())
}

pub fn address_with_port(&self, port: u16) -> SocketAddr {
SocketAddr::new(self.ip.ip(), port)
pub fn address_with_port(&self, port: u16) -> SocketAddrV6 {
SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0)
}

pub fn aaaa(&self) -> AAAA {
AAAA::Zone(self.id())
}

pub fn srv(&self) -> SRV {
match self.kind {
DatasetKind::Crucible => {
SRV::Backend(BackendName::Crucible, self.id())
}
DatasetKind::Clickhouse => SRV::Service(ServiceName::Clickhouse),
DatasetKind::Cockroach => SRV::Service(ServiceName::Cockroach),
}
}
}

Expand Down
12 changes: 11 additions & 1 deletion nexus/db-model/src/ipv6.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,19 @@ use diesel::sql_types::Inet;
use ipnetwork::IpNetwork;
use ipnetwork::Ipv6Network;
use omicron_common::api::external::Error;
use serde::{Deserialize, Serialize};

#[derive(
Clone, Copy, AsExpression, FromSqlRow, PartialEq, Ord, PartialOrd, Eq,
Clone,
Copy,
AsExpression,
FromSqlRow,
PartialEq,
Ord,
PartialOrd,
Eq,
Deserialize,
Serialize,
)]
#[diesel(sql_type = Inet)]
pub struct Ipv6Addr(std::net::Ipv6Addr);
Expand Down
8 changes: 5 additions & 3 deletions nexus/db-model/src/service_kind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ use nexus_types::internal_api;
use serde::{Deserialize, Serialize};

impl_enum_type!(
#[derive(SqlType, Debug, QueryId)]
#[derive(Clone, SqlType, Debug, QueryId)]
#[diesel(postgres_type(name = "service_kind"))]
pub struct ServiceKindEnum;

#[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)]
#[diesel(sql_type = ServiceKindEnum)]
pub enum ServiceKind;

Expand All @@ -29,7 +29,9 @@ impl From<internal_api::params::ServiceKind> for ServiceKind {
internal_api::params::ServiceKind::InternalDNS => {
ServiceKind::InternalDNS
}
internal_api::params::ServiceKind::Nexus => ServiceKind::Nexus,
internal_api::params::ServiceKind::Nexus { .. } => {
ServiceKind::Nexus
}
internal_api::params::ServiceKind::Oximeter => {
ServiceKind::Oximeter
}
Expand Down
54 changes: 50 additions & 4 deletions nexus/src/app/rack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::authz;
use crate::context::OpContext;
use crate::db;
use crate::db::lookup::LookupPath;
use crate::internal_api::params::ServicePutRequest;
use crate::internal_api::params::RackInitializationRequest;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::Error;
use omicron_common::api::external::ListResultVec;
Expand Down Expand Up @@ -57,12 +57,13 @@ impl super::Nexus {
&self,
opctx: &OpContext,
rack_id: Uuid,
services: Vec<ServicePutRequest>,
request: RackInitializationRequest,
) -> Result<(), Error> {
opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;

// Convert from parameter -> DB type.
let services: Vec<_> = services
let services: Vec<_> = request
.services
.into_iter()
.map(|svc| {
db::model::Service::new(
Expand All @@ -74,10 +75,55 @@ impl super::Nexus {
})
.collect();

// TODO(https://github.com/oxidecomputer/omicron/issues/1958): If nexus, add a pool?

let datasets: Vec<_> = request
.datasets
.into_iter()
.map(|dataset| {
db::model::Dataset::new(
dataset.dataset_id,
dataset.zpool_id,
dataset.request.address,
dataset.request.kind.into(),
)
})
.collect();

self.db_datastore
.rack_set_initialized(opctx, rack_id, services)
.rack_set_initialized(opctx, rack_id, services, datasets)
.await?;

Ok(())
}

/// Awaits the initialization of the rack.
///
/// This will occur by either:
/// 1. RSS invoking the internal API, handing off responsibility, or
/// 2. Re-reading a value from the DB, if the rack has already been
/// initialized.
///
/// See RFD 278 for additional context.
pub async fn await_rack_initialization(&self, opctx: &OpContext) {
loop {
let result = self.rack_lookup(&opctx, &self.rack_id).await;
match result {
Ok(rack) => {
if rack.initialized {
info!(self.log, "Rack initialized");
return;
}
info!(
self.log,
"Still waiting for rack initialization: {:?}", rack
);
}
Err(e) => {
warn!(self.log, "Cannot look up rack: {}", e);
}
}
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
}
}
}
4 changes: 2 additions & 2 deletions nexus/src/app/sled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use omicron_common::api::external::Error;
use omicron_common::api::external::ListResultVec;
use omicron_common::api::external::LookupResult;
use sled_agent_client::Client as SledAgentClient;
use std::net::{Ipv6Addr, SocketAddr};
use std::net::{Ipv6Addr, SocketAddrV6};
use std::sync::Arc;
use uuid::Uuid;

Expand Down Expand Up @@ -126,7 +126,7 @@ impl super::Nexus {
&self,
id: Uuid,
zpool_id: Uuid,
address: SocketAddr,
address: SocketAddrV6,
kind: DatasetKind,
) -> Result<(), Error> {
info!(self.log, "upserting dataset"; "zpool_id" => zpool_id.to_string(), "dataset_id" => id.to_string(), "address" => address.to_string());
Expand Down
20 changes: 7 additions & 13 deletions nexus/src/db/datastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,9 +264,7 @@ mod test {
use omicron_test_utils::dev;
use ref_cast::RefCast;
use std::collections::HashSet;
use std::net::Ipv6Addr;
use std::net::SocketAddrV6;
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6};
use std::sync::Arc;
use uuid::Uuid;

Expand Down Expand Up @@ -514,8 +512,7 @@ mod test {

// ... and datasets within that zpool.
let dataset_count = REGION_REDUNDANCY_THRESHOLD * 2;
let bogus_addr =
SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080);
let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0);
let dataset_ids: Vec<Uuid> =
(0..dataset_count).map(|_| Uuid::new_v4()).collect();
for id in &dataset_ids {
Expand Down Expand Up @@ -614,8 +611,7 @@ mod test {

// ... and datasets within that zpool.
let dataset_count = REGION_REDUNDANCY_THRESHOLD;
let bogus_addr =
SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080);
let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0);
let dataset_ids: Vec<Uuid> =
(0..dataset_count).map(|_| Uuid::new_v4()).collect();
for id in &dataset_ids {
Expand Down Expand Up @@ -691,8 +687,7 @@ mod test {

// ... and datasets within that zpool.
let dataset_count = REGION_REDUNDANCY_THRESHOLD - 1;
let bogus_addr =
SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080);
let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0);
let dataset_ids: Vec<Uuid> =
(0..dataset_count).map(|_| Uuid::new_v4()).collect();
for id in &dataset_ids {
Expand Down Expand Up @@ -748,8 +743,7 @@ mod test {

// ... and datasets within that zpool.
let dataset_count = REGION_REDUNDANCY_THRESHOLD;
let bogus_addr =
SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080);
let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0);
let dataset_ids: Vec<Uuid> =
(0..dataset_count).map(|_| Uuid::new_v4()).collect();
for id in &dataset_ids {
Expand Down Expand Up @@ -1025,14 +1019,14 @@ mod test {

// Initialize the Rack.
let result = datastore
.rack_set_initialized(&opctx, rack.id(), vec![])
.rack_set_initialized(&opctx, rack.id(), vec![], vec![])
.await
.unwrap();
assert!(result.initialized);

// Re-initialize the rack (check for idempotency)
let result = datastore
.rack_set_initialized(&opctx, rack.id(), vec![])
.rack_set_initialized(&opctx, rack.id(), vec![], vec![])
.await
.unwrap();
assert!(result.initialized);
Expand Down
Loading

0 comments on commit 27dfb62

Please sign in to comment.