Skip to content

Commit

Permalink
Tunnel sled initialization requests through sprockets sessions (#1128)
Browse files Browse the repository at this point in the history
  • Loading branch information
jgallagher authored Jun 6, 2022
1 parent a23b0d5 commit 01764e0
Show file tree
Hide file tree
Showing 12 changed files with 739 additions and 18 deletions.
253 changes: 244 additions & 9 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions deploy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ omicron-sled-agent = { path = "../sled-agent" }
omicron-package = { path = "../package" }
serde = { version = "1.0", features = [ "derive" ] }
serde_derive = "1.0"
sp-sim = { path = "../sp-sim" }
structopt = "0.3"
thiserror = "1.0"
toml = "0.5.9"

# Disable doc builds by default for our binaries to work around issue
# rust-lang/cargo#8373. These docs would not be very useful anyway.
Expand Down
36 changes: 35 additions & 1 deletion deploy/src/bin/sled-agent-overlay-files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ use omicron_sled_agent::bootstrap::trust_quorum::{
RackSecret, ShareDistribution,
};

use anyhow::{anyhow, Result};
use anyhow::{anyhow, Context, Result};
use sp_sim::config::GimletConfig;
use sp_sim::config::SpCommonConfig;
use std::path::PathBuf;
use structopt::StructOpt;

Expand Down Expand Up @@ -60,8 +62,40 @@ fn overlay_secret_shares(
Ok(())
}

/// Generate a config file for a simulated SP in each deployment server folder.
///
/// Writes a `config-sp.toml` into every directory in `server_dirs`. Each
/// generated config gets a distinct `serial_number` and
/// `device_id_cert_seed` by bumping the first byte of each once per
/// directory.
///
/// # Errors
///
/// Returns an error if a config cannot be serialized to TOML or if the
/// resulting file cannot be written.
///
/// # Panics
///
/// Panics if more than 255 directories are supplied, since the per-server
/// counter lives in a single `u8`.
fn overlay_sp_configs(server_dirs: &[PathBuf]) -> Result<()> {
    // We will eventually need to flesh out more of this config; for now,
    // it's sufficient to only generate an SP that emulates a RoT.
    let mut config = GimletConfig {
        common: SpCommonConfig {
            multicast_addr: None,
            bind_addrs: None,
            serial_number: [0; 16],
            manufacturing_root_cert_seed: [0; 32],
            device_id_cert_seed: [0; 32],
        },
        components: Vec::new(),
    };

    // Our lazy device ID generation fails if we overflow a u8.
    assert!(server_dirs.len() <= 255, "expand simulated SP ID generation");

    for server_dir in server_dirs {
        config.common.serial_number[0] += 1;
        config.common.device_id_cert_seed[0] += 1;

        let path = server_dir.join("config-sp.toml");

        // Propagate serialization failures to the caller instead of
        // panicking; this function already reports I/O errors via `Result`.
        let bytes = toml::ser::to_vec(&config).with_context(|| {
            format!("failed to serialize SP config for {}", path.display())
        })?;
        std::fs::write(&path, bytes)
            .with_context(|| format!("failed to write {}", path.display()))?;
    }

    Ok(())
}

/// Entry point: parse the command-line arguments, then overlay the secret
/// shares and the simulated SP configs onto the given directories.
fn main() -> Result<()> {
    // Convert an argument-parsing failure into an `anyhow` error.
    let args = match Args::from_args_safe() {
        Ok(parsed) => parsed,
        Err(err) => return Err(anyhow!(err)),
    };

    let directories = &args.directories;
    overlay_secret_shares(args.threshold, directories)?;
    overlay_sp_configs(directories)
}
3 changes: 3 additions & 0 deletions sled-agent/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_deb
slog-dtrace = "0.2"
smf = "0.2"
spdm = { git = "https://github.com/oxidecomputer/spdm", rev = "9742f6e" }
sp-sim = { path = "../sp-sim" }
sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" }
sprockets-proxy = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" }
socket2 = { version = "0.4", features = [ "all" ] }
structopt = "0.3"
tar = "0.4"
Expand Down
48 changes: 47 additions & 1 deletion sled-agent/src/bin/sled-agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use omicron_sled_agent::bootstrap::{
};
use omicron_sled_agent::rack_setup::config::SetupServiceConfig as RssConfig;
use omicron_sled_agent::{config::Config as SledConfig, server as sled_server};
use sp_sim::config::GimletConfig;
use std::net::SocketAddr;
use std::path::PathBuf;
use structopt::StructOpt;
Expand Down Expand Up @@ -108,6 +109,20 @@ async fn do_run() -> Result<(), CmdError> {
} else {
None
};
let sp_config_path = {
let mut sp_config_path = config_path.clone();
sp_config_path.pop();
sp_config_path.push("config-sp.toml");
sp_config_path
};
let sp_config = if sp_config_path.exists() {
Some(
GimletConfig::from_file(sp_config_path)
.map_err(|e| CmdError::Failure(e.to_string()))?,
)
} else {
None
};

// Derive the bootstrap address from the data link's MAC address.
let link = config
Expand All @@ -116,16 +131,47 @@ async fn do_run() -> Result<(), CmdError> {
let bootstrap_address = bootstrap_address(link)
.map_err(|e| CmdError::Failure(e.to_string()))?;

// Are we going to simulate a local SP? If so:
//
// 1. The bootstrap dropshot server listens on localhost
// 2. A sprockets proxy listens on `bootstrap_address` (and relays
// incoming connections to the localhost dropshot server)
//
// If we're not simulating a local SP, we can't establish sprockets
// sessions, so we'll have the bootstrap dropshot server listen on
// `bootstrap_address` (and no sprockets proxy).
//
// TODO-security: With this configuration, dropshot itself is
// running plain HTTP and blindly trusting all connections from
// localhost. We have a similar sprockets proxy on the client side,
// where the proxy blindly trusts all connections from localhost
// (although the client-side proxy only runs while a request is being made,
// while our dropshot server is always listening). Can we secure
// these connections sufficiently? Other options include expanding
// dropshot/progenitor to allow a custom connection layer (supported
// by hyper, but not reqwest), keeping the sprockets proxy but using
// something other than TCP that we can lock down, or abandoning
// dropshot and using a bespoke protocol over a raw
// sprockets-encrypted TCP connection.
let (bootstrap_dropshot_addr, sprockets_proxy_bind_addr) =
if sp_config.is_some() {
("[::1]:0".parse().unwrap(), Some(bootstrap_address))
} else {
(SocketAddr::V6(bootstrap_address), None)
};

// Configure and run the Bootstrap server.
let bootstrap_config = BootstrapConfig {
id: config.id,
dropshot: ConfigDropshot {
bind_address: SocketAddr::V6(bootstrap_address),
bind_address: bootstrap_dropshot_addr,
request_body_max_bytes: 1024 * 1024,
..Default::default()
},
log: config.log.clone(),
rss_config,
sprockets_proxy_bind_addr,
sp_config,
};

// TODO: It's a little silly to pass the config this way - namely,
Expand Down
5 changes: 5 additions & 0 deletions sled-agent/src/bootstrap/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::config::Config as SledConfig;
use crate::illumos::dladm::{self, Dladm, PhysicalLink};
use crate::illumos::zone::Zones;
use crate::server::Server as SledServer;
use crate::sp::SpHandle;
use omicron_common::address::get_sled_address;
use omicron_common::api::external::{Error as ExternalError, MacAddr};
use omicron_common::backoff::{
Expand Down Expand Up @@ -93,6 +94,7 @@ pub(crate) struct Agent {
rss: Mutex<Option<RssHandle>>,
sled_agent: Mutex<Option<SledServer>>,
sled_config: SledConfig,
sp: Option<SpHandle>,
}

fn get_sled_agent_request_path() -> PathBuf {
Expand Down Expand Up @@ -132,6 +134,7 @@ impl Agent {
log: Logger,
sled_config: SledConfig,
address: Ipv6Addr,
sp: Option<SpHandle>,
) -> Result<Self, BootstrapError> {
let ba_log = log.new(o!(
"component" => "BootstrapAgent",
Expand Down Expand Up @@ -190,6 +193,7 @@ impl Agent {
rss: Mutex::new(None),
sled_agent: Mutex::new(None),
sled_config,
sp,
};

let request_path = get_sled_agent_request_path();
Expand Down Expand Up @@ -405,6 +409,7 @@ impl Agent {
&self.parent_log,
rss_config.clone(),
self.peer_monitor.observer().await,
self.sp.clone(),
);
self.rss.lock().await.replace(rss);
}
Expand Down
8 changes: 8 additions & 0 deletions sled-agent/src/bootstrap/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use dropshot::ConfigDropshot;
use dropshot::ConfigLogging;
use serde::Deserialize;
use serde::Serialize;
use sp_sim::config::GimletConfig;
use std::net::SocketAddrV6;
use uuid::Uuid;

pub const BOOTSTRAP_AGENT_PORT: u16 = 12346;
Expand All @@ -20,4 +22,10 @@ pub struct Config {
pub log: ConfigLogging,

pub rss_config: Option<crate::rack_setup::config::SetupServiceConfig>,

// If present, `dropshot` should bind to a localhost address, and we'll
// configure a sprockets-proxy pointed to it that listens on this
// (non-localhost) address.
pub sprockets_proxy_bind_addr: Option<SocketAddrV6>,
pub sp_config: Option<GimletConfig>,
}
77 changes: 71 additions & 6 deletions sled-agent/src/bootstrap/rss_handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@ use super::discovery::PeerMonitorObserver;
use super::params::SledAgentRequest;
use crate::rack_setup::config::SetupServiceConfig;
use crate::rack_setup::service::Service;
use crate::sp::SpHandle;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use omicron_common::backoff::internal_service_policy;
use omicron_common::backoff::retry_notify;
use omicron_common::backoff::BackoffError;
use slog::Logger;
use std::net::SocketAddr;
use std::net::SocketAddrV6;
use std::time::Duration;
use thiserror::Error;
use tokio::sync::mpsc;
use tokio::sync::oneshot;
Expand Down Expand Up @@ -43,6 +46,7 @@ impl RssHandle {
log: &Logger,
config: SetupServiceConfig,
peer_monitor: PeerMonitorObserver,
sp: Option<SpHandle>,
) -> Self {
let (tx, rx) = rss_channel();

Expand All @@ -54,7 +58,7 @@ impl RssHandle {
);
let log = log.new(o!("component" => "BootstrapAgentRssHandler"));
let task = tokio::spawn(async move {
rx.initialize_sleds(&log).await;
rx.initialize_sleds(&log, &sp).await;
});
Self { _rss: rss, task }
}
Expand All @@ -65,6 +69,9 @@ enum InitializeSledAgentError {
#[error("Failed to construct an HTTP client: {0}")]
HttpClient(#[from] reqwest::Error),

#[error("Failed to start sprockets proxy: {0}")]
SprocketsProxy(#[from] sprockets_proxy::Error),

#[error("Error making HTTP request to Bootstrap Agent: {0}")]
BootstrapApi(
#[from]
Expand All @@ -76,6 +83,7 @@ async fn initialize_sled_agent(
log: &Logger,
bootstrap_addr: SocketAddrV6,
request: &SledAgentRequest,
sp: &Option<SpHandle>,
) -> Result<(), InitializeSledAgentError> {
let dur = std::time::Duration::from_secs(60);

Expand All @@ -84,8 +92,57 @@ async fn initialize_sled_agent(
.timeout(dur)
.build()?;

let url = format!("http://{}", bootstrap_addr);
info!(log, "Sending request to peer agent: {}", url);
let (url, _proxy_task) = if let Some(sp) = sp.as_ref() {
// We have an SP; spawn a sprockets proxy for this connection.
let proxy_config = sprockets_proxy::Config {
bind_address: "[::1]:0".parse().unwrap(),
target_address: SocketAddr::V6(bootstrap_addr),
role: sprockets_proxy::Role::Client,
};
// TODO-cleanup The `Duration` passed to `Proxy::new()` is the timeout
// for communicating with the RoT. Currently it can be set to anything
// at all (our simulated RoT always responds immediately). Should the
// value move to our config?
let proxy = sprockets_proxy::Proxy::new(
&proxy_config,
sp.manufacturing_public_key(),
sp.rot_handle(),
sp.rot_certs(),
Duration::from_secs(5),
log.new(o!("BootstrapAgentClientSprocketsProxy"
=> proxy_config.target_address)),
)
.await?;

let proxy_addr = proxy.local_addr();

let proxy_task = tokio::spawn(async move {
// TODO-robustness `proxy.run()` only fails if `accept()`ing on our
// already-bound listening socket fails, which means something has
// gone very wrong. Do we have any recourse other than panicking?
// What does dropshot do if `accept()` fails?
proxy.run().await.expect("sprockets client proxy failed");
});

// Wrap `proxy_task` in `AbortOnDrop`, which will abort it (shutting
// down the proxy) when we return.
let proxy_task = AbortOnDrop(proxy_task);

info!(
log, "Sending request to peer agent via sprockets proxy";
"peer" => %bootstrap_addr,
"sprockets_proxy" => %proxy_addr,
);
(format!("http://{}", proxy_addr), Some(proxy_task))
} else {
// We have no SP; connect directly.
info!(
log, "Sending request to peer agent";
"peer" => %bootstrap_addr,
);
(format!("http://{}", bootstrap_addr), None)
};

let client = bootstrap_agent_client::Client::new_with_client(
&url,
client,
Expand Down Expand Up @@ -119,7 +176,7 @@ async fn initialize_sled_agent(
};
retry_notify(internal_service_policy(), sled_agent_initialize, log_failure)
.await?;
info!(log, "Peer agent at {} initialized", url);
info!(log, "Peer agent initialized"; "peer" => %bootstrap_addr);
Ok(())
}

Expand Down Expand Up @@ -178,7 +235,7 @@ struct BootstrapAgentHandleReceiver {
}

impl BootstrapAgentHandleReceiver {
async fn initialize_sleds(mut self, log: &Logger) {
async fn initialize_sleds(mut self, log: &Logger, sp: &Option<SpHandle>) {
let (requests, tx_response) = match self.inner.recv().await {
Some(requests) => requests,
None => {
Expand All @@ -201,7 +258,7 @@ impl BootstrapAgentHandleReceiver {
"target_sled" => %bootstrap_addr,
);

initialize_sled_agent(log, bootstrap_addr, &request)
initialize_sled_agent(log, bootstrap_addr, &request, sp)
.await
.map_err(|err| {
format!(
Expand Down Expand Up @@ -241,3 +298,11 @@ impl BootstrapAgentHandleReceiver {
tx_response.send(Ok(())).unwrap();
}
}

/// Guard around a spawned task's `JoinHandle` that aborts the task when the
/// guard goes out of scope.
struct AbortOnDrop<T>(JoinHandle<T>);

impl<T> Drop for AbortOnDrop<T> {
    /// Abort the wrapped task.
    fn drop(&mut self) {
        let Self(handle) = self;
        handle.abort();
    }
}
Loading

0 comments on commit 01764e0

Please sign in to comment.