Skip to content

Commit

Permalink
[wicketd] Add refresh-config subcommand instead of using curl (#4606)
Browse files Browse the repository at this point in the history
This allows us to add some time for refreshing a wicketd that hasn't
fully started (and therefore isn't reachable on its dropshot server
yet).

Fixes #4604.

Testing this is a little awkward because config refresh is only support
on illumos via SMF, so I tested this by hand on `madrid`:

0. Disable wicketd (`svcadm disable wicketd`)
1. Install the new wicketd binary and `manifest.xml` in the switch zone
2. Import the new manifest.xml (`svccfg import
/var/svc/manifest/site/wicketd/manifest.xml`)
3. (I expected to need to restore the config properties that sled-agent
had set, but they persisted)
4. Enable wicketd and immediately try to refresh it (`svcadm enable
wicketd && svcadm refresh wicketd`)

In the wicketd logs, we see that the first refresh attempt failed
because it fired before the dropshot server was up:

```
[ Dec  4 18:34:22 Executing start method ("ctrun -l child -o noorphan,regent /opt/oxide/wicketd/bin/wicketd run /var/svc/manifest/site/wicketd/config.toml --address [::1]:12226 --artifact-address [fdb0:a840:2504:355::2]:12227 --mgs-address [::1]:12225 --nex
us-proxy-address [::]:12229 --baseboard-file /opt/oxide/baseboard.json --read-smf-config &"). ]
[ Dec  4 18:34:22 Method "start" exited with status 0. ]
[ Dec  4 18:34:22 Rereading configuration. ]
note: configured to log to "/dev/stdout"
[ Dec  4 18:34:22 Executing refresh method ("/opt/oxide/wicketd/bin/wicketd refresh-config /var/svc/manifest/site/wicketd/config.toml --address [::1]:12226"). ]
note: configured to log to "/dev/stdout"
18:34:22.330Z WARN wicketd: failed to refresh wicketd config (attempt 1 of 3); will retry after 5s
    err = Communication Error: error sending request for url (http://[::1]:12226/reload-config): error trying to connect: tcp connect error: Connection refused (os error 146)\nCaused by:\n  -> error sending request for url (http://[::1]:12226/reload-config)
: error trying to connect: tcp connect error: Connection refused (os error 146)\n  -> error trying to connect: tcp connect error: Connection refused (os error 146)\n  -> tcp connect error: Connection refused (os error 146)\n  -> Connection refused (os error
 146)
18:34:22.396Z INFO wicketd (dropshot (wicketd)): listening
    file = /home/john/.cargo/git/checkouts/dropshot-a4a923d29dccc492/ff87a01/dropshot/src/server.rs:195
    local_addr = [::1]:12226
```

10 seconds later, we see the successful connection, POST, and exit of
the SMF `refresh`:

```
18:34:32.332Z INFO wicketd (dropshot (wicketd)): accepted connection
    file = /home/john/.cargo/git/checkouts/dropshot-a4a923d29dccc492/ff87a01/dropshot/src/server.rs:769
    local_addr = [::1]:12226
    remote_addr = [::1]:32976
18:34:32.388Z INFO wicketd (dropshot (wicketd)): request completed
    file = /home/john/.cargo/git/checkouts/dropshot-a4a923d29dccc492/ff87a01/dropshot/src/server.rs:853
    latency_us = 30475
    local_addr = [::1]:12226
    method = POST
    remote_addr = [::1]:32976
    req_id = e0c2034a-0a99-45c1-a651-57249ca258f0
    response_code = 204
    uri = /reload-config
[ Dec  4 18:34:32 Method "refresh" exited with status 0. ]
```
  • Loading branch information
jgallagher authored Dec 5, 2023
1 parent 0a781f9 commit 7dca6fc
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 11 deletions.
2 changes: 1 addition & 1 deletion smf/wicketd/manifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
it expected https).
-->
<exec_method type='method' name='refresh'
exec='curl -X POST http://%{config/address}/reload-config'
exec='/opt/oxide/wicketd/bin/wicketd refresh-config /var/svc/manifest/site/wicketd/config.toml --address %{config/address}'
timeout_seconds='0' />

<property_group name='startd' type='framework'>
Expand Down
2 changes: 1 addition & 1 deletion wicketd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ sled-hardware.workspace = true
tufaceous-lib.workspace = true
update-engine.workspace = true
wicket-common.workspace = true
wicketd-client.workspace = true
omicron-workspace-hack.workspace = true

[[bin]]
Expand All @@ -83,4 +84,3 @@ tar.workspace = true
tokio = { workspace = true, features = ["test-util"] }
tufaceous.workspace = true
wicket.workspace = true
wicketd-client.workspace = true
41 changes: 36 additions & 5 deletions wicketd/src/bin/wicketd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//! Executable for wicketd: technician port based management service
use anyhow::{anyhow, Context};
use camino::Utf8PathBuf;
use clap::Parser;
use omicron_common::{
address::Ipv6Subnet,
Expand All @@ -24,9 +25,9 @@ enum Args {
/// Start a wicketd server
Run {
#[clap(name = "CONFIG_FILE_PATH", action)]
config_file_path: PathBuf,
config_file_path: Utf8PathBuf,

/// The address for the technician port
/// The address on which the main wicketd dropshot server should listen
#[clap(short, long, action)]
address: SocketAddrV6,

Expand Down Expand Up @@ -57,6 +58,19 @@ enum Args {
#[clap(long, action, conflicts_with("read_smf_config"))]
rack_subnet: Option<Ipv6Addr>,
},

/// Instruct a running wicketd server to refresh its config
///
/// Mechanically, this hits a specific endpoint served by wicketd's dropshot
/// server
RefreshConfig {
#[clap(name = "CONFIG_FILE_PATH", action)]
config_file_path: Utf8PathBuf,

/// The address of the server to refresh
#[clap(short, long, action)]
address: SocketAddrV6,
},
}

#[tokio::main]
Expand Down Expand Up @@ -104,9 +118,7 @@ async fn do_run() -> Result<(), CmdError> {
};

let config = Config::from_file(&config_file_path)
.with_context(|| {
format!("failed to parse {}", config_file_path.display())
})
.with_context(|| format!("failed to parse {config_file_path}"))
.map_err(CmdError::Failure)?;

let rack_subnet = match rack_subnet {
Expand Down Expand Up @@ -140,5 +152,24 @@ async fn do_run() -> Result<(), CmdError> {
.await
.map_err(|err| CmdError::Failure(anyhow!(err)))
}
Args::RefreshConfig { config_file_path, address } => {
let config = Config::from_file(&config_file_path)
.with_context(|| format!("failed to parse {config_file_path}"))
.map_err(CmdError::Failure)?;

let log = config
.log
.to_logger("wicketd")
.context("failed to initialize logger")
.map_err(CmdError::Failure)?;

// When run via `svcadm refresh ...`, we need to respect the special
// [SMF exit codes](https://illumos.org/man/7/smf_method). Returning
// an error from main exits with code 1 (from libc::EXIT_FAILURE),
// which does not collide with any special SMF codes.
Server::refresh_config(log, address)
.await
.map_err(CmdError::Failure)
}
}
}
68 changes: 64 additions & 4 deletions wicketd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ mod preflight_check;
mod rss_config;
mod update_tracker;

use anyhow::{anyhow, Result};
use anyhow::{anyhow, Context, Result};
use artifacts::{WicketdArtifactServer, WicketdArtifactStore};
use bootstrap_addrs::BootstrapPeers;
pub use config::Config;
pub(crate) use context::ServerContext;
use display_error_chain::DisplayErrorChain;
use dropshot::{ConfigDropshot, HandlerTaskMode, HttpServer};
pub use installinator_progress::{IprUpdateTracker, RunningUpdateState};
use internal_dns::resolver::Resolver;
Expand All @@ -34,6 +35,7 @@ use preflight_check::PreflightCheckerHandler;
use sled_hardware::Baseboard;
use slog::{debug, error, o, Drain};
use std::sync::{Mutex, OnceLock};
use std::time::Duration;
use std::{
net::{SocketAddr, SocketAddrV6},
sync::Arc,
Expand Down Expand Up @@ -70,7 +72,6 @@ pub struct SmfConfigValues {
impl SmfConfigValues {
#[cfg(target_os = "illumos")]
pub fn read_current() -> Result<Self> {
use anyhow::Context;
use illumos_utils::scf::ScfHandle;

const CONFIG_PG: &str = "config";
Expand Down Expand Up @@ -259,11 +260,70 @@ impl Server {
res = self.artifact_server => {
match res {
Ok(()) => Err("artifact server exited unexpectedly".to_owned()),
// The artifact server returns an anyhow::Error, which has a `Debug` impl that
// prints out the chain of errors.
// The artifact server returns an anyhow::Error, which has a
// `Debug` impl that prints out the chain of errors.
Err(err) => Err(format!("running artifact server: {err:?}")),
}
}
}
}

/// Instruct a running server at the specified address to reload its config
/// parameters
pub async fn refresh_config(
log: slog::Logger,
address: SocketAddrV6,
) -> Result<()> {
// It's possible we're being told to refresh a server's config before
// it's ready to receive such a request, so we'll give it a healthy
// amount of time before we give up: we'll set a client timeout and also
// retry a few times. See
// https://github.com/oxidecomputer/omicron/issues/4604.
const CLIENT_TIMEOUT: Duration = Duration::from_secs(5);
const SLEEP_BETWEEN_RETRIES: Duration = Duration::from_secs(10);
const NUM_RETRIES: usize = 3;

let client = reqwest::Client::builder()
.connect_timeout(CLIENT_TIMEOUT)
.timeout(CLIENT_TIMEOUT)
.build()
.context("failed to construct reqwest Client")?;

let client = wicketd_client::Client::new_with_client(
&format!("http://{address}"),
client,
log,
);
let log = client.inner();

let mut attempt = 0;
loop {
attempt += 1;

// If we succeed, we're done.
let Err(err) = client.post_reload_config().await else {
return Ok(());
};

// If we failed, either warn+sleep and try again, or fail.
if attempt < NUM_RETRIES {
slog::warn!(
log,
"failed to refresh wicketd config \
(attempt {attempt} of {NUM_RETRIES}); \
will retry after {CLIENT_TIMEOUT:?}";
"err" => %DisplayErrorChain::new(&err),
);
tokio::time::sleep(SLEEP_BETWEEN_RETRIES).await;
} else {
slog::error!(
log,
"failed to refresh wicketd config \
(tried {NUM_RETRIES} times)";
"err" => %DisplayErrorChain::new(&err),
);
return Err(err).context("failed to contact wicketd");
}
}
}
}

0 comments on commit 7dca6fc

Please sign in to comment.