Skip to content

Commit

Permalink
Adds functionality to run oximeter standalone (#4117)
Browse files Browse the repository at this point in the history
- Adds a "standalone" mode for the `oximeter-collector` crate, including
the binary and main inner types. This runs in a slightly different mode,
in which the ClickHouse database itself isn't strictly required. In this
case, a task to simply print the results will be spawned in place of the
normal results-sink task which inserts records into the database.
- Creates a tiny fake Nexus server, which includes only the API needed
to register collectors and producers. This is started automatically when
running `oximeter standalone`, and used to assign producers / collectors
as the real Nexus does, but without a database. The assignments are only
in memory.
- Adds internal `oximeter` API for listing / deleting a producer for
each oximeter collector, and an `omdb` subcommand which exercises the
listing.
  • Loading branch information
bnaecker authored Oct 4, 2023
1 parent b0487d3 commit ce81dd1
Show file tree
Hide file tree
Showing 17 changed files with 1,215 additions and 111 deletions.
12 changes: 12 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion common/src/api/internal/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ pub struct InstanceRuntimeState {

/// Information announced by a metric server, used so that clients can contact it and collect
/// available metric data from it.
#[derive(Debug, Clone, JsonSchema, Serialize, Deserialize)]
#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq)]
pub struct ProducerEndpoint {
pub id: Uuid,
pub address: SocketAddr,
Expand Down
2 changes: 2 additions & 0 deletions dev-tools/omdb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ diesel.workspace = true
dropshot.workspace = true
humantime.workspace = true
internal-dns.workspace = true
futures.workspace = true
nexus-client.workspace = true
nexus-db-model.workspace = true
nexus-db-queries.workspace = true
nexus-types.workspace = true
omicron-common.workspace = true
oximeter-client.workspace = true
# See omicron-rpaths for more about the "pq-sys" dependency.
pq-sys = "*"
serde.workspace = true
Expand Down
4 changes: 4 additions & 0 deletions dev-tools/omdb/src/bin/omdb/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ use std::net::SocketAddrV6;

mod db;
mod nexus;
mod oximeter;
mod sled_agent;

#[tokio::main]
Expand All @@ -57,6 +58,7 @@ async fn main() -> Result<(), anyhow::Error> {
match &args.command {
OmdbCommands::Db(db) => db.run_cmd(&args, &log).await,
OmdbCommands::Nexus(nexus) => nexus.run_cmd(&args, &log).await,
OmdbCommands::Oximeter(oximeter) => oximeter.run_cmd(&log).await,
OmdbCommands::SledAgent(sled) => sled.run_cmd(&args, &log).await,
}
}
Expand Down Expand Up @@ -155,6 +157,8 @@ enum OmdbCommands {
Db(db::DbArgs),
/// Debug a specific Nexus instance
Nexus(nexus::NexusArgs),
/// Query oximeter collector state
Oximeter(oximeter::OximeterArgs),
/// Debug a specific Sled
SledAgent(sled_agent::SledAgentArgs),
}
Expand Down
94 changes: 94 additions & 0 deletions dev-tools/omdb/src/bin/omdb/oximeter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! omdb commands that query oximeter
use anyhow::Context;
use clap::Args;
use clap::Subcommand;
use futures::TryStreamExt;
use oximeter_client::types::ProducerEndpoint;
use oximeter_client::Client;
use slog::Logger;
use std::net::SocketAddr;
use std::time::Duration;
use tabled::Table;
use tabled::Tabled;
use uuid::Uuid;

// Command-line arguments for the `oximeter` subcommand of omdb.
//
// NOTE: clap turns `///` doc comments on these items into help text, so
// explanatory notes here are deliberately written as plain `//` comments.
#[derive(Debug, Args)]
pub struct OximeterArgs {
/// URL of the oximeter collector to query
// Passed as a long option; may also be supplied through the
// `OMDB_OXIMETER_URL` environment variable.
#[arg(long, env("OMDB_OXIMETER_URL"))]
oximeter_url: String,

// The specific query to run against the collector.
#[command(subcommand)]
command: OximeterCommands,
}

/// Subcommands that query oximeter collector state
// Each variant is dispatched by `OximeterArgs::run_cmd`.
#[derive(Debug, Subcommand)]
enum OximeterCommands {
/// List the producers the collector is assigned to poll
// Unit variant: carries no additional arguments.
ListProducers,
}

impl OximeterArgs {
    /// Build a client for the collector at `self.oximeter_url`.
    ///
    /// The client logs through a child of `log` tagged with the
    /// `oximeter-client` component.
    fn client(&self, log: &Logger) -> Client {
        let client_log = log.new(slog::o!("component" => "oximeter-client"));
        Client::new(&self.oximeter_url, client_log)
    }

    /// Run the subcommand selected on the command line against the
    /// configured collector.
    pub async fn run_cmd(&self, log: &Logger) -> anyhow::Result<()> {
        let client = self.client(log);
        match &self.command {
            OximeterCommands::ListProducers => {
                self.list_producers(client).await
            }
        }
    }

    /// Print the collector's ID followed by a table of its producers.
    ///
    /// Both requests are completed before anything is printed, so a failed
    /// request produces no partial output.
    async fn list_producers(&self, client: Client) -> anyhow::Result<()> {
        use tabled::settings::{Padding, Style};

        let collector = client
            .collector_info()
            .await
            .context("failed to fetch collector info")?;

        // Drain the producer stream, converting each endpoint into a
        // printable table row as it arrives.
        let rows = client
            .producers_list_stream(None)
            .map_ok(Producer::from)
            .try_collect::<Vec<Producer>>()
            .await
            .context("failed to list producers")?;

        // Borderless table, with one column of padding to the right of
        // each cell.
        let rendered = Table::new(rows)
            .with(Style::empty())
            .with(Padding::new(0, 1, 0, 0))
            .to_string();

        println!("Collector ID: {}\n", collector.id);
        println!("{rendered}");
        Ok(())
    }
}

/// One row of the producer table printed by the `ListProducers` subcommand.
///
/// Values are filled in from a `ProducerEndpoint` via the `From` impl below.
/// Column headers come from the field names, rendered in
/// SCREAMING_SNAKE_CASE per the `rename_all` attribute.
#[derive(Tabled)]
#[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
struct Producer {
/// ID of the producer, copied from the endpoint.
id: Uuid,
/// Socket address of the producer, parsed from the endpoint's string form.
address: SocketAddr,
/// Base route of the producer, copied from the endpoint.
base_route: String,
/// The endpoint's interval, pre-formatted as a human-readable duration.
interval: String,
}

impl From<ProducerEndpoint> for Producer {
fn from(p: ProducerEndpoint) -> Self {
let interval = Duration::new(p.interval.secs, p.interval.nanos);
Self {
id: p.id,
address: p.address.parse().unwrap(),
base_route: p.base_route,
interval: humantime::format_duration(interval).to_string(),
}
}
}
2 changes: 2 additions & 0 deletions dev-tools/omdb/tests/usage_errors.out
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Usage: omdb [OPTIONS] <COMMAND>
Commands:
db Query the control plane database (CockroachDB)
nexus Debug a specific Nexus instance
oximeter Query oximeter collector state
sled-agent Debug a specific Sled
help Print this message or the help of the given subcommand(s)

Expand All @@ -33,6 +34,7 @@ Usage: omdb [OPTIONS] <COMMAND>
Commands:
db Query the control plane database (CockroachDB)
nexus Debug a specific Nexus instance
oximeter Query oximeter collector state
sled-agent Debug a specific Sled
help Print this message or the help of the given subcommand(s)

Expand Down
34 changes: 34 additions & 0 deletions docs/how-to-run.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -697,3 +697,37 @@ To build a recovery host image:
----
$ ./tools/build-host-image.sh -R $HELIOS_PATH /work/trampoline-global-zone-packages.tar.gz
----


== Running `oximeter` in standalone mode

`oximeter` is the program used to collect metrics from producers in the control
plane. Normally, the producers register themselves with `nexus`, which creates a
durable assignment between the producer and an `oximeter` collector in the
database. That allows components to survive restarts, while still producing
metrics.

To ease development, `oximeter` can be run in "standalone" mode. In this case, a
mock `nexus` server is started, with only the minimal subset of the internal API
needed to register producers and collectors. Neither CockroachDB nor ClickHouse
is required, although ClickHouse _can_ be used, if one wants to see how data is
inserted into the database.

To run `oximeter` in standalone mode, use:

[source,console]
----
$ cargo run --bin oximeter -- standalone
----

Producers should still register with `nexus` as normal, which is usually done
with an explicit IP address and port. The address defaults to `[::1]:12221`.

When run this way, `oximeter` will print the samples it collects from the
producers to its logs, like so:

[source,console]
----
Sep 26 17:48:56.006 INFO sample: Sample { measurement: Measurement { timestamp: 2023-09-26T17:48:56.004565890Z, datum: CumulativeF64(Cumulative { start_time: 2023-09-26T17:48:45.997404777Z, value: 10.007154703 }) }, timeseries_name: "virtual_machine:cpu_busy", target: FieldSet { name: "virtual_machine", fields: {"instance_id": Field { name: "instance_id", value: Uuid(564ef6df-d5f6-4204-88f7-5c615859cfa7) }, "project_id": Field { name: "project_id", value: Uuid(2dc7e1c9-f8ac-49d7-8292-46e9e2b1a61d) }} }, metric: FieldSet { name: "cpu_busy", fields: {"cpu_id": Field { name: "cpu_id", value: I64(0) }} } }, component: results-sink, collector_id: 78c7c9a5-1569-460a-8899-aada9ad5db6c, component: oximeter-standalone, component: nexus-standalone, file: oximeter/collector/src/lib.rs:280
Sep 26 17:48:56.006 INFO sample: Sample { measurement: Measurement { timestamp: 2023-09-26T17:48:56.004700841Z, datum: CumulativeF64(Cumulative { start_time: 2023-09-26T17:48:45.997405187Z, value: 10.007154703 }) }, timeseries_name: "virtual_machine:cpu_busy", target: FieldSet { name: "virtual_machine", fields: {"instance_id": Field { name: "instance_id", value: Uuid(564ef6df-d5f6-4204-88f7-5c615859cfa7) }, "project_id": Field { name: "project_id", value: Uuid(2dc7e1c9-f8ac-49d7-8292-46e9e2b1a61d) }} }, metric: FieldSet { name: "cpu_busy", fields: {"cpu_id": Field { name: "cpu_id", value: I64(1) }} } }, component: results-sink, collector_id: 78c7c9a5-1569-460a-8899-aada9ad5db6c, component: oximeter-standalone, component: nexus-standalone, file: oximeter/collector/src/lib.rs:280
----
Loading

0 comments on commit ce81dd1

Please sign in to comment.