From 8316247eaf4c629301be3bbafaa8c77aca93eb13 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 16 Jul 2024 12:29:01 -0700 Subject: [PATCH] Add sled-agent endpoint for fetching sled identifiers (#6086) - Adds a `SledIdentifiers` type, with the most salient bits of identifying metadata for a single sled - Adds sled-agent endpoint `/sled-identifiers` for fetching the above from the sled. The main goal is to provide the main data for #5267, attaching sled identifiers to most timeseries. - Small improvement to instructions in package-manifest.toml. - Small bugfix to error-reporting in `xtask download`. --- clients/sled-agent-client/src/lib.rs | 30 ++++++++++++++ common/src/api/internal/shared.rs | 20 +++++++++ dev-tools/xtask/src/download.rs | 2 +- openapi/sled-agent.json | 61 ++++++++++++++++++++++++++++ package-manifest.toml | 12 ++++-- sled-agent/src/http_entrypoints.rs | 19 ++++++++- sled-agent/src/sled_agent.rs | 30 +++++++++++++- 7 files changed, 168 insertions(+), 6 deletions(-) diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 42eefaf8b5..8a63cecd4f 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -593,6 +593,36 @@ impl From } } +impl From<omicron_common::api::internal::shared::SledIdentifiers> + for types::SledIdentifiers +{ + fn from( + value: omicron_common::api::internal::shared::SledIdentifiers, + ) -> Self { + Self { + model: value.model, + rack_id: value.rack_id, + revision: value.revision, + serial: value.serial, + sled_id: value.sled_id, + } + } +} + +impl From<types::SledIdentifiers> + for omicron_common::api::internal::shared::SledIdentifiers +{ + fn from(value: types::SledIdentifiers) -> Self { + Self { + model: value.model, + rack_id: value.rack_id, + revision: value.revision, + serial: value.serial, + sled_id: value.sled_id, + } + } +} + /// Exposes additional [`Client`] interfaces for use by the test suite. These /// are bonus endpoints, not generated in the real client.
#[async_trait] diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 884b4dc165..24bb339112 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -702,6 +702,26 @@ pub struct ResolvedVpcRouteSet { pub routes: HashSet, } +/// Identifiers for a single sled. +/// +/// This is intended primarily to be used in timeseries, to identify +/// sled from which metric data originates. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct SledIdentifiers { + /// Control plane ID of the rack this sled is a member of + pub rack_id: Uuid, + /// Control plane ID for the sled itself + pub sled_id: Uuid, + /// Model name of the sled + pub model: String, + /// Revision number of the sled + pub revision: u32, + /// Serial number of the sled + // + // NOTE: This is only guaranteed to be unique within a model. + pub serial: String, +} + #[cfg(test)] mod tests { use crate::api::internal::shared::AllowedSourceIps; diff --git a/dev-tools/xtask/src/download.rs b/dev-tools/xtask/src/download.rs index 37c9b7be8a..b5910e3915 100644 --- a/dev-tools/xtask/src/download.rs +++ b/dev-tools/xtask/src/download.rs @@ -242,7 +242,7 @@ async fn get_values_from_file( let content = tokio::fs::read_to_string(&path) .await - .context("Failed to read {path}")?; + .with_context(|| format!("Failed to read {path}"))?; for line in content.lines() { let line = line.trim(); let Some((key, value)) = line.split_once('=') else { diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 8165cfa9d6..13036f115b 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -710,6 +710,30 @@ } } }, + "/sled-identifiers": { + "get": { + "summary": "Fetch sled identifiers", + "operationId": "sled_identifiers", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledIdentifiers" + } + } + } + }, + "4XX": { + 
"$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/sled-role": { "get": { "operationId": "sled_role_get", @@ -4549,6 +4573,43 @@ "type": "string", "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" }, + "SledIdentifiers": { + "description": "Identifiers for a single sled.\n\nThis is intended primarily to be used in timeseries, to identify sled from which metric data originates.", + "type": "object", + "properties": { + "model": { + "description": "Model name of the sled", + "type": "string" + }, + "rack_id": { + "description": "Control plane ID of the rack this sled is a member of", + "type": "string", + "format": "uuid" + }, + "revision": { + "description": "Revision number of the sled", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "serial": { + "description": "Serial number of the sled", + "type": "string" + }, + "sled_id": { + "description": "Control plane ID for the sled itself", + "type": "string", + "format": "uuid" + } + }, + "required": [ + "model", + "rack_id", + "revision", + "serial", + "sled_id" + ] + }, "SledInstanceState": { "description": "A wrapper type containing a sled's total knowledge of the state of a specific VMM and the instance it incarnates.", "type": "object", diff --git a/package-manifest.toml b/package-manifest.toml index 561a61ec4c..2eb643ecb0 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -639,8 +639,10 @@ only_for_targets.image = "standard" # 1. Build the zone image manually # 1a. cd # 1b. cargo build --features=tofino_stub --release -# 1c. cargo xtask dist -o -r --features tofino_stub +# 1c. cargo xtask dist --format omicron --release --features tofino_stub # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out +# 3. 
Change the below `source.type` key to `"manual"` and comment out or remove +# the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" source.commit = "e83f4f164fd3dbb2100989a399a4fa087232ac36" @@ -664,8 +666,10 @@ only_for_targets.image = "standard" # 1. Build the zone image manually # 1a. cd # 1b. cargo build --features=tofino_asic --release -# 1c. cargo xtask dist -o -r --features tofino_asic +# 1c. cargo xtask dist --format omicron --release --features tofino_asic # 2. Copy the output zone image from dendrite/out to omicron/out +# 3. Change the below `source.type` key to `"manual"` and comment out or remove +# the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" source.commit = "e83f4f164fd3dbb2100989a399a4fa087232ac36" @@ -682,8 +686,10 @@ only_for_targets.image = "standard" # 1. Build the zone image manually # 1a. cd # 1b. cargo build --features=softnpu --release -# 1c. cargo xtask dist -o -r --features softnpu +# 1c. cargo xtask dist --format omicron --release --features softnpu # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz +# 3. Change the below `source.type` key to `"manual"` and comment out or remove +# the other `source.*` keys. 
source.type = "prebuilt" source.repo = "dendrite" source.commit = "e83f4f164fd3dbb2100989a399a4fa087232ac36" diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index a21c278699..ff8c13105a 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -32,7 +32,7 @@ use omicron_common::api::internal::nexus::{ DiskRuntimeState, SledInstanceState, UpdateArtifactId, }; use omicron_common::api::internal::shared::{ - ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, + ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, SwitchPorts, }; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use schemars::JsonSchema; @@ -89,6 +89,7 @@ pub fn api() -> SledApiDescription { api.register(host_os_write_status_get)?; api.register(host_os_write_status_delete)?; api.register(inventory)?; + api.register(sled_identifiers)?; api.register(bootstore_status)?; api.register(list_vpc_routes)?; api.register(set_vpc_routes)?; @@ -1012,6 +1013,22 @@ async fn inventory( Ok(HttpResponseOk(sa.inventory().await?)) } +/// Fetch sled identifiers +#[endpoint { + method = GET, + path = "/sled-identifiers", +}] +async fn sled_identifiers( + request_context: RequestContext<SledAgent>, +) -> Result<HttpResponseOk<SledIdentifiers>, HttpError> { + request_context + .context() + .sled_identifiers() + .await + .map(HttpResponseOk) + .map_err(HttpError::from) +} + /// Get the internal state of the local bootstore node #[endpoint { method = GET, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 9832144791..23a13487ef 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -51,7 +51,7 @@ use omicron_common::api::internal::nexus::{ }; use omicron_common::api::internal::shared::{ HostPortConfig, RackNetworkConfig, ResolvedVpcRouteSet, - ResolvedVpcRouteState, + ResolvedVpcRouteState, SledIdentifiers, }; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, @@ -156,6 +156,9 @@ pub
enum Error { #[error("Metrics error: {0}")] Metrics(#[from] crate::metrics::Error), + + #[error("Expected revision to fit in a u32, but found {0}")] + UnexpectedRevision(i64), } impl From<Error> for omicron_common::api::external::Error { @@ -1185,6 +1188,31 @@ impl SledAgent { &self.inner.boot_disk_os_writer } + /// Return identifiers for this sled. + /// + /// This is mostly used to identify timeseries data with the originating + /// sled. + /// + /// NOTE: This only returns the identifiers for the _sled_ itself. If you're + /// interested in the switch identifiers, MGS is the current best way to do + /// that, by asking for the local switch's slot, and then that switch's SP + /// state. + pub(crate) async fn sled_identifiers( + &self, + ) -> Result<SledIdentifiers, Error> { + let baseboard = self.inner.hardware.baseboard(); + Ok(SledIdentifiers { + rack_id: self.inner.start_request.body.rack_id, + sled_id: self.inner.id, + model: baseboard.model().to_string(), + revision: baseboard + .revision() + .try_into() + .map_err(|_| Error::UnexpectedRevision(baseboard.revision()))?, + serial: baseboard.identifier().to_string(), + }) + } + /// Return basic information about ourselves: identity and status /// /// This is basically a GET version of the information we push to Nexus on