Skip to content

Commit

Permalink
Sled/instance association for inventory and insights (#3092)
Browse files Browse the repository at this point in the history
  • Loading branch information
zephraph authored May 18, 2023
1 parent cd40ec6 commit 2b0ab64
Show file tree
Hide file tree
Showing 20 changed files with 460 additions and 32 deletions.
1 change: 1 addition & 0 deletions common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,7 @@ pub enum ResourceType {
Rack,
Service,
Sled,
SledInstance,
Switch,
SagaDbg,
Snapshot,
Expand Down
39 changes: 34 additions & 5 deletions common/src/sql/dbinit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -818,13 +818,14 @@ CREATE TABLE omicron.public.instance (
time_state_updated TIMESTAMPTZ NOT NULL,
state_generation INT NOT NULL,
/*
* Server where the VM is currently running, if any. Note that when we
* support live migration, there may be multiple servers associated with
* Sled where the VM is currently running, if any. Note that when we
* support live migration, there may be multiple sleds associated with
* this Instance, but only one will be truly active. Still, consumers of
* this information should consider whether they also want to know the other
* servers involved in the migration.
* sleds involved in the migration.
*/
active_server_id UUID,
active_sled_id UUID,

/* Identifies the underlying propolis-server backing the instance. */
active_propolis_id UUID NOT NULL,
active_propolis_ip INET,
Expand Down Expand Up @@ -862,10 +863,38 @@ CREATE UNIQUE INDEX ON omicron.public.instance (
-- Allow looking up instances by sled. This is particularly
-- useful for resource accounting within a sled.
CREATE INDEX ON omicron.public.instance (
active_server_id
active_sled_id
) WHERE
time_deleted IS NULL;

/*
* A special view of an instance provided to operators for insights into what's running
* on a sled.
*/

-- Read-only view that pairs each instance with the names of its parent
-- project and silo, so one row can be shown without further lookups.
CREATE VIEW omicron.public.sled_instance
AS SELECT
-- Instance identity plus the denormalized parent names.
instance.id,
instance.name,
silo.name as silo_name,
project.name as project_name,
-- Sled the instance is currently associated with; consumers filter on this.
instance.active_sled_id,
instance.time_created,
instance.time_modified,
instance.migration_id,
instance.ncpus,
instance.memory,
instance.state
FROM
omicron.public.instance AS instance
-- instance -> project -> silo: both are inner joins, so an instance without a
-- matching project or silo row does not appear in the view.
JOIN omicron.public.project AS project ON
instance.project_id = project.id
JOIN omicron.public.silo AS silo ON
project.silo_id = silo.id
WHERE
-- Only the instance's own soft-delete marker is checked here; the joined
-- project and silo rows are not filtered on their time_deleted columns.
instance.time_deleted IS NULL;


/*
* Guest-Visible, Virtual Disks
*/
Expand Down
2 changes: 1 addition & 1 deletion nexus/db-model/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ pub struct InstanceRuntimeState {
//
// TODO(#2315): This should be optional so that it can be cleared when the
// instance is not active.
#[diesel(column_name = active_server_id)]
#[diesel(column_name = active_sled_id)]
pub sled_id: Uuid,
/// The ID of the Propolis server hosting the current incarnation of this
/// instance.
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ mod silo_group;
mod silo_user;
mod silo_user_password_hash;
mod sled;
mod sled_instance;
mod sled_resource;
mod sled_resource_kind;
mod snapshot;
Expand Down Expand Up @@ -136,6 +137,7 @@ pub use silo_group::*;
pub use silo_user::*;
pub use silo_user_password_hash::*;
pub use sled::*;
pub use sled_instance::*;
pub use sled_resource::*;
pub use sled_resource_kind::*;
pub use snapshot::*;
Expand Down
18 changes: 17 additions & 1 deletion nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ table! {
state -> crate::InstanceStateEnum,
time_state_updated -> Timestamptz,
state_generation -> Int8,
active_server_id -> Uuid,
active_sled_id -> Uuid,
active_propolis_id -> Uuid,
active_propolis_ip -> Nullable<Inet>,
target_propolis_id -> Nullable<Uuid>,
Expand All @@ -135,6 +135,22 @@ table! {
}
}

// Diesel schema for the `sled_instance` SQL view (declared in dbinit.sql).
// The view exposes instance rows together with the names of their parent
// silo and project; `id` is declared as the primary key for Diesel's benefit.
table! {
sled_instance (id) {
id -> Uuid,
name -> Text,
// Names of the parent silo and project, joined in by the view.
silo_name -> Text,
project_name -> Text,
time_created -> Timestamptz,
time_modified -> Timestamptz,
state -> crate::InstanceStateEnum,
// Sled the instance is associated with; used to filter listings per sled.
active_sled_id -> Uuid,
migration_id -> Nullable<Uuid>,
ncpus -> Int8,
memory -> Int8,
}
}

table! {
metric_producer (id) {
id -> Uuid,
Expand Down
43 changes: 43 additions & 0 deletions nexus/db-model/src/sled_instance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use crate::schema::sled_instance;
use crate::InstanceState;
use crate::Name;
use db_macros::Asset;
use nexus_types::external_api::views;
use nexus_types::identity::Asset;
use serde::Deserialize;
use serde::Serialize;
use uuid::Uuid;

/// An operator view of an instance as exposed by the sled API.
///
/// Rows are loaded through the `sled_instance` Diesel schema table and are
/// converted to the external `views::SledInstance` type via `From`.
#[derive(Queryable, Debug, Selectable, Asset, Serialize, Deserialize)]
#[diesel(table_name = sled_instance)]
pub struct SledInstance {
/// Identity metadata embedded from the row.
// NOTE(review): `SledInstanceIdentity` is generated by the `Asset` derive;
// presumably it covers `id`, `time_created` and `time_modified` — confirm.
#[diesel(embed)]
identity: SledInstanceIdentity,
/// ID of the sled this instance is associated with.
active_sled_id: Uuid,
/// ID of the in-progress migration, if the column is set.
pub migration_id: Option<Uuid>,

/// Name of the instance.
pub name: Name,
/// Name of the silo that contains the instance's project.
pub silo_name: Name,
/// Name of the project that contains the instance.
pub project_name: Name,

/// Instance state as stored in the database.
pub state: InstanceState,
/// Value of the `ncpus` column.
pub ncpus: i64,
/// Value of the `memory` column.
// NOTE(review): units are not visible here (presumably bytes) — confirm.
pub memory: i64,
}

impl From<SledInstance> for views::SledInstance {
fn from(sled_instance: SledInstance) -> Self {
Self {
identity: sled_instance.identity(),
name: sled_instance.name.into(),
active_sled_id: sled_instance.active_sled_id,
silo_name: sled_instance.silo_name.into(),
project_name: sled_instance.project_name.into(),
state: *sled_instance.state.state(),
migration_id: sled_instance.migration_id,
ncpus: sled_instance.ncpus,
memory: sled_instance.memory,
}
}
}
8 changes: 8 additions & 0 deletions nexus/db-queries/src/authz/api_resources.rs
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,14 @@ authz_resource! {
polar_snippet = FleetChild,
}

// Authz resource for `SledInstance`: declared as a child of the Fleet and
// identified by a `Uuid` primary key.
// NOTE(review): `roles_allowed = false` presumably means roles are not
// assigned on this resource itself, and `FleetChild` presumably selects the
// Polar policy shared by other fleet-level children — confirm against the
// `authz_resource!` macro documentation.
authz_resource! {
name = "SledInstance",
parent = "Fleet",
primary_key = Uuid,
roles_allowed = false,
polar_snippet = FleetChild,
}

authz_resource! {
name = "Switch",
parent = "Fleet",
Expand Down
1 change: 1 addition & 0 deletions nexus/db-queries/src/db/datastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ mod silo;
mod silo_group;
mod silo_user;
mod sled;
mod sled_instance;
mod snapshot;
mod ssh_key;
mod switch;
Expand Down
32 changes: 32 additions & 0 deletions nexus/db-queries/src/db/datastore/sled_instance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use super::DataStore;

use crate::authz;
use crate::context::OpContext;
use crate::db;
use crate::db::error::public_error_from_diesel_pool;
use crate::db::error::ErrorHandler;
use crate::db::pagination::paginated;
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::prelude::*;
use nexus_db_model::SledInstance;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::ListResultVec;
use uuid::Uuid;

impl DataStore {
    /// Lists one page of instances whose `active_sled_id` is the given sled.
    ///
    /// The caller must be authorized for `ListChildren` on the fleet; the
    /// check is performed here before any query is issued. `authz_sled` is
    /// used only for its ID. Results are paginated on the instance `id`
    /// column according to `pagparams`.
    ///
    /// # Errors
    ///
    /// Returns the authorization error if the check fails, or a public error
    /// mapped from the Diesel/pool error (handled as a server error) if the
    /// query fails.
    pub async fn sled_instance_list(
        &self,
        opctx: &OpContext,
        authz_sled: &authz::Sled,
        pagparams: &DataPageParams<'_, Uuid>,
    ) -> ListResultVec<SledInstance> {
        use db::schema::sled_instance::dsl;

        opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;

        // `pagparams` is already a reference, so it is passed through as-is
        // rather than being borrowed a second time.
        paginated(dsl::sled_instance, dsl::id, pagparams)
            .filter(dsl::active_sled_id.eq(authz_sled.id()))
            .select(SledInstance::as_select())
            .load_async::<SledInstance>(self.pool_authorized(opctx).await?)
            .await
            .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server))
    }
}
2 changes: 1 addition & 1 deletion nexus/db-queries/src/db/datastore/vpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ impl DataStore {
.on(instance::id
.eq(instance_network_interface::instance_id)),
)
.inner_join(sled::table.on(sled::id.eq(instance::active_server_id)))
.inner_join(sled::table.on(sled::id.eq(instance::active_sled_id)))
.filter(instance_network_interface::vpc_id.eq(vpc_id))
.filter(instance_network_interface::time_deleted.is_null())
.filter(instance::time_deleted.is_null())
Expand Down
9 changes: 9 additions & 0 deletions nexus/db-queries/src/db/lookup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,15 @@ lookup_resource! {
primary_key_columns = [ { column_name = "id", rust_type = Uuid } ]
}

// Lookup support for `SledInstance`: a top-level resource (no ancestors or
// children) that is looked up only by its `Uuid` `id` column, never by name.
// NOTE(review): `soft_deletes = false` presumably tells the generated lookup
// not to filter on a `time_deleted` column — confirm against the
// `lookup_resource!` macro documentation.
lookup_resource! {
name = "SledInstance",
ancestors = [],
children = [],
lookup_by_name = false,
soft_deletes = false,
primary_key_columns = [ { column_name = "id", rust_type = Uuid } ],
}

lookup_resource! {
name = "Switch",
ancestors = [],
Expand Down
1 change: 1 addition & 0 deletions nexus/src/app/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub mod saga;
mod session;
mod silo;
mod sled;
mod sled_instance;
mod snapshot;
mod switch;
pub mod test_interfaces;
Expand Down
40 changes: 21 additions & 19 deletions nexus/src/app/sled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use crate::internal_api::params::{
SledRole, ZpoolPutRequest,
};
use nexus_db_queries::context::OpContext;
use nexus_db_queries::db::lookup;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::Error;
use omicron_common::api::external::ListResultVec;
Expand All @@ -27,6 +28,14 @@ use uuid::Uuid;

impl super::Nexus {
// Sleds
/// Builds a lookup handle for the sled identified by `sled_id`.
///
/// This function is synchronous and always returns `Ok`; callers resolve
/// the handle afterwards (for example with `.fetch().await`), which is
/// where lookup errors surface.
pub fn sled_lookup<'a>(
    &'a self,
    opctx: &'a OpContext,
    sled_id: &Uuid,
) -> LookupResult<lookup::Sled<'a>> {
    let path = LookupPath::new(opctx, &self.db_datastore);
    Ok(path.sled_id(*sled_id))
}

// TODO-robustness we should have a limit on how many sled agents there can
// be (for graceful degradation at large scale).
Expand Down Expand Up @@ -61,26 +70,14 @@ impl super::Nexus {
Ok(())
}

pub async fn sleds_list(
pub async fn sled_list(
&self,
opctx: &OpContext,
pagparams: &DataPageParams<'_, Uuid>,
) -> ListResultVec<db::model::Sled> {
self.db_datastore.sled_list(&opctx, pagparams).await
}

pub async fn sled_lookup(
&self,
opctx: &OpContext,
sled_id: &Uuid,
) -> LookupResult<db::model::Sled> {
let (.., db_sled) = LookupPath::new(opctx, &self.db_datastore)
.sled_id(*sled_id)
.fetch()
.await?;
Ok(db_sled)
}

pub async fn sled_client(
&self,
id: &Uuid,
Expand All @@ -92,7 +89,8 @@ impl super::Nexus {
// Frankly, returning an "Arc" here without a connection pool is a little
// silly; it's not actually used if each client connection exists as a
// one-shot.
let sled = self.sled_lookup(&self.opctx_alloc, id).await?;
let (.., sled) =
self.sled_lookup(&self.opctx_alloc, id)?.fetch().await?;

let log = self.log.new(o!("SledAgent" => id.clone().to_string()));
let dur = std::time::Duration::from_secs(60);
Expand Down Expand Up @@ -347,7 +345,7 @@ impl super::Nexus {

// Look up the supplied sled's physical host IP.
let physical_host_ip =
*self.sled_lookup(&self.opctx_alloc, &sled_id).await?.ip;
*self.sled_lookup(&self.opctx_alloc, &sled_id)?.fetch().await?.1.ip;

let mut last_sled_id: Option<Uuid> = None;
loop {
Expand All @@ -358,7 +356,7 @@ impl super::Nexus {
};

let sleds_page =
self.sleds_list(&self.opctx_alloc, &pagparams).await?;
self.sled_list(&self.opctx_alloc, &pagparams).await?;
let mut join_handles =
Vec::with_capacity(sleds_page.len() * instance_nics.len());

Expand Down Expand Up @@ -444,8 +442,12 @@ impl super::Nexus {

// Lookup the physical host IP of the sled hosting this instance
let instance_sled_id = db_instance.runtime().sled_id;
let physical_host_ip =
*self.sled_lookup(&self.opctx_alloc, &instance_sled_id).await?.ip;
let physical_host_ip = *self
.sled_lookup(&self.opctx_alloc, &instance_sled_id)?
.fetch()
.await?
.1
.ip;

let mut last_sled_id: Option<Uuid> = None;

Expand All @@ -457,7 +459,7 @@ impl super::Nexus {
};

let sleds_page =
self.sleds_list(&self.opctx_alloc, &pagparams).await?;
self.sled_list(&self.opctx_alloc, &pagparams).await?;
let mut join_handles =
Vec::with_capacity(sleds_page.len() * instance_nics.len());

Expand Down
23 changes: 23 additions & 0 deletions nexus/src/app/sled_instance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
use crate::authz;
use crate::db;
use nexus_db_queries::context::OpContext;
use nexus_db_queries::db::lookup;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::ListResultVec;
use uuid::Uuid;

impl super::Nexus {
    /// Lists one page of the instances associated with a sled.
    ///
    /// Resolves `sled_lookup` with `Read` permission to obtain the sled's
    /// authz object, then delegates to the datastore's `sled_instance_list`.
    /// The fleet-level `ListChildren` authorization is performed by that
    /// datastore method, so it is not repeated here.
    ///
    /// # Errors
    ///
    /// Returns an error if the sled lookup fails or the caller may not read
    /// it, or any error produced by the datastore listing.
    pub async fn sled_instance_list(
        &self,
        opctx: &OpContext,
        sled_lookup: &lookup::Sled<'_>,
        pagparams: &DataPageParams<'_, Uuid>,
    ) -> ListResultVec<db::model::SledInstance> {
        let (.., authz_sled) =
            sled_lookup.lookup_for(authz::Action::Read).await?;
        self.db_datastore
            .sled_instance_list(opctx, &authz_sled, pagparams)
            .await
    }
}
Loading

0 comments on commit 2b0ab64

Please sign in to comment.