Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sled/instance association for inventory and insights #3092

Merged
merged 15 commits into from
May 18, 2023
Merged
1 change: 1 addition & 0 deletions common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,7 @@ pub enum ResourceType {
Rack,
Service,
Sled,
SledInstance,
Switch,
SagaDbg,
Snapshot,
Expand Down
39 changes: 34 additions & 5 deletions common/src/sql/dbinit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -818,13 +818,14 @@ CREATE TABLE omicron.public.instance (
time_state_updated TIMESTAMPTZ NOT NULL,
state_generation INT NOT NULL,
/*
* Server where the VM is currently running, if any. Note that when we
* support live migration, there may be multiple servers associated with
* Sled where the VM is currently running, if any. Note that when we
* support live migration, there may be multiple sleds associated with
* this Instance, but only one will be truly active. Still, consumers of
* this information should consider whether they also want to know the other
* servers involved in the migration.
* sleds involved in the migration.
*/
active_server_id UUID,
active_sled_id UUID,

/* Identifies the underlying propolis-server backing the instance. */
active_propolis_id UUID NOT NULL,
active_propolis_ip INET,
Expand Down Expand Up @@ -862,10 +863,38 @@ CREATE UNIQUE INDEX ON omicron.public.instance (
-- Allow looking up instances by sled. This is particularly
-- useful for resource accounting within a sled.
CREATE INDEX ON omicron.public.instance (
active_server_id
active_sled_id
) WHERE
time_deleted IS NULL;

/*
 * Operator-facing view of instances, intended for insight into what is
 * running on a sled. Every instance row that has not been deleted is joined
 * to its project, and that project to its silo, so the project and silo
 * names are available alongside the instance's own columns.
 */
CREATE VIEW omicron.public.sled_instance AS
SELECT
    instance.id,
    instance.name,
    silo.name AS silo_name,
    project.name AS project_name,
    instance.active_sled_id,
    instance.time_created,
    instance.time_modified,
    instance.migration_id,
    instance.ncpus,
    instance.memory,
    instance.state
FROM omicron.public.instance AS instance
INNER JOIN omicron.public.project AS project
    ON project.id = instance.project_id
INNER JOIN omicron.public.silo AS silo
    ON silo.id = project.silo_id
WHERE instance.time_deleted IS NULL;


/*
* Guest-Visible, Virtual Disks
*/
Expand Down
2 changes: 1 addition & 1 deletion nexus/db-model/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ pub struct InstanceRuntimeState {
//
// TODO(#2315): This should be optional so that it can be cleared when the
// instance is not active.
#[diesel(column_name = active_server_id)]
#[diesel(column_name = active_sled_id)]
pub sled_id: Uuid,
/// The ID of the Propolis server hosting the current incarnation of this
/// instance.
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ mod silo_group;
mod silo_user;
mod silo_user_password_hash;
mod sled;
mod sled_instance;
mod sled_resource;
mod sled_resource_kind;
mod snapshot;
Expand Down Expand Up @@ -136,6 +137,7 @@ pub use silo_group::*;
pub use silo_user::*;
pub use silo_user_password_hash::*;
pub use sled::*;
pub use sled_instance::*;
pub use sled_resource::*;
pub use sled_resource_kind::*;
pub use snapshot::*;
Expand Down
18 changes: 17 additions & 1 deletion nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ table! {
state -> crate::InstanceStateEnum,
time_state_updated -> Timestamptz,
state_generation -> Int8,
active_server_id -> Uuid,
active_sled_id -> Uuid,
active_propolis_id -> Uuid,
active_propolis_ip -> Nullable<Inet>,
target_propolis_id -> Nullable<Uuid>,
Expand All @@ -135,6 +135,22 @@ table! {
}
}

// Diesel schema for `sled_instance`, keyed by `id`. The column names mirror
// the select list of the `omicron.public.sled_instance` SQL view.
//
// The declaration order below is not the same as the order of the view's
// select list, so queries should pick columns by name (for example through a
// `Selectable` model) rather than rely on column position.
table! {
sled_instance (id) {
id -> Uuid,
name -> Text,
silo_name -> Text,
project_name -> Text,
time_created -> Timestamptz,
time_modified -> Timestamptz,
state -> crate::InstanceStateEnum,
active_sled_id -> Uuid,
migration_id -> Nullable<Uuid>,
ncpus -> Int8,
memory -> Int8,
}
}

table! {
metric_producer (id) {
id -> Uuid,
Expand Down
43 changes: 43 additions & 0 deletions nexus/db-model/src/sled_instance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use crate::schema::sled_instance;
use crate::InstanceState;
use crate::Name;
use db_macros::Asset;
use nexus_types::external_api::views;
use nexus_types::identity::Asset;
use serde::Deserialize;
use serde::Serialize;
use uuid::Uuid;

/// An operator view of an instance as exposed by the sled API.
///
/// This is the database model loaded from the `sled_instance` Diesel table;
/// it converts into `views::SledInstance` for the external API.
#[derive(Queryable, Debug, Selectable, Asset, Serialize, Deserialize)]
#[diesel(table_name = sled_instance)]
pub struct SledInstance {
// Embedded identity columns.
// NOTE(review): `SledInstanceIdentity` is presumably generated by the
// `Asset` derive and covers `id`, `time_created` and `time_modified` —
// confirm against `db_macros`.
#[diesel(embed)]
identity: SledInstanceIdentity,
// ID of the sled this instance is associated with. Kept private; it is
// copied into the API view by the `From` conversion.
active_sled_id: Uuid,
/// Value of the nullable `migration_id` column; `None` when it is NULL.
pub migration_id: Option<Uuid>,

/// Name of the instance itself.
pub name: Name,
/// Name of the silo that contains the instance's project.
pub silo_name: Name,
/// Name of the project that contains the instance.
pub project_name: Name,

/// Current state of the instance.
pub state: InstanceState,
/// Value of the `ncpus` column (stored as a 64-bit integer).
pub ncpus: i64,
/// Value of the `memory` column (stored as a 64-bit integer).
// NOTE(review): units are not visible here — presumably bytes; confirm.
pub memory: i64,
}

impl From<SledInstance> for views::SledInstance {
fn from(sled_instance: SledInstance) -> Self {
Self {
identity: sled_instance.identity(),
name: sled_instance.name.into(),
active_sled_id: sled_instance.active_sled_id,
silo_name: sled_instance.silo_name.into(),
project_name: sled_instance.project_name.into(),
state: *sled_instance.state.state(),
migration_id: sled_instance.migration_id,
ncpus: sled_instance.ncpus,
memory: sled_instance.memory,
}
}
}
8 changes: 8 additions & 0 deletions nexus/db-queries/src/authz/api_resources.rs
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,14 @@ authz_resource! {
polar_snippet = FleetChild,
}

// Authz resource type for `SledInstance`: a child of the Fleet, identified by
// a `Uuid` primary key, and wired into the policy with the `FleetChild` Polar
// snippet.
// NOTE(review): `roles_allowed = false` presumably means roles cannot be
// assigned directly on this resource — confirm against the `authz_resource!`
// macro documentation.
authz_resource! {
name = "SledInstance",
parent = "Fleet",
primary_key = Uuid,
roles_allowed = false,
polar_snippet = FleetChild,
}

authz_resource! {
name = "Switch",
parent = "Fleet",
Expand Down
1 change: 1 addition & 0 deletions nexus/db-queries/src/db/datastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ mod silo;
mod silo_group;
mod silo_user;
mod sled;
mod sled_instance;
mod snapshot;
mod ssh_key;
mod switch;
Expand Down
32 changes: 32 additions & 0 deletions nexus/db-queries/src/db/datastore/sled_instance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use super::DataStore;

use crate::authz;
use crate::context::OpContext;
use crate::db;
use crate::db::error::public_error_from_diesel_pool;
use crate::db::error::ErrorHandler;
use crate::db::pagination::paginated;
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::prelude::*;
use nexus_db_model::SledInstance;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::ListResultVec;
use uuid::Uuid;

impl DataStore {
    /// Lists the instances whose `active_sled_id` matches the given sled.
    ///
    /// The caller must be authorized to `ListChildren` on the fleet. Results
    /// are paginated by instance ID according to `pagparams`.
    ///
    /// # Errors
    ///
    /// Returns an error if the authorization check fails, if a database
    /// connection cannot be obtained, or if the query itself fails (reported
    /// through `ErrorHandler::Server`).
    pub async fn sled_instance_list(
        &self,
        opctx: &OpContext,
        authz_sled: &authz::Sled,
        pagparams: &DataPageParams<'_, Uuid>,
    ) -> ListResultVec<SledInstance> {
        use db::schema::sled_instance::dsl;

        opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;

        // `pagparams` is already a reference, so hand it over as-is instead
        // of borrowing it a second time.
        paginated(dsl::sled_instance, dsl::id, pagparams)
            .filter(dsl::active_sled_id.eq(authz_sled.id()))
            .select(SledInstance::as_select())
            .load_async::<SledInstance>(self.pool_authorized(opctx).await?)
            .await
            .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server))
    }
}
2 changes: 1 addition & 1 deletion nexus/db-queries/src/db/datastore/vpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ impl DataStore {
.on(instance::id
.eq(instance_network_interface::instance_id)),
)
.inner_join(sled::table.on(sled::id.eq(instance::active_server_id)))
.inner_join(sled::table.on(sled::id.eq(instance::active_sled_id)))
.filter(instance_network_interface::vpc_id.eq(vpc_id))
.filter(instance_network_interface::time_deleted.is_null())
.filter(instance::time_deleted.is_null())
Expand Down
9 changes: 9 additions & 0 deletions nexus/db-queries/src/db/lookup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,15 @@ lookup_resource! {
primary_key_columns = [ { column_name = "id", rust_type = Uuid } ]
}

// Lookup definition for `SledInstance`: a resource with no ancestors and no
// children in the lookup hierarchy, addressed only by its `id` (a `Uuid`) and
// never by name. Soft deletes are disabled for it.
lookup_resource! {
name = "SledInstance",
ancestors = [],
children = [],
lookup_by_name = false,
soft_deletes = false,
primary_key_columns = [ { column_name = "id", rust_type = Uuid } ],
}

lookup_resource! {
name = "Switch",
ancestors = [],
Expand Down
1 change: 1 addition & 0 deletions nexus/src/app/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub mod saga;
mod session;
mod silo;
mod sled;
mod sled_instance;
mod snapshot;
mod switch;
pub mod test_interfaces;
Expand Down
40 changes: 21 additions & 19 deletions nexus/src/app/sled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use crate::internal_api::params::{
SledRole, ZpoolPutRequest,
};
use nexus_db_queries::context::OpContext;
use nexus_db_queries::db::lookup;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::Error;
use omicron_common::api::external::ListResultVec;
Expand All @@ -27,6 +28,14 @@ use uuid::Uuid;

impl super::Nexus {
// Sleds
/// Builds a lookup handle for the sled with the given ID.
///
/// This only constructs the lookup path; the caller still has to invoke one
/// of the handle's methods (for example `fetch()` or `lookup_for()`) to
/// resolve it. As written, this function always returns `Ok`.
pub fn sled_lookup<'a>(
    &'a self,
    opctx: &'a OpContext,
    sled_id: &Uuid,
) -> LookupResult<lookup::Sled<'a>> {
    let path = LookupPath::new(opctx, &self.db_datastore);
    Ok(path.sled_id(*sled_id))
}

// TODO-robustness we should have a limit on how many sled agents there can
// be (for graceful degradation at large scale).
Expand Down Expand Up @@ -61,26 +70,14 @@ impl super::Nexus {
Ok(())
}

pub async fn sleds_list(
pub async fn sled_list(
&self,
opctx: &OpContext,
pagparams: &DataPageParams<'_, Uuid>,
) -> ListResultVec<db::model::Sled> {
self.db_datastore.sled_list(&opctx, pagparams).await
}

pub async fn sled_lookup(
&self,
opctx: &OpContext,
sled_id: &Uuid,
) -> LookupResult<db::model::Sled> {
let (.., db_sled) = LookupPath::new(opctx, &self.db_datastore)
.sled_id(*sled_id)
.fetch()
.await?;
Ok(db_sled)
}

pub async fn sled_client(
&self,
id: &Uuid,
Expand All @@ -92,7 +89,8 @@ impl super::Nexus {
// Frankly, returning an "Arc" here without a connection pool is a little
// silly; it's not actually used if each client connection exists as a
// one-shot.
let sled = self.sled_lookup(&self.opctx_alloc, id).await?;
let (.., sled) =
self.sled_lookup(&self.opctx_alloc, id)?.fetch().await?;

let log = self.log.new(o!("SledAgent" => id.clone().to_string()));
let dur = std::time::Duration::from_secs(60);
Expand Down Expand Up @@ -347,7 +345,7 @@ impl super::Nexus {

// Look up the supplied sled's physical host IP.
let physical_host_ip =
*self.sled_lookup(&self.opctx_alloc, &sled_id).await?.ip;
*self.sled_lookup(&self.opctx_alloc, &sled_id)?.fetch().await?.1.ip;

let mut last_sled_id: Option<Uuid> = None;
loop {
Expand All @@ -358,7 +356,7 @@ impl super::Nexus {
};

let sleds_page =
self.sleds_list(&self.opctx_alloc, &pagparams).await?;
self.sled_list(&self.opctx_alloc, &pagparams).await?;
let mut join_handles =
Vec::with_capacity(sleds_page.len() * instance_nics.len());

Expand Down Expand Up @@ -444,8 +442,12 @@ impl super::Nexus {

// Lookup the physical host IP of the sled hosting this instance
let instance_sled_id = db_instance.runtime().sled_id;
let physical_host_ip =
*self.sled_lookup(&self.opctx_alloc, &instance_sled_id).await?.ip;
let physical_host_ip = *self
.sled_lookup(&self.opctx_alloc, &instance_sled_id)?
.fetch()
.await?
.1
.ip;

let mut last_sled_id: Option<Uuid> = None;

Expand All @@ -457,7 +459,7 @@ impl super::Nexus {
};

let sleds_page =
self.sleds_list(&self.opctx_alloc, &pagparams).await?;
self.sled_list(&self.opctx_alloc, &pagparams).await?;
let mut join_handles =
Vec::with_capacity(sleds_page.len() * instance_nics.len());

Expand Down
23 changes: 23 additions & 0 deletions nexus/src/app/sled_instance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
use crate::authz;
use crate::db;
use nexus_db_queries::context::OpContext;
use nexus_db_queries::db::lookup;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::ListResultVec;
use uuid::Uuid;

impl super::Nexus {
    /// Lists the instances on the sled selected by `sled_lookup`.
    ///
    /// The sled is first resolved with `Read` permission; the listing itself
    /// is then delegated to the datastore and paginated by `pagparams`.
    ///
    /// # Errors
    ///
    /// Returns an error if the sled lookup fails or is not permitted, or if
    /// the datastore call fails (including its own authorization check).
    pub async fn sled_instance_list(
        &self,
        opctx: &OpContext,
        sled_lookup: &lookup::Sled<'_>,
        pagparams: &DataPageParams<'_, Uuid>,
    ) -> ListResultVec<db::model::SledInstance> {
        let (.., authz_sled) =
            sled_lookup.lookup_for(authz::Action::Read).await?;

        // No fleet-level authorization is repeated here:
        // `DataStore::sled_instance_list` performs the `ListChildren` check on
        // the fleet itself before it touches the database.
        self.db_datastore
            .sled_instance_list(opctx, &authz_sled, pagparams)
            .await
    }
}
Loading