Skip to content

Commit

Permalink
Adds concept of a primary network interface (#1143)
Browse files Browse the repository at this point in the history
* Adds concept of a primary network interface

- Adds the `is_primary` column to the `network_interface` table, and a
  corresponding field `primary` in the database model and external
  `NetworkInterface` objects. Primary interfaces are used for NAT and
  appear in DNS records.
- Updates the `InsertNetworkInterfaceQuery` to automatically decide if
  this interface should be considered the primary. It considers the new
  NIC primary iff there are zero existing NICs for the instance it's to
  be attached to. That means that the first NIC added to an instance,
  either during a provision or later, is the primary. Future work could
  allow changing which NIC is the primary.
- Adds a new query for deleting a network interface from an instance,
  with improved validation. This now checks that the instance is stopped
  inside the query, fixing a TOCTOU bug. It also verifies that the
  instance either has exactly 1 interface (which must be the primary) or
  that the instance has 2 or more (we're deleting a secondary). This
  means that the primary interface cannot be deleted until all secondary
  interfaces are deleted. The reason for this restriction is that
  instances _must_ have a primary interface, and it's not clear how to
  pick a new primary from the remaining secondaries if we allow deletion
  of the primary. We force the client to make the choice.
- Adds a special error type for handling the above validation failures.
- Adds tests for this deletion behavior to the instance integration
  tests

* Review feedback

- Cleanup comments
- Simplify NIC query/error type names
- Remove stale test
  • Loading branch information
bnaecker authored Jun 3, 2022
1 parent 1593b8b commit e2bd1e8
Show file tree
Hide file tree
Showing 14 changed files with 648 additions and 174 deletions.
3 changes: 3 additions & 0 deletions common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1811,6 +1811,9 @@ pub struct NetworkInterface {
pub ip: IpAddr,
// TODO-correctness: We need to split this into an optional V4 and optional
// V6 address, at least one of which must be specified.
/// True if this interface is the primary for the instance to which it's
/// attached.
pub primary: bool,
}

#[derive(
Expand Down
9 changes: 8 additions & 1 deletion common/src/sql/dbinit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,14 @@ CREATE TABLE omicron.public.network_interface (
* Limited to 8 NICs per instance. This value must be kept in sync with
* `crate::nexus::MAX_NICS_PER_INSTANCE`.
*/
slot INT2 NOT NULL CHECK (slot >= 0 AND slot < 8)
slot INT2 NOT NULL CHECK (slot >= 0 AND slot < 8),

/* True if this interface is the primary interface for the instance.
*
* The primary interface appears in DNS and its address is used for external
* connectivity for the instance.
*/
is_primary BOOL NOT NULL
);

/* TODO-completeness
Expand Down
19 changes: 15 additions & 4 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::db;
use crate::db::identity::Resource;
use crate::db::lookup::LookupPath;
use crate::db::model::Name;
use crate::db::queries::network_interface::NetworkInterfaceError;
use crate::db::queries::network_interface;
use crate::external_api::params;
use omicron_common::api::external;
use omicron_common::api::external::CreateResult;
Expand Down Expand Up @@ -642,7 +642,8 @@ impl super::Nexus {
// instance between this check and when we actually create the NIC
// record. One solution is to place the state verification in the query
// to create the NIC. Unfortunately, that query is already very
// complicated.
// complicated. See
// https://github.com/oxidecomputer/omicron/issues/1134.
let stopped =
db::model::InstanceState::new(external::InstanceState::Stopped);
if db_instance.runtime_state.state != stopped {
Expand Down Expand Up @@ -680,7 +681,7 @@ impl super::Nexus {
interface,
)
.await
.map_err(NetworkInterfaceError::into_external)?;
.map_err(network_interface::InsertError::into_external)?;
Ok(interface)
}

Expand Down Expand Up @@ -724,6 +725,9 @@ impl super::Nexus {
}

/// Delete a network interface from the provided instance.
///
/// Note that the primary interface for an instance cannot be deleted if
/// there are any secondary interfaces.
pub async fn instance_delete_network_interface(
&self,
opctx: &OpContext,
Expand All @@ -746,6 +750,8 @@ impl super::Nexus {
.await?;

// TODO-completeness: We'd like to relax this once hot-plug is supported
// TODO-correctness: There's a race condition here. Someone may start
// the instance after this check but before we actually delete the NIC.
let stopped =
db::model::InstanceState::new(external::InstanceState::Stopped);
if db_instance.runtime_state.state != stopped {
Expand All @@ -754,8 +760,13 @@ impl super::Nexus {
));
}
self.db_datastore
.instance_delete_network_interface(opctx, &authz_interface)
.instance_delete_network_interface(
opctx,
&authz_instance,
&authz_interface,
)
.await
.map_err(network_interface::DeleteError::into_external)
}

/// Invoked by a sled agent to publish an updated runtime state for an
Expand Down
30 changes: 21 additions & 9 deletions nexus/src/app/sagas/instance_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ use crate::app::{MAX_DISKS_PER_INSTANCE, MAX_NICS_PER_INSTANCE};
use crate::context::OpContext;
use crate::db::identity::Resource;
use crate::db::lookup::LookupPath;
use crate::db::queries::network_interface::NetworkInterfaceError;
use crate::db::queries::network_interface::InsertError as InsertNicError;
use crate::defaults::DEFAULT_PRIMARY_NIC_NAME;
use crate::external_api::params;
use crate::saga_interface::SagaContext;
use crate::{authn, authz, db};
Expand Down Expand Up @@ -242,7 +243,7 @@ async fn sic_create_network_interfaces(
match sagactx.saga_params().create_params.network_interfaces {
params::InstanceNetworkInterfaceAttachment::None => Ok(()),
params::InstanceNetworkInterfaceAttachment::Default => {
sic_create_default_network_interface(&sagactx).await
sic_create_default_primary_network_interface(&sagactx).await
}
params::InstanceNetworkInterfaceAttachment::Create(
ref create_params,
Expand Down Expand Up @@ -340,14 +341,14 @@ async fn sic_create_custom_network_interfaces(
// insert that record if it exists, which obviously fails with a
// primary key violation. (If the record does _not_ exist, one will
// be inserted as usual, see
// `db::subnet_name::InsertNetworkInterfaceQuery` for details).
// `db::queries::network_interface::InsertQuery` for details).
//
// In this one specific case, we're asserting that any primary key
// duplicate arises because this saga node ran partway and then
// crashed. The saga recovery machinery will replay just this node,
// without first unwinding it, so any previously-inserted interfaces
// will still exist. This is expected.
Err(NetworkInterfaceError::DuplicatePrimaryKey(_)) => {
Err(InsertNicError::DuplicatePrimaryKey(_)) => {
// TODO-observability: We should bump a counter here.
let log = osagactx.log();
warn!(
Expand All @@ -369,8 +370,9 @@ async fn sic_create_custom_network_interfaces(
Ok(())
}

/// Create the default network interface for an instance during the create saga
async fn sic_create_default_network_interface(
/// Create a default primary network interface for an instance during the create
/// saga.
async fn sic_create_default_primary_network_interface(
sagactx: &ActionContext<SagaInstanceCreate>,
) -> Result<(), ActionError> {
let osagactx = sagactx.user_data();
Expand All @@ -379,13 +381,23 @@ async fn sic_create_default_network_interface(
let opctx =
OpContext::for_saga_action(&sagactx, &saga_params.serialized_authn);
let instance_id = sagactx.lookup::<Uuid>("instance_id")?;

// The literal name "default" is currently used for the VPC and VPC Subnet,
// when not specified in the client request.
// TODO-completeness: We'd like to select these from Project-level defaults.
// See https://github.com/oxidecomputer/omicron/issues/1015.
let default_name = Name::try_from("default".to_string()).unwrap();
let internal_default_name = db::model::Name::from(default_name.clone());

// The name of the default primary interface.
let iface_name =
Name::try_from(DEFAULT_PRIMARY_NIC_NAME.to_string()).unwrap();

let interface_params = params::NetworkInterfaceCreate {
identity: IdentityMetadataCreateParams {
name: default_name.clone(),
name: iface_name.clone(),
description: format!(
"default interface for {}",
"default primary interface for {}",
saga_params.create_params.identity.name,
),
},
Expand Down Expand Up @@ -427,7 +439,7 @@ async fn sic_create_default_network_interface(
interface,
)
.await
.map_err(NetworkInterfaceError::into_external)
.map_err(InsertNicError::into_external)
.map_err(ActionError::action_failed)?;
Ok(())
}
Expand Down
55 changes: 31 additions & 24 deletions nexus/src/db/datastore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ use crate::db::lookup::LookupPath;
use crate::db::model::DatabaseString;
use crate::db::model::IncompleteVpc;
use crate::db::model::Vpc;
use crate::db::queries::network_interface::InsertNetworkInterfaceQuery;
use crate::db::queries::network_interface::NetworkInterfaceError;
use crate::db::queries::network_interface;
use crate::db::queries::vpc::InsertVpcQuery;
use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery;
use crate::db::queries::vpc_subnet::SubnetError;
Expand Down Expand Up @@ -1575,35 +1574,37 @@ impl DataStore {
authz_subnet: &authz::VpcSubnet,
authz_instance: &authz::Instance,
interface: IncompleteNetworkInterface,
) -> Result<NetworkInterface, NetworkInterfaceError> {
) -> Result<NetworkInterface, network_interface::InsertError> {
opctx
.authorize(authz::Action::CreateChild, authz_instance)
.await
.map_err(NetworkInterfaceError::External)?;
.map_err(network_interface::InsertError::External)?;
opctx
.authorize(authz::Action::CreateChild, authz_subnet)
.await
.map_err(NetworkInterfaceError::External)?;
.map_err(network_interface::InsertError::External)?;
self.instance_create_network_interface_raw(&opctx, interface).await
}

pub(super) async fn instance_create_network_interface_raw(
&self,
opctx: &OpContext,
interface: IncompleteNetworkInterface,
) -> Result<NetworkInterface, NetworkInterfaceError> {
) -> Result<NetworkInterface, network_interface::InsertError> {
use db::schema::network_interface::dsl;
let query = InsertNetworkInterfaceQuery::new(interface.clone());
let query = network_interface::InsertQuery::new(interface.clone());
diesel::insert_into(dsl::network_interface)
.values(query)
.returning(NetworkInterface::as_returning())
.get_result_async(
self.pool_authorized(opctx)
.await
.map_err(NetworkInterfaceError::External)?,
.map_err(network_interface::InsertError::External)?,
)
.await
.map_err(|e| NetworkInterfaceError::from_pool(e, &interface))
.map_err(|e| {
network_interface::InsertError::from_pool(e, &interface)
})
}

/// Delete all network interfaces attached to the given instance.
Expand Down Expand Up @@ -1634,27 +1635,33 @@ impl DataStore {
}

/// Delete a `NetworkInterface` attached to a provided instance.
///
/// Note that the primary interface for an instance cannot be deleted if
/// there are any secondary interfaces.
pub async fn instance_delete_network_interface(
&self,
opctx: &OpContext,
authz_instance: &authz::Instance,
authz_interface: &authz::NetworkInterface,
) -> DeleteResult {
opctx.authorize(authz::Action::Delete, authz_interface).await?;

use db::schema::network_interface::dsl;
let now = Utc::now();
let interface_id = authz_interface.id();
diesel::update(dsl::network_interface)
.filter(dsl::id.eq(interface_id))
.filter(dsl::time_deleted.is_null())
.set((dsl::time_deleted.eq(now),))
.execute_async(self.pool_authorized(opctx).await?)
) -> Result<(), network_interface::DeleteError> {
opctx
.authorize(authz::Action::Delete, authz_interface)
.await
.map_err(network_interface::DeleteError::External)?;
let query = network_interface::DeleteQuery::new(
authz_instance.id(),
authz_interface.id(),
);
query
.clone()
.execute_async(
self.pool_authorized(opctx)
.await
.map_err(network_interface::DeleteError::External)?,
)
.await
.map_err(|e| {
public_error_from_diesel_pool(
e,
ErrorHandler::NotFoundByResource(authz_interface),
)
network_interface::DeleteError::from_pool(e, &query)
})?;
Ok(())
}
Expand Down
3 changes: 3 additions & 0 deletions nexus/src/db/model/network_interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ pub struct NetworkInterface {
// If neither is specified, auto-assign one of each?
pub ip: ipnetwork::IpNetwork,
pub slot: i16,
#[diesel(column_name = is_primary)]
pub primary: bool,
}

impl From<NetworkInterface> for external::NetworkInterface {
Expand All @@ -37,6 +39,7 @@ impl From<NetworkInterface> for external::NetworkInterface {
subnet_id: iface.subnet_id,
ip: iface.ip.ip(),
mac: *iface.mac,
primary: iface.primary,
}
}
}
Expand Down
Loading

0 comments on commit e2bd1e8

Please sign in to comment.