Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Manage OPTE V2P mappings #2536

Merged
merged 20 commits into from
Apr 6, 2023
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions illumos-utils/src/opte/illumos/port_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::dladm::PhysicalLink;
use crate::opte::default_boundary_services;
use crate::opte::opte_firewall_rules;
use crate::opte::params::NetworkInterface;
use crate::opte::params::SetVirtualNetworkInterfaceHost;
use crate::opte::params::SourceNatConfig;
use crate::opte::params::VpcFirewallRule;
use crate::opte::Error;
Expand All @@ -25,9 +26,11 @@ use oxide_vpc::api::Ipv4Cfg;
use oxide_vpc::api::Ipv4Cidr;
use oxide_vpc::api::Ipv4PrefixLen;
use oxide_vpc::api::MacAddr;
use oxide_vpc::api::PhysNet;
use oxide_vpc::api::RouterTarget;
use oxide_vpc::api::SNat4Cfg;
use oxide_vpc::api::SetFwRulesReq;
use oxide_vpc::api::SetVirt2PhysReq;
use oxide_vpc::api::VpcCfg;
use slog::debug;
use slog::info;
Expand Down Expand Up @@ -424,6 +427,36 @@ impl PortManager {
}
Ok(())
}

pub fn set_virtual_nic_host(
&self,
_interface_id: Uuid,
luqmana marked this conversation as resolved.
Show resolved Hide resolved
mapping: &SetVirtualNetworkInterfaceHost,
) -> Result<(), Error> {
let hdl = OpteHdl::open(OpteHdl::XDE_CTL)?;

hdl.set_v2p(&SetVirt2PhysReq {
vip: mapping.virtual_ip.into(),
phys: PhysNet {
ether: oxide_vpc::api::MacAddr::from(
(*mapping.virtual_mac).into_array(),
),
ip: mapping.physical_host_ip.into(),
vni: Vni::new(mapping.vni).unwrap(),
},
})?;

Ok(())
}

pub fn unset_virtual_nic_host(
&self,
_interface_id: Uuid,
_mapping: &SetVirtualNetworkInterfaceHost,
) -> Result<(), Error> {
// TODO requires https://github.com/oxidecomputer/opte/issues/332
Ok(())
}
}

pub struct PortTicket {
Expand Down
19 changes: 19 additions & 0 deletions illumos-utils/src/opte/non_illumos/port_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

use crate::opte::default_boundary_services;
use crate::opte::params::NetworkInterface;
use crate::opte::params::SetVirtualNetworkInterfaceHost;
use crate::opte::params::SourceNatConfig;
use crate::opte::params::VpcFirewallRule;
use crate::opte::Error;
Expand Down Expand Up @@ -176,6 +177,24 @@ impl PortManager {
info!(self.inner.log, "Ignoring {} firewall rules", rules.len());
Ok(())
}

pub fn set_virtual_nic_host(
&self,
_interface_id: Uuid,
_mapping: &SetVirtualNetworkInterfaceHost,
) -> Result<(), Error> {
info!(self.inner.log, "Ignoring virtual NIC mapping");
Ok(())
}

pub fn unset_virtual_nic_host(
&self,
_interface_id: Uuid,
_mapping: &SetVirtualNetworkInterfaceHost,
) -> Result<(), Error> {
info!(self.inner.log, "Ignoring unset of virtual NIC mapping");
Ok(())
}
}

pub struct PortTicket {
Expand Down
11 changes: 11 additions & 0 deletions illumos-utils/src/opte/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ use omicron_common::api::internal::nexus::HostIdentifier;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::net::IpAddr;
use std::net::Ipv6Addr;

/// Information required to construct a virtual network interface for a guest
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
pub struct NetworkInterface {
pub id: uuid::Uuid,
pub name: external::Name,
pub ip: IpAddr,
pub mac: external::MacAddr,
Expand Down Expand Up @@ -50,3 +52,12 @@ pub struct VpcFirewallRule {
pub action: external::VpcFirewallRuleAction,
pub priority: external::VpcFirewallRulePriority,
}

/// A mapping from a virtual NIC to a physical host
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)]
pub struct SetVirtualNetworkInterfaceHost {
pub virtual_ip: IpAddr,
pub virtual_mac: external::MacAddr,
pub physical_host_ip: Ipv6Addr,
pub vni: external::Vni,
}
3 changes: 3 additions & 0 deletions nexus/db-queries/src/db/datastore/network_interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ use sled_agent_client::types as sled_client_types;
/// interface and VPC subnet tables.
#[derive(Debug, diesel::Queryable)]
struct NicInfo {
id: uuid::Uuid,
name: db::model::Name,
ip: ipnetwork::IpNetwork,
mac: db::model::MacAddr,
Expand All @@ -59,6 +60,7 @@ impl From<NicInfo> for sled_client_types::NetworkInterface {
external::IpNet::V6(nic.ipv6_block.0)
};
sled_client_types::NetworkInterface {
id: nic.id,
name: sled_client_types::Name::from(&nic.name.0),
ip: nic.ip.ip(),
mac: sled_client_types::MacAddr::from(nic.mac.0),
Expand Down Expand Up @@ -205,6 +207,7 @@ impl DataStore {
// ideal, but we can't derive `Selectable` since this is the result
// of a JOIN and not from a single table. DRY this out if possible.
.select((
network_interface::id,
network_interface::name,
network_interface::ip,
network_interface::mac,
Expand Down
66 changes: 66 additions & 0 deletions nexus/src/app/sagas/instance_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,13 @@ declare_saga_actions! {
+ sic_add_network_config
- sic_remove_network_config
}
V2P_ENSURE_UNDO -> "v2p_ensure_undo" {
+ sic_noop
- sic_v2p_ensure_undo
}
luqmana marked this conversation as resolved.
Show resolved Hide resolved
V2P_ENSURE -> "v2p_ensure" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this go before the instance ensure step so that the mappings exist by the time the instance starts to run? (I suspect this step is also going to be easier to undo than sic_instance_ensure.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't leave comments on an arbitrary part of the file (ugh) so leaving this here because it's closely related: if this ordering changes and V2P_ENSURE gets an undo step, we should also update the tests in this module to check that no V2P mappings exist after a saga is torn down (there are a bunch of functions down there that check this sort of thing for other side effects).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, this should come before the instance-ensure call. I also think it makes sense to add a TODO here linking to the relevant OPTE issu about adding deletion of these mappings.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done in 48754ea

+ sic_v2p_ensure
}
INSTANCE_ENSURE -> "instance_ensure" {
+ sic_instance_ensure
}
Expand Down Expand Up @@ -335,7 +342,16 @@ impl NexusSaga for SagaInstanceCreate {
i,
)?;
}

// creating instance v2p mappings is not atomic - there are many calls
// to different sled agents that occur. for this to unwind correctly
// given a partial success of the ensure node, the undo node must be
// prior to the ensure node as a separate action.
builder.append(v2p_ensure_undo_action());
builder.append(v2p_ensure_action());

builder.append(instance_ensure_action());

Ok(builder.build()?)
}
}
Expand Down Expand Up @@ -1306,6 +1322,51 @@ async fn sic_instance_ensure(
Ok(())
}

async fn sic_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> {
Ok(())
}

/// Ensure that the necessary v2p mappings exist for this instance
async fn sic_v2p_ensure(
sagactx: NexusActionContext,
) -> Result<(), ActionError> {
let osagactx = sagactx.user_data();
let params = sagactx.saga_params::<Params>()?;
let opctx = crate::context::op_context_for_saga_action(
&sagactx,
&params.serialized_authn,
);
let instance_id = sagactx.lookup::<Uuid>("instance_id")?;

osagactx
.nexus()
.create_instance_v2p_mappings(&opctx, instance_id)
.await
.map_err(ActionError::action_failed)?;

Ok(())
}

async fn sic_v2p_ensure_undo(
sagactx: NexusActionContext,
) -> Result<(), anyhow::Error> {
let osagactx = sagactx.user_data();
let params = sagactx.saga_params::<Params>()?;
let opctx = crate::context::op_context_for_saga_action(
&sagactx,
&params.serialized_authn,
);
let instance_id = sagactx.lookup::<Uuid>("instance_id")?;

osagactx
.nexus()
.delete_instance_v2p_mappings(&opctx, instance_id)
.await
.map_err(ActionError::action_failed)?;

Ok(())
}

#[cfg(test)]
pub mod test {
use crate::{
Expand Down Expand Up @@ -1558,6 +1619,11 @@ pub mod test {
);
assert!(disk_is_detached(datastore).await);
assert!(no_instances_or_disks_on_sled(&sled_agent).await);

let v2p_mappings = &*sled_agent.v2p_mappings.lock().await;
for (_nic_id, mappings) in v2p_mappings {
assert!(mappings.is_empty());
}
}

#[nexus_test(server = crate::Server)]
Expand Down
53 changes: 53 additions & 0 deletions nexus/src/app/sagas/instance_delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ pub struct Params {

declare_saga_actions! {
instance_delete;
V2P_ENSURE_UNDO -> "v2p_ensure_undo" {
+ sid_noop
- sid_v2p_ensure_undo
}
V2P_ENSURE -> "v2p_ensure" {
+ sid_v2p_ensure
}
INSTANCE_DELETE_RECORD -> "no_result1" {
+ sid_delete_instance_record
}
Expand Down Expand Up @@ -60,6 +67,8 @@ impl NexusSaga for SagaInstanceDelete {
_params: &Self::Params,
mut builder: steno::DagBuilder,
) -> Result<steno::Dag, super::SagaInitError> {
builder.append(v2p_ensure_undo_action());
builder.append(v2p_ensure_action());
builder.append(delete_asic_configuration_action());
builder.append(instance_delete_record_action());
builder.append(delete_network_interfaces_action());
Expand All @@ -71,6 +80,50 @@ impl NexusSaga for SagaInstanceDelete {

// instance delete saga: action implementations

async fn sid_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> {
Ok(())
}

/// Ensure that the v2p mappings for this instance are deleted
async fn sid_v2p_ensure(
sagactx: NexusActionContext,
) -> Result<(), ActionError> {
let osagactx = sagactx.user_data();
let params = sagactx.saga_params::<Params>()?;
let opctx = crate::context::op_context_for_saga_action(
&sagactx,
&params.serialized_authn,
);

osagactx
.nexus()
.delete_instance_v2p_mappings(&opctx, params.authz_instance.id())
.await
.map_err(ActionError::action_failed)?;

Ok(())
}

/// During unwind, ensure that v2p mappings are created again
async fn sid_v2p_ensure_undo(
sagactx: NexusActionContext,
) -> Result<(), anyhow::Error> {
let osagactx = sagactx.user_data();
let params = sagactx.saga_params::<Params>()?;
let opctx = crate::context::op_context_for_saga_action(
&sagactx,
&params.serialized_authn,
);

osagactx
.nexus()
.create_instance_v2p_mappings(&opctx, params.authz_instance.id())
.await
.map_err(ActionError::action_failed)?;

Ok(())
}

async fn sid_delete_network_config(
sagactx: NexusActionContext,
) -> Result<(), ActionError> {
Expand Down
39 changes: 39 additions & 0 deletions nexus/src/app/sagas/instance_migrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ declare_saga_actions! {
MIGRATE_PREP -> "migrate_instance" {
+ sim_migrate_prep
}
V2P_ENSURE -> "v2p_ensure" {
// TODO robustness: This needs an undo action
+ sim_v2p_ensure
}
INSTANCE_MIGRATE -> "instance_migrate" {
// TODO robustness: This needs an undo action
+ sim_instance_migrate
Expand Down Expand Up @@ -85,6 +89,7 @@ impl NexusSaga for SagaInstanceMigrate {

builder.append(allocate_propolis_ip_action());
builder.append(migrate_prep_action());
builder.append(v2p_ensure_action());
builder.append(instance_migrate_action());
builder.append(cleanup_source_action());

Expand Down Expand Up @@ -136,6 +141,40 @@ async fn sim_allocate_propolis_ip(
allocate_sled_ipv6(&opctx, sagactx, "dst_sled_uuid").await
}

/// Add V2P mappings for the destination instance
async fn sim_v2p_ensure(
sagactx: NexusActionContext,
) -> Result<(), ActionError> {
let osagactx = sagactx.user_data();
let params = sagactx.saga_params::<Params>()?;
let opctx = crate::context::op_context_for_saga_action(
&sagactx,
&params.serialized_authn,
);
let (instance_id, _) =
sagactx.lookup::<(Uuid, InstanceRuntimeState)>("migrate_instance")?;

// TODO-performance the instance_put in sim_instance_migrate will *start* a
// migration, but the source and destination propolis servers will perform
// it asynchronously. Updating the mappings here will briefly "disconnect"
// the source instance in the sense that it will be able to send packets out
luqmana marked this conversation as resolved.
Show resolved Hide resolved
// (as other instance's V2P mappings will be untouched) but will not be able
// to receive any packets (other instances will send packets to the
// destination propolis' sled but the vCPUs may not have started yet). Until
// the destination propolis takes over, there will be a small network outage
// for the instance.
//
// TODO-correctness if the migration fails, there's nothing that will unwind
// this and restore the original V2P mappings
osagactx
.nexus()
.create_instance_v2p_mappings(&opctx, instance_id)
.await
.map_err(ActionError::action_failed)?;

Ok(())
}

async fn sim_instance_migrate(
sagactx: NexusActionContext,
) -> Result<(), ActionError> {
Expand Down
Loading