Skip to content

Commit

Permalink
Reorganize OPTE port management
Browse files Browse the repository at this point in the history
- Simplify platform deps and module structure
- Add new Port and PortManager types. The port manager is a centralized
  object to manage all OPTE ports. This is currently required in order
  to support correctly implementing the external IP workaround, which
  requires keeping track of all current MAC addresses for guest
  interfaces. This is all modeled after the instance / instance manager
  relationship, where ports remove themselves from the manager on drop.
- Add better logging
- Add better handling of the overlay VNIC, currently also required for
  OPTE to work with Viona.
  • Loading branch information
bnaecker committed Jul 10, 2022
1 parent 0ae9c04 commit c46f934
Show file tree
Hide file tree
Showing 12 changed files with 1,056 additions and 744 deletions.
6 changes: 6 additions & 0 deletions common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1716,6 +1716,12 @@ impl JsonSchema for L4PortRange {
)]
pub struct MacAddr(pub macaddr::MacAddr6);

impl From<macaddr::MacAddr6> for MacAddr {
fn from(mac: macaddr::MacAddr6) -> Self {
Self(mac)
}
}

impl FromStr for MacAddr {
type Err = macaddr::ParseError;

Expand Down
34 changes: 34 additions & 0 deletions sled-agent/src/illumos/dladm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,18 @@ pub struct SetLinkpropError {
err: ExecutionError,
}

/// Errors returned from [`Dladm::reset_linkprop`].
#[derive(thiserror::Error, Debug)]
#[error(
"Failed to reset link property \"{prop_name}\" on vnic {link_name}: {err}"
)]
pub struct ResetLinkpropError {
link_name: String,
prop_name: String,
#[source]
err: ExecutionError,
}

/// The name of a physical datalink.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct PhysicalLink(pub String);
Expand Down Expand Up @@ -346,4 +358,26 @@ impl Dladm {
})?;
Ok(())
}

/// Reset a link property on a VNIC
pub fn reset_linkprop(
vnic: &str,
prop_name: &str,
) -> Result<(), ResetLinkpropError> {
let mut command = std::process::Command::new(PFEXEC);
let cmd = command.args(&[
DLADM,
"reset-linkprop",
"-t",
"-p",
prop_name,
vnic,
]);
execute(cmd).map_err(|err| ResetLinkpropError {
link_name: vnic.to_string(),
prop_name: prop_name.to_string(),
err,
})?;
Ok(())
}
}
14 changes: 6 additions & 8 deletions sled-agent/src/illumos/running_zone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::illumos::dladm::Etherstub;
use crate::illumos::svc::wait_for_service;
use crate::illumos::vnic::{Vnic, VnicAllocator};
use crate::illumos::zone::{AddressRequest, ZONE_PREFIX};
use crate::opte::OptePort;
use crate::opte::Port;
use ipnetwork::IpNetwork;
use slog::Logger;
use std::net::{Ipv4Addr, Ipv6Addr};
Expand Down Expand Up @@ -292,16 +292,14 @@ impl RunningZone {
log: log.new(o!("zone" => zone_name.to_string())),
name: zone_name.to_string(),
control_vnic,
// TODO(https://github.com/oxidecomputer/omicron/issues/725)
//
// Re-initialize guest_vnic state by inspecting the zone.
opte_ports: vec![],
physical_nic: None,
},
})
}

pub fn get_opte_ports(&self) -> &Vec<OptePort> {
/// Return references to the OPTE ports for this zone.
pub fn opte_ports(&self) -> &[Port] {
&self.inner.opte_ports
}
}
Expand Down Expand Up @@ -348,7 +346,7 @@ pub struct InstalledZone {
control_vnic: Vnic,

// OPTE devices for the guest network interfaces
opte_ports: Vec<OptePort>,
opte_ports: Vec<Port>,

// Physical NIC possibly provisioned to the zone.
// TODO: Remove once Nexus traffic is transmitted over OPTE.
Expand Down Expand Up @@ -385,7 +383,7 @@ impl InstalledZone {
unique_name: Option<&str>,
datasets: &[zone::Dataset],
devices: &[zone::Device],
opte_ports: Vec<OptePort>,
opte_ports: Vec<Port>,
physical_nic: Option<Vnic>,
) -> Result<InstalledZone, InstallZoneError> {
let control_vnic = vnic_allocator.new_control(None).map_err(|err| {
Expand All @@ -401,7 +399,7 @@ impl InstalledZone {

let net_device_names: Vec<String> = opte_ports
.iter()
.map(|port| port.vnic().name().to_string())
.map(|port| port.vnic_name().to_string())
.chain(std::iter::once(control_vnic.name().to_string()))
.chain(physical_nic.as_ref().map(|vnic| vnic.name().to_string()))
.collect();
Expand Down
105 changes: 50 additions & 55 deletions sled-agent/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ use crate::illumos::vnic::VnicAllocator;
use crate::illumos::zone::{AddressRequest, PROPOLIS_ZONE_PREFIX};
use crate::instance_manager::InstanceTicket;
use crate::nexus::NexusClient;
use crate::opte::OptePort;
use crate::opte::OptePortAllocator;
use crate::opte::PortManager;
use crate::opte::PortTicket;
use crate::params::ExternalIp;
use crate::params::NetworkInterface;
use crate::params::{
Expand All @@ -32,7 +32,6 @@ use propolis_client::api::DiskRequest;
use propolis_client::Client as PropolisClient;
use slog::Logger;
use std::net::IpAddr;
use std::net::Ipv6Addr;
use std::net::SocketAddr;
use std::sync::Arc;
use tokio::task::JoinHandle;
Expand Down Expand Up @@ -157,7 +156,9 @@ struct RunningState {
// Connection to Propolis.
client: Arc<PropolisClient>,
// Object representing membership in the "instance manager".
ticket: InstanceTicket,
instance_ticket: InstanceTicket,
// Object representing the instance's OPTE ports in the port manager
port_ticket: Option<PortTicket>,
// Handle to task monitoring for Propolis state changes.
monitor_task: Option<JoinHandle<()>>,
// Handle to the zone.
Expand Down Expand Up @@ -190,6 +191,7 @@ impl Drop for RunningState {
struct PropolisSetup {
client: Arc<PropolisClient>,
running_zone: RunningZone,
port_ticket: Option<PortTicket>,
}

struct InstanceInner {
Expand All @@ -207,9 +209,11 @@ struct InstanceInner {
// NIC-related properties
vnic_allocator: VnicAllocator<Etherstub>,

// OPTE port related properties
underlay_addr: Ipv6Addr,
port_allocator: OptePortAllocator,
// Reference to the port manager for creating OPTE ports when starting the
// instance
port_manager: PortManager,

// Guest NIC and OPTE port information
requested_nics: Vec<NetworkInterface>,
external_ip: ExternalIp,

Expand Down Expand Up @@ -288,21 +292,20 @@ impl InstanceInner {
async fn ensure(
&mut self,
instance: Instance,
ticket: InstanceTicket,
instance_ticket: InstanceTicket,
setup: PropolisSetup,
migrate: Option<InstanceMigrateParams>,
) -> Result<(), Error> {
let PropolisSetup { client, running_zone } = setup;
let PropolisSetup { client, running_zone, port_ticket } = setup;

let nics = self
.requested_nics
let nics = running_zone
.opte_ports()
.iter()
.zip(running_zone.get_opte_ports().iter())
.map(|(nic, port)| propolis_client::api::NetworkInterfaceRequest {
.map(|port| propolis_client::api::NetworkInterfaceRequest {
// TODO-correctness: Remove `.vnic()` call when we use the port
// directly.
name: port.vnic().name().to_string(),
slot: propolis_client::api::Slot(nic.slot),
name: port.vnic_name().to_string(),
slot: propolis_client::api::Slot(port.slot()),
})
.collect();

Expand Down Expand Up @@ -350,7 +353,8 @@ impl InstanceInner {

self.running_state = Some(RunningState {
client,
ticket,
instance_ticket,
port_ticket,
monitor_task,
_running_zone: running_zone,
});
Expand Down Expand Up @@ -401,15 +405,14 @@ mockall::mock! {
pub fn new(
log: Logger,
id: Uuid,
initial: InstanceHardware,
vnic_allocator: VnicAllocator<Etherstub>,
underlay_addr: Ipv6Addr,
port_allocator: OptePortAllocator,
initial: InstanceHardware,
nexus_client: Arc<NexusClient>,
) -> Result<Self, Error>;
pub async fn start(
&self,
ticket: InstanceTicket,
instance_ticket: InstanceTicket,
migrate: Option<InstanceMigrateParams>,
) -> Result<(), Error>;
pub async fn transition(
Expand All @@ -434,23 +437,20 @@ impl Instance {
/// Arguments:
/// * `log`: Logger for dumping debug information.
/// * `id`: UUID of the instance to be created.
/// * `initial`: State of the instance at initialization time.
/// * `vnic_allocator`: A unique (to the sled) ID generator to
/// refer to a VNIC. (This exists because of a restriction on VNIC name
/// lengths, otherwise the UUID would be used instead).
/// * `underlay_addr`: The IPv6 underlay address for the sled hosting this
/// instance.
/// * `port_allocator`: A unique (to the sled) ID generator to
/// refer to an OPTE port for the guest network interfaces.
/// * `initial`: State of the instance at initialization time.
/// * `port_manager`: Handle the the object responsible for managing OPTE
/// ports.
/// * `nexus_client`: Connection to Nexus, used for sending notifications.
// TODO: This arg list is getting a little long; can we clean this up?
pub fn new(
log: Logger,
id: Uuid,
vnic_allocator: VnicAllocator<Etherstub>,
underlay_addr: Ipv6Addr,
port_allocator: OptePortAllocator,
initial: InstanceHardware,
vnic_allocator: VnicAllocator<Etherstub>,
port_manager: PortManager,
nexus_client: Arc<NexusClient>,
) -> Result<Self, Error> {
info!(log, "Instance::new w/initial HW: {:?}", initial);
Expand All @@ -472,8 +472,7 @@ impl Instance {
propolis_id: initial.runtime.propolis_id,
propolis_ip: initial.runtime.propolis_addr.unwrap().ip(),
vnic_allocator,
underlay_addr,
port_allocator,
port_manager,
requested_nics: initial.nics,
external_ip: initial.external_ip,
requested_disks: initial.disks,
Expand All @@ -489,34 +488,23 @@ impl Instance {
Ok(Instance { inner })
}

fn create_opte_ports(
async fn setup_propolis_locked(
&self,
inner: &mut MutexGuard<'_, InstanceInner>,
) -> Result<Vec<OptePort>, Error> {
let mut ports = Vec::with_capacity(inner.requested_nics.len());
) -> Result<PropolisSetup, Error> {
// Create OPTE ports for the instance
let mut opte_ports = Vec::with_capacity(inner.requested_nics.len());
for nic in inner.requested_nics.iter() {
let vni = crate::opte::Vni::new(nic.vni).expect("Invalid VNI");
let external_ip =
if nic.primary { Some(inner.external_ip) } else { None };
let port = inner.port_allocator.new_port(
nic.ip,
*nic.mac,
ipnetwork::IpNetwork::from(nic.subnet),
vni,
inner.underlay_addr,
let port = inner.port_manager.create_port(
*inner.id(),
nic,
external_ip,
)?;
info!(inner.log, "created OPTE port for guest"; "port_info" => ?port);
ports.push(port);
opte_ports.push(port);
}
Ok(ports)
}

async fn setup_propolis_locked(
&self,
inner: &mut MutexGuard<'_, InstanceInner>,
) -> Result<PropolisSetup, Error> {
let opte_ports = self.create_opte_ports(inner)?;
let port_ticket = opte_ports.first().map(|port| port.ticket());

// Create a zone for the propolis instance, using the previously
// configured VNICs.
Expand Down Expand Up @@ -545,8 +533,8 @@ impl Instance {
let network = running_zone.ensure_address(addr_request).await?;
info!(inner.log, "Created address {} for zone: {}", network, zname);

let gateway = inner.underlay_addr;
running_zone.add_default_route(gateway).await?;
let gateway = inner.port_manager.underlay_ip();
running_zone.add_default_route(*gateway).await?;

// Run Propolis in the Zone.
let smf_service_name = "svc:/system/illumos/propolis-server";
Expand Down Expand Up @@ -646,13 +634,13 @@ impl Instance {
// don't need to worry about initialization races.
wait_for_http_server(&inner.log, &client).await?;

Ok(PropolisSetup { client, running_zone })
Ok(PropolisSetup { client, running_zone, port_ticket })
}

/// Begins the execution of the instance's service (Propolis).
pub async fn start(
&self,
ticket: InstanceTicket,
instance_ticket: InstanceTicket,
migrate: Option<InstanceMigrateParams>,
) -> Result<(), Error> {
let mut inner = self.inner.lock().await;
Expand All @@ -662,7 +650,7 @@ impl Instance {

// Ensure the instance exists in the Propolis Server before we start
// using it.
inner.ensure(self.clone(), ticket, setup, migrate).await?;
inner.ensure(self.clone(), instance_ticket, setup, migrate).await?;

Ok(())
}
Expand All @@ -675,7 +663,14 @@ impl Instance {
warn!(inner.log, "Halting and removing zone: {}", zname);
Zones::halt_and_remove_logged(&inner.log, &zname).unwrap();

inner.running_state.as_mut().unwrap().ticket.terminate();
// Remove ourselves from the instance manager's map of instances.
let running_state = inner.running_state.as_mut().unwrap();
running_state.instance_ticket.terminate();

// And remove the OPTE ports from the port manager
if let Some(ticket) = running_state.port_ticket.as_mut() {
ticket.release();
}

Ok(())
}
Expand Down
Loading

0 comments on commit c46f934

Please sign in to comment.