From 38ebc07d8ad0a77fd069cdf7c50a485cb5da040c Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 10 May 2022 03:43:10 +0000 Subject: [PATCH 1/8] Fixes improves OPTE installation and improves errors - Fixes mismerge in `tools/install_opte.sh` that prevented installing OPTE package on a system that didn't previously have it - Improves error messages when either xde driver fails or the expected virtual networking devices don't exist --- sled-agent/src/illumos/dladm.rs | 17 ++++++++++++++++- sled-agent/src/opte/opte.rs | 26 +++++++++++++++++++++++--- tools/install_opte.sh | 9 ++++----- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index 6bf9c18241..8cf5d34f71 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -13,6 +13,11 @@ use std::str::FromStr; pub const VNIC_PREFIX: &str = "ox"; pub const VNIC_PREFIX_CONTROL: &str = "oxControl"; +/// Prefix used to name VNICs over xde devices / OPTE ports. +// TODO-correctness: Remove this when `xde` devices can be directly used beneath +// Viona, and thus plumbed directly to guests. +pub const VNIC_PREFIX_OPTE: &str = "vopte"; + pub const DLADM: &str = "/usr/sbin/dladm"; /// Errors returned from [`Dladm::find_physical`]. @@ -172,12 +177,22 @@ impl Dladm { let vnics = String::from_utf8_lossy(&output.stdout) .lines() - .filter(|vnic| vnic.starts_with(VNIC_PREFIX)) + .filter(|name| Self::is_sled_agent_vnic(name)) .map(|s| s.to_owned()) .collect(); Ok(vnics) } + // Return true if a VNIC with the provided name may be managed by the sled + // agent. + fn is_sled_agent_vnic>(name: S) -> bool { + let name = name.as_ref(); + name.starts_with(VNIC_PREFIX) || + name.starts_with(VNIC_PREFIX_OPTE) || + name == "net0" || + name == "net1" + } + /// Remove a vnic from the sled. pub fn delete_vnic(name: &str) -> Result<(), DeleteVnicError> { let mut command = std::process::Command::new(PFEXEC); diff --git a/sled-agent/src/opte/opte.rs b/sled-agent/src/opte/opte.rs index 6f62dd021e..9cbbac7490 100644 --- a/sled-agent/src/opte/opte.rs +++ b/sled-agent/src/opte/opte.rs @@ -37,7 +37,13 @@ pub enum Error { CreateVnic(#[from] dladm::CreateVnicError), #[error("Failed to create an IPv6 link-local address for xde underlay devices: {0}")] - UnderlayDevice(#[from] crate::illumos::ExecutionError), + UnderlayDeviceAddress(#[from] crate::illumos::ExecutionError), + + #[error("Failed to get VNICs for xde underlay devices: {0}")] + GetVnic(#[from] crate::illumos::dladm::GetVnicError), + + #[error("No xde driver configuration file exists at '/kernel/drv/xde.conf'")] + NoXdeConf, #[error(transparent)] BadAddrObj(#[from] addrobj::ParseError), @@ -263,11 +269,21 @@ impl Drop for OptePort { /// The xde driver needs information about the physical devices out which it can /// send traffic from the guests. pub fn initialize_xde_driver(log: &Logger) -> Result<(), Error> { + if !std::path::Path::new("/kernel/drv/xde.conf").exists() { + return Err(Error::NoXdeConf); + } let underlay_nics = find_chelsio_links()?; info!(log, "using '{:?}' as data links for xde driver", underlay_nics); if underlay_nics.len() < 2 { + const MESSAGE: &str = concat!( + "There must be at least two underlay NICs for the xde ", + "driver to operate. These are currently created by ", + "`./tools/create_virtual_hardware.sh`. Please ensure that ", + "script has been run, and that two VNICs named `net{0,1}` ", + "exist on the system." + ); return Err(Error::Opte(opte_ioctl::Error::InvalidArgument( - String::from("There must be at least two underlay NICs"), + String::from(MESSAGE) ))); } for nic in &underlay_nics { @@ -294,5 +310,9 @@ fn find_chelsio_links() -> Result, Error> { // `Dladm` to get the real Chelsio links on a Gimlet. These will likely be // called `cxgbeN`, but we explicitly call them `netN` to be clear that // they're likely VNICs for the time being. - Ok((0..2).map(|i| PhysicalLink(format!("net{}", i))).collect()) + Ok(Dladm::get_vnics()? + .into_iter() + .filter(|name| name == "net0" || name == "net1") + .map(PhysicalLink) + .collect()) } diff --git a/tools/install_opte.sh b/tools/install_opte.sh index b2c49694ed..95ebe41862 100755 --- a/tools/install_opte.sh +++ b/tools/install_opte.sh @@ -62,7 +62,7 @@ function sha_from_url { # `helios-netdev` provides the xde kernel driver and the `opteadm` userland tool # for interacting with it. HELIOS_NETDEV_BASE_URL="https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/repo" -HELIOS_NETDEV_COMMIT="cb1767c80d4e9d97cb79901eed3c9d08e1fb3826" +HELIOS_NETDEV_COMMIT="7f57b5d959fcd91100feb14ac83aefcee1c96a50" HELIOS_NETDEV_REPO_URL="$HELIOS_NETDEV_BASE_URL/$HELIOS_NETDEV_COMMIT/opte.p5p" HELIOS_NETDEV_REPO_SHA_URL="$HELIOS_NETDEV_BASE_URL/$HELIOS_NETDEV_COMMIT/opte.p5p.sha256" HELIOS_NETDEV_REPO_PATH="$XDE_DIR/$(basename "$HELIOS_NETDEV_REPO_URL")" @@ -70,7 +70,7 @@ HELIOS_NETDEV_REPO_PATH="$XDE_DIR/$(basename "$HELIOS_NETDEV_REPO_URL")" # The xde repo provides a full OS/Net incorporation, with updated kernel bits # that the `xde` kernel module and OPTE rely on. XDE_REPO_BASE_URL="https://buildomat.eng.oxide.computer/public/file/oxidecomputer/os-build/xde" -XDE_REPO_COMMIT="485065f3b3292e2198db0629341492672b1e29f7" +XDE_REPO_COMMIT="fc0717b76a92d1e317955ec33477133257982670" XDE_REPO_URL="$XDE_REPO_BASE_URL/$XDE_REPO_COMMIT/repo.p5p" XDE_REPO_SHA_URL="$XDE_REPO_BASE_URL/$XDE_REPO_COMMIT/repo.p5p.sha256" XDE_REPO_PATH="$XDE_DIR/$(basename "$XDE_REPO_URL")" @@ -91,9 +91,8 @@ pkg set-publisher -p "$XDE_REPO_PATH" --search-first # Actually update packages, handling case where no updates are needed RC=0 pkg update || RC=$?; -if [[ "$RC" -eq 0 ]] || [[ "$RC" -eq 4 ]]; then - exit 0 -else +if [[ "$RC" -ne 0 ]] && [[ "$RC" -ne 4 ]]; then + echo "Adding OPTE and/or xde package repositories failed" exit "$RC" fi From 3890fc78acf1707a5107a8757053c4e96708fe36 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 10 May 2022 04:10:23 +0000 Subject: [PATCH 2/8] Fmt, fix lifetime for automock --- sled-agent/src/illumos/dladm.rs | 11 +++++------ sled-agent/src/opte/opte.rs | 6 ++++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index 8cf5d34f71..251b2b7d66 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -185,12 +185,11 @@ impl Dladm { // Return true if a VNIC with the provided name may be managed by the sled // agent. - fn is_sled_agent_vnic>(name: S) -> bool { - let name = name.as_ref(); - name.starts_with(VNIC_PREFIX) || - name.starts_with(VNIC_PREFIX_OPTE) || - name == "net0" || - name == "net1" + fn is_sled_agent_vnic(name: &str) -> bool { + name.starts_with(VNIC_PREFIX) + || name.starts_with(VNIC_PREFIX_OPTE) + || name == "net0" + || name == "net1" } /// Remove a vnic from the sled. diff --git a/sled-agent/src/opte/opte.rs b/sled-agent/src/opte/opte.rs index 9cbbac7490..4f51451644 100644 --- a/sled-agent/src/opte/opte.rs +++ b/sled-agent/src/opte/opte.rs @@ -42,7 +42,9 @@ pub enum Error { #[error("Failed to get VNICs for xde underlay devices: {0}")] GetVnic(#[from] crate::illumos::dladm::GetVnicError), - #[error("No xde driver configuration file exists at '/kernel/drv/xde.conf'")] + #[error( + "No xde driver configuration file exists at '/kernel/drv/xde.conf'" + )] NoXdeConf, #[error(transparent)] @@ -283,7 +285,7 @@ pub fn initialize_xde_driver(log: &Logger) -> Result<(), Error> { "exist on the system." ); return Err(Error::Opte(opte_ioctl::Error::InvalidArgument( - String::from(MESSAGE) + String::from(MESSAGE), ))); } for nic in &underlay_nics { From 7e2e9edf58e864844869e9f608e03cbe3711eb39 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 10 May 2022 21:02:51 +0000 Subject: [PATCH 3/8] Add better handling and cleanup of VNICs - Adds a `VnicKind` enum for tracking the flavor of each VNIC the sled agent is responsible for. - Adds parameter to the `Dladm::get_vnics()` call which filters the returned list to a particular kind. The goal here is to be more explicit about which VNICs we're looking for and ultimately operating on in the sled agent. - The sled agent now cleans up guest VNICs and the underlying xde devices (OPTE ports) when it starts up, similar to how it clears out any extant control VNICs, to ensure things are in a reliable state before accepting any requests from Nexus - Improves the `tools/install_opte.sh` script, trying to be less intrusive and only modifying the state we need to change when adding the OPTE / xde package repositories. --- Cargo.lock | 8 ++--- sled-agent/Cargo.toml | 4 +-- sled-agent/src/illumos/dladm.rs | 33 ++++++++++++-------- sled-agent/src/illumos/vnic.rs | 40 ++++++++++++++++++++++-- sled-agent/src/opte/opte.rs | 22 ++++++++++++-- sled-agent/src/sled_agent.rs | 35 +++++++++++++++------ tools/install_opte.sh | 54 +++++++++++++++++++++++++++++++-- 7 files changed, 159 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3faa816e3b..ffd8b170df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2045,7 +2045,7 @@ dependencies = [ [[package]] name = "illumos-ddi-dki" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=cb1767c#cb1767c80d4e9d97cb79901eed3c9d08e1fb3826" +source = "git+https://github.com/oxidecomputer/opte?rev=b998015#b9980158540d15d44cfc5d17fc0a5d1848c5e1ae" dependencies = [ "illumos-sys-hdrs", ] @@ -2053,7 +2053,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=cb1767c#cb1767c80d4e9d97cb79901eed3c9d08e1fb3826" +source = "git+https://github.com/oxidecomputer/opte?rev=b998015#b9980158540d15d44cfc5d17fc0a5d1848c5e1ae" [[package]] name = "impl-trait-for-tuples" @@ -3028,7 +3028,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=cb1767c#cb1767c80d4e9d97cb79901eed3c9d08e1fb3826" +source = "git+https://github.com/oxidecomputer/opte?rev=b998015#b9980158540d15d44cfc5d17fc0a5d1848c5e1ae" dependencies = [ "anymap", "cfg-if 0.1.10", @@ -3045,7 +3045,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=cb1767c#cb1767c80d4e9d97cb79901eed3c9d08e1fb3826" +source = "git+https://github.com/oxidecomputer/opte?rev=b998015#b9980158540d15d44cfc5d17fc0a5d1848c5e1ae" dependencies = [ "libc", "libnet", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index c26cbb1fba..53ef4a6062 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -48,8 +48,8 @@ vsss-rs = { version = "2.0.0-pre2", default-features = false, features = ["std"] zone = "0.1" [target.'cfg(target_os = "illumos")'.dependencies] -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "cb1767c" } -opte = { git = "https://github.com/oxidecomputer/opte", rev = "cb1767c", features = [ "api", "std" ] } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "b998015" } +opte = { git = "https://github.com/oxidecomputer/opte", rev = "b998015", features = [ "api", "std" ] } [dev-dependencies] expectorate = "1.0.5" diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index 251b2b7d66..ec2ebd7b91 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -5,6 +5,7 @@ //! Utilities for poking at data links. use crate::common::vlan::VlanID; +use crate::illumos::vnic::VnicKind; use crate::illumos::{execute, ExecutionError, PFEXEC}; use omicron_common::api::external::MacAddr; use serde::{Deserialize, Serialize}; @@ -16,7 +17,13 @@ pub const VNIC_PREFIX_CONTROL: &str = "oxControl"; /// Prefix used to name VNICs over xde devices / OPTE ports. // TODO-correctness: Remove this when `xde` devices can be directly used beneath // Viona, and thus plumbed directly to guests. -pub const VNIC_PREFIX_OPTE: &str = "vopte"; +pub const VNIC_PREFIX_GUEST: &str = "vopte"; + +/// Names of VNICs used as underlay devices for the xde driver. +pub const XDE_VNIC_NAMES: [&str; 2] = ["net0", "net1"]; + +/// Prefix used to identify xde data links. +pub const XDE_LINK_PREFIX: &str = "opte"; pub const DLADM: &str = "/usr/sbin/dladm"; @@ -169,29 +176,29 @@ impl Dladm { Ok(()) } - /// Returns all VNICs that may be managed by the Sled Agent. - pub fn get_vnics() -> Result, GetVnicError> { + /// Returns VNICs that may be managed by the Sled Agent, optionally + /// restricted to a particular kind. + pub fn get_vnics( + kind: Option, + ) -> Result, GetVnicError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-vnic", "-p", "-o", "LINK"]); let output = execute(cmd).map_err(|err| GetVnicError { err })?; let vnics = String::from_utf8_lossy(&output.stdout) .lines() - .filter(|name| Self::is_sled_agent_vnic(name)) + .filter(|name| { + if let Some(kind) = kind { + VnicKind::from_name(name) == kind + } else { + false + } + }) .map(|s| s.to_owned()) .collect(); Ok(vnics) } - // Return true if a VNIC with the provided name may be managed by the sled - // agent. - fn is_sled_agent_vnic(name: &str) -> bool { - name.starts_with(VNIC_PREFIX) - || name.starts_with(VNIC_PREFIX_OPTE) - || name == "net0" - || name == "net1" - } - /// Remove a vnic from the sled. pub fn delete_vnic(name: &str) -> Result<(), DeleteVnicError> { let mut command = std::process::Command::new(PFEXEC); diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/vnic.rs index c3d118ad1e..14b17175c2 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/vnic.rs @@ -6,7 +6,7 @@ use crate::illumos::dladm::{ CreateVnicError, DeleteVnicError, PhysicalLink, VNIC_PREFIX, - VNIC_PREFIX_CONTROL, + VNIC_PREFIX_CONTROL, VNIC_PREFIX_GUEST, XDE_VNIC_NAMES, }; use omicron_common::api::external::MacAddr; use std::sync::{ @@ -60,7 +60,7 @@ impl VnicAllocator { debug_assert!(name.starts_with(VNIC_PREFIX)); debug_assert!(name.starts_with(VNIC_PREFIX_CONTROL)); Dladm::create_vnic(&self.data_link, &name, mac, None)?; - Ok(Vnic { name, deleted: false }) + Ok(Vnic { name, deleted: false, kind: VnicKind::OxideControl }) } fn new_superscope>(&self, scope: S) -> Self { @@ -82,6 +82,31 @@ impl VnicAllocator { } } +/// Represents the kind of a VNIC, such as whether it's for guest networking or +/// communicating with Oxide services. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum VnicKind { + OxideControl, + Guest, + XdeUnderlay, + Other, +} + +impl VnicKind { + /// Infer the kind from a VNIC's name. + pub fn from_name(name: &str) -> Self { + if name.starts_with(VNIC_PREFIX) { + VnicKind::OxideControl + } else if name.starts_with(VNIC_PREFIX_GUEST) { + VnicKind::Guest + } else if XDE_VNIC_NAMES.contains(&name) { + VnicKind::XdeUnderlay + } else { + VnicKind::Other + } + } +} + /// Represents an allocated VNIC on the system. /// The VNIC is de-allocated when it goes out of scope. /// @@ -92,12 +117,17 @@ impl VnicAllocator { pub struct Vnic { name: String, deleted: bool, + kind: VnicKind, } impl Vnic { /// Takes ownership of an existing VNIC. pub fn wrap_existing>(name: S) -> Self { - Vnic { name: name.as_ref().to_owned(), deleted: false } + Vnic { + name: name.as_ref().to_owned(), + deleted: false, + kind: VnicKind::from_name(name.as_ref()), + } } /// Deletes a NIC (if it has not already been deleted). @@ -113,6 +143,10 @@ impl Vnic { pub fn name(&self) -> &str { &self.name } + + pub fn kind(&self) -> VnicKind { + self.kind + } } impl Drop for Vnic { diff --git a/sled-agent/src/opte/opte.rs b/sled-agent/src/opte/opte.rs index 4f51451644..f13a960f9f 100644 --- a/sled-agent/src/opte/opte.rs +++ b/sled-agent/src/opte/opte.rs @@ -9,7 +9,9 @@ use crate::illumos::addrobj::AddrObject; use crate::illumos::dladm; use crate::illumos::dladm::Dladm; use crate::illumos::dladm::PhysicalLink; +use crate::illumos::dladm::XDE_LINK_PREFIX; use crate::illumos::vnic::Vnic; +use crate::illumos::vnic::VnicKind; use crate::illumos::zone::Zones; use ipnetwork::IpNetwork; use macaddr::MacAddr6; @@ -62,7 +64,7 @@ impl OptePortAllocator { } fn next(&self) -> String { - format!("opte{}", self.next_id()) + format!("{}{}", XDE_LINK_PREFIX, self.next_id()) } fn next_id(&self) -> u64 { @@ -266,6 +268,21 @@ impl Drop for OptePort { } } +/// Delete all xde devices on the system. +pub fn delete_all_xde_devices(log: &Logger) -> Result<(), Error> { + let hdl = OpteHdl::open(OpteHdl::DLD_CTL)?; + for port_info in hdl.list_ports()?.ports.into_iter() { + let name = &port_info.name; + info!( + log, + "deleting existing OPTE port and xde device"; + "device_name" => name + ); + hdl.delete_xde(name)?; + } + Ok(()) +} + /// Initialize the underlay devices required for the xde kernel module. /// /// The xde driver needs information about the physical devices out which it can @@ -312,9 +329,8 @@ fn find_chelsio_links() -> Result, Error> { // `Dladm` to get the real Chelsio links on a Gimlet. These will likely be // called `cxgbeN`, but we explicitly call them `netN` to be clear that // they're likely VNICs for the time being. - Ok(Dladm::get_vnics()? + Ok(Dladm::get_vnics(Some(VnicKind::XdeUnderlay))? .into_iter() - .filter(|name| name == "net0" || name == "net1") .map(PhysicalLink) .collect()) } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index e4017304d1..025422092b 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -5,6 +5,7 @@ //! Sled agent implementation use crate::config::Config; +use crate::illumos::vnic::VnicKind; use crate::illumos::zfs::{ Mountpoint, ZONE_ZFS_DATASET, ZONE_ZFS_DATASET_MOUNTPOINT, }; @@ -148,7 +149,7 @@ impl SledAgent { // to leave the running Zones intact). let zones = Zones::get()?; for z in zones { - warn!(log, "Deleting zone: {}", z.name()); + warn!(log, "Deleting existing zone"; "zone_name" => z.name()); Zones::halt_and_remove_logged(&log, z.name())?; } @@ -162,18 +163,32 @@ impl SledAgent { // This should be accessible via: // $ dladm show-linkprop -c -p zone -o LINK,VALUE // - // Note that this currently deletes only VNICs that start with the - // prefix the sled-agent uses. We'll need to generate an alert or - // otherwise handle VNICs that we _don't_ expect. - let vnics = Dladm::get_vnics()?; - for vnic in vnics - .iter() - .filter(|vnic| vnic.starts_with(crate::illumos::dladm::VNIC_PREFIX)) - { - warn!(log, "Deleting VNIC: {}", vnic); + // Delete VNICs in this order: + // + // - Oxide control VNICs + // - Guest VNICs over xde devices + let vnics = Dladm::get_vnics(Some(VnicKind::OxideControl))? + .into_iter() + .chain(Dladm::get_vnics(Some(VnicKind::Guest))?); + for vnic in vnics { + warn!( + log, + "Deleting existing VNIC"; + "vnic_name" => &vnic, + "vnic_kind" => ?VnicKind::from_name(&vnic), + ); Dladm::delete_vnic(&vnic)?; } + // Also delete any extant xde devices. These should also eventually be + // recovered / tracked, to avoid interruption of any guests that are + // still running. That's currently irrelevant, since we're deleting the + // zones anyway. + // + // This is also tracked by + // https://github.com/oxidecomputer/omicron/issues/725. + crate::opte::delete_all_xde_devices(&log)?; + let storage = StorageManager::new( &log, *id, diff --git a/tools/install_opte.sh b/tools/install_opte.sh index 95ebe41862..04ac63589c 100755 --- a/tools/install_opte.sh +++ b/tools/install_opte.sh @@ -59,10 +59,58 @@ function sha_from_url { curl -L "$SHA_URL" 2> /dev/null | cut -d ' ' -f 1 } +# Echo the stickiness, 'sticky' or 'non-sticky' of the `helios-dev` publisher +function helios_dev_stickiness { + local LINE="$(pkg publisher | grep '^helios-dev')" + if [[ -z "$LINE" ]]; then + echo "Expected a publisher named helios-dev, exiting!" + exit 1 + fi + if [[ -z "$(echo "$LINE" | grep 'non-sticky')" ]]; then + echo "sticky" + else + echo "non-sticky" + fi +} + +# Ensure that the `helios-dev` publisher is non-sticky. This does not modify the +# publisher, if it is already non-sticky. +function ensure_helios_dev_is_non_sticky { + local STICKINESS="$(helios_dev_stickiness)" + if [[ "$STICKINESS" = "sticky" ]]; then + pkg set-publisher --non-sticky helios-dev + STICKINESS="$(helios_dev_stickiness)" + if [[ "$STICKINESS" = "non-sticky" ]]; then + echo "Failed to make helios-dev publisher non-sticky" + exit 1 + fi + else + echo "helios-dev publisher is already non-sticky" + fi +} + +function verify_publisher_search_order { + local EXPECTED=("on-nightly" "helios-netdev" "helios-dev") + local N_EXPECTED="${#EXPECTED[*]}" + readarray -t ACTUAL < <(pkg publisher -H | cut -d ' ' -f 1) + local N_ACTUAL="${#ACTUAL[*]}" + if [[ $N_EXPECTED -ne $N_ACTUAL ]]; then + echo "Mismatched number of publishers, expected: $N_EXPECTED, found: $N_ACTUAL" + exit 1 + fi + for ((i=0;i Date: Tue, 10 May 2022 21:13:07 +0000 Subject: [PATCH 4/8] Add mock_opte::delete_all_xde_devices for non-illumos systems --- sled-agent/src/opte/mock_opte.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sled-agent/src/opte/mock_opte.rs b/sled-agent/src/opte/mock_opte.rs index e048b44981..cdd475b0c1 100644 --- a/sled-agent/src/opte/mock_opte.rs +++ b/sled-agent/src/opte/mock_opte.rs @@ -183,3 +183,8 @@ pub fn initialize_xde_driver(log: &Logger) -> Result<(), Error> { slog::warn!(log, "`xde` driver is a fiction on non-illumos systems"); Ok(()) } + +pub fn delete_all_xde_devices(log: &Logger) -> Result<(), Error> { + slog::warn!(log, "`xde` driver is a fiction on non-illumos systems"); + Ok(()) +} From a5635a19a247a673e6d406774ba3c223d2b5f6b5 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 10 May 2022 21:20:15 +0000 Subject: [PATCH 5/8] Update sled-agent mock context calls --- sled-agent/src/instance_manager.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index bd6e4dff4c..669db64683 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -255,7 +255,7 @@ mod test { zones_get_ctx.expect().return_once(|| Ok(vec![])); let dladm_get_vnics_ctx = MockDladm::get_vnics_context(); - dladm_get_vnics_ctx.expect().return_once(|| Ok(vec![])); + dladm_get_vnics_ctx.expect().return_once(|_| Ok(vec![])); let im = InstanceManager::new( log, @@ -337,7 +337,7 @@ mod test { zones_get_ctx.expect().return_once(|| Ok(vec![])); let dladm_get_vnics_ctx = MockDladm::get_vnics_context(); - dladm_get_vnics_ctx.expect().return_once(|| Ok(vec![])); + dladm_get_vnics_ctx.expect().return_once(|_| Ok(vec![])); let im = InstanceManager::new( log, From 863b5cabe1a88603ee1ef515eba4b8ba464d831e Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 10 May 2022 22:03:41 +0000 Subject: [PATCH 6/8] Be more conservative about what VnicKinds are allowed --- sled-agent/src/illumos/dladm.rs | 34 ++++++++++++------------ sled-agent/src/illumos/running_zone.rs | 5 +++- sled-agent/src/illumos/vnic.rs | 36 +++++++++++++++----------- sled-agent/src/opte/opte.rs | 16 ++++++++---- sled-agent/src/sled_agent.rs | 4 +-- 5 files changed, 55 insertions(+), 40 deletions(-) diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index ec2ebd7b91..35e15e1e28 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -19,12 +19,6 @@ pub const VNIC_PREFIX_CONTROL: &str = "oxControl"; // Viona, and thus plumbed directly to guests. pub const VNIC_PREFIX_GUEST: &str = "vopte"; -/// Names of VNICs used as underlay devices for the xde driver. -pub const XDE_VNIC_NAMES: [&str; 2] = ["net0", "net1"]; - -/// Prefix used to identify xde data links. -pub const XDE_LINK_PREFIX: &str = "opte"; - pub const DLADM: &str = "/usr/sbin/dladm"; /// Errors returned from [`Dladm::find_physical`]. @@ -176,25 +170,31 @@ impl Dladm { Ok(()) } - /// Returns VNICs that may be managed by the Sled Agent, optionally - /// restricted to a particular kind. - pub fn get_vnics( - kind: Option, - ) -> Result, GetVnicError> { + /// Returns VNICs that may be managed by the Sled Agent of a particular + /// kind. + pub fn get_vnics(kind: VnicKind) -> Result, GetVnicError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-vnic", "-p", "-o", "LINK"]); let output = execute(cmd).map_err(|err| GetVnicError { err })?; let vnics = String::from_utf8_lossy(&output.stdout) .lines() - .filter(|name| { - if let Some(kind) = kind { - VnicKind::from_name(name) == kind - } else { - false + .filter_map(|name| { + // Ensure this is a kind of VNIC that the sled agent could be + // responsible for. + match VnicKind::from_name(name) { + Some(vnic_kind) => { + // Ensure matches the caller-specified VnicKind + if vnic_kind == kind { + Some(name.to_owned()) + } else { + None + } + } + // Always ignore this VNIC if it's not a valid VnicKind. + None => None, } }) - .map(|s| s.to_owned()) .collect(); Ok(vnics) } diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index 08227d42ec..68a0a28a99 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -221,11 +221,14 @@ impl RunningZone { }, )?; + let control_vnic = Vnic::wrap_existing(vnic_name) + .expect("Failed to wrap valid control VNIC"); + Ok(Self { inner: InstalledZone { log: log.new(o!("zone" => zone_name.to_string())), name: zone_name.to_string(), - control_vnic: Vnic::wrap_existing(vnic_name), + control_vnic, // TODO(https://github.com/oxidecomputer/omicron/issues/725) // // Re-initialize guest_vnic state by inspecting the zone. diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/vnic.rs index 14b17175c2..03aa872fce 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/vnic.rs @@ -6,7 +6,7 @@ use crate::illumos::dladm::{ CreateVnicError, DeleteVnicError, PhysicalLink, VNIC_PREFIX, - VNIC_PREFIX_CONTROL, VNIC_PREFIX_GUEST, XDE_VNIC_NAMES, + VNIC_PREFIX_CONTROL, VNIC_PREFIX_GUEST, }; use omicron_common::api::external::MacAddr; use std::sync::{ @@ -88,25 +88,26 @@ impl VnicAllocator { pub enum VnicKind { OxideControl, Guest, - XdeUnderlay, - Other, } impl VnicKind { - /// Infer the kind from a VNIC's name. - pub fn from_name(name: &str) -> Self { + /// Infer the kind from a VNIC's name, if this one the sled agent can + /// manage, and `None` otherwise. + pub fn from_name(name: &str) -> Option { if name.starts_with(VNIC_PREFIX) { - VnicKind::OxideControl + Some(VnicKind::OxideControl) } else if name.starts_with(VNIC_PREFIX_GUEST) { - VnicKind::Guest - } else if XDE_VNIC_NAMES.contains(&name) { - VnicKind::XdeUnderlay + Some(VnicKind::Guest) } else { - VnicKind::Other + None } } } +#[derive(thiserror::Error, Debug)] +#[error("VNIC with name '{0}' is not valid for sled agent management")] +pub struct InvalidVnicKind(String); + /// Represents an allocated VNIC on the system. /// The VNIC is de-allocated when it goes out of scope. /// @@ -122,11 +123,16 @@ pub struct Vnic { impl Vnic { /// Takes ownership of an existing VNIC. - pub fn wrap_existing>(name: S) -> Self { - Vnic { - name: name.as_ref().to_owned(), - deleted: false, - kind: VnicKind::from_name(name.as_ref()), + pub fn wrap_existing>( + name: S, + ) -> Result { + match VnicKind::from_name(name.as_ref()) { + Some(kind) => Ok(Vnic { + name: name.as_ref().to_owned(), + deleted: false, + kind, + }), + None => Err(InvalidVnicKind(name.as_ref().to_owned())), } } diff --git a/sled-agent/src/opte/opte.rs b/sled-agent/src/opte/opte.rs index f13a960f9f..ad4dd88d46 100644 --- a/sled-agent/src/opte/opte.rs +++ b/sled-agent/src/opte/opte.rs @@ -9,9 +9,7 @@ use crate::illumos::addrobj::AddrObject; use crate::illumos::dladm; use crate::illumos::dladm::Dladm; use crate::illumos::dladm::PhysicalLink; -use crate::illumos::dladm::XDE_LINK_PREFIX; use crate::illumos::vnic::Vnic; -use crate::illumos::vnic::VnicKind; use crate::illumos::zone::Zones; use ipnetwork::IpNetwork; use macaddr::MacAddr6; @@ -30,6 +28,12 @@ use std::sync::atomic::AtomicU64; use std::sync::atomic::Ordering; use std::sync::Arc; +// Names of VNICs used as underlay devices for the xde driver. +const XDE_VNIC_NAMES: [&str; 2] = ["net0", "net1"]; + +// Prefix used to identify xde data links. +const XDE_LINK_PREFIX: &str = "opte"; + #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Failure interacting with the OPTE ioctl(2) interface: {0}")] @@ -160,7 +164,9 @@ impl OptePortAllocator { Some(omicron_common::api::external::MacAddr(mac)), None, )?; - Some(Vnic::wrap_existing(vnic_name)) + // Safety: We're explicitly creating the VNIC with the prefix + // `VNIC_PREFIX_GUEST`, so this call must return Some(_). + Some(Vnic::wrap_existing(vnic_name).unwrap()) }; Ok(OptePort { @@ -329,8 +335,8 @@ fn find_chelsio_links() -> Result, Error> { // `Dladm` to get the real Chelsio links on a Gimlet. These will likely be // called `cxgbeN`, but we explicitly call them `netN` to be clear that // they're likely VNICs for the time being. - Ok(Dladm::get_vnics(Some(VnicKind::XdeUnderlay))? + Ok(XDE_VNIC_NAMES .into_iter() - .map(PhysicalLink) + .map(|name| PhysicalLink(name.to_string())) .collect()) } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 025422092b..f499d04015 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -167,9 +167,9 @@ impl SledAgent { // // - Oxide control VNICs // - Guest VNICs over xde devices - let vnics = Dladm::get_vnics(Some(VnicKind::OxideControl))? + let vnics = Dladm::get_vnics(VnicKind::OxideControl)? .into_iter() - .chain(Dladm::get_vnics(Some(VnicKind::Guest))?); + .chain(Dladm::get_vnics(VnicKind::Guest)?); for vnic in vnics { warn!( log, From f192973f524d5ca2f94f96243809c5d11e94165b Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 10 May 2022 22:09:22 +0000 Subject: [PATCH 7/8] non-sticky is the goal --- tools/install_opte.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/install_opte.sh b/tools/install_opte.sh index 04ac63589c..1c50249c2c 100755 --- a/tools/install_opte.sh +++ b/tools/install_opte.sh @@ -80,7 +80,7 @@ function ensure_helios_dev_is_non_sticky { if [[ "$STICKINESS" = "sticky" ]]; then pkg set-publisher --non-sticky helios-dev STICKINESS="$(helios_dev_stickiness)" - if [[ "$STICKINESS" = "non-sticky" ]]; then + if [[ "$STICKINESS" = "sticky" ]]; then echo "Failed to make helios-dev publisher non-sticky" exit 1 fi From c3b332931f0a3437bdc454995fe84b9e36ecd273 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 10 May 2022 22:21:06 +0000 Subject: [PATCH 8/8] Remove kind parameter to get_vnics, no longer needed --- sled-agent/src/illumos/dladm.rs | 15 +++------------ sled-agent/src/instance_manager.rs | 4 ++-- sled-agent/src/sled_agent.rs | 14 +++++--------- 3 files changed, 10 insertions(+), 23 deletions(-) diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index 35e15e1e28..69e22ca816 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -170,9 +170,8 @@ impl Dladm { Ok(()) } - /// Returns VNICs that may be managed by the Sled Agent of a particular - /// kind. - pub fn get_vnics(kind: VnicKind) -> Result, GetVnicError> { + /// Returns VNICs that may be managed by the Sled Agent. + pub fn get_vnics() -> Result, GetVnicError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-vnic", "-p", "-o", "LINK"]); let output = execute(cmd).map_err(|err| GetVnicError { err })?; @@ -183,15 +182,7 @@ impl Dladm { // Ensure this is a kind of VNIC that the sled agent could be // responsible for. match VnicKind::from_name(name) { - Some(vnic_kind) => { - // Ensure matches the caller-specified VnicKind - if vnic_kind == kind { - Some(name.to_owned()) - } else { - None - } - } - // Always ignore this VNIC if it's not a valid VnicKind. + Some(_) => Some(name.to_owned()), None => None, } }) diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 669db64683..bd6e4dff4c 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -255,7 +255,7 @@ mod test { zones_get_ctx.expect().return_once(|| Ok(vec![])); let dladm_get_vnics_ctx = MockDladm::get_vnics_context(); - dladm_get_vnics_ctx.expect().return_once(|_| Ok(vec![])); + dladm_get_vnics_ctx.expect().return_once(|| Ok(vec![])); let im = InstanceManager::new( log, @@ -337,7 +337,7 @@ mod test { zones_get_ctx.expect().return_once(|| Ok(vec![])); let dladm_get_vnics_ctx = MockDladm::get_vnics_context(); - dladm_get_vnics_ctx.expect().return_once(|_| Ok(vec![])); + dladm_get_vnics_ctx.expect().return_once(|| Ok(vec![])); let im = InstanceManager::new( log, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index f499d04015..fcea2c627e 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -163,19 +163,15 @@ impl SledAgent { // This should be accessible via: // $ dladm show-linkprop -c -p zone -o LINK,VALUE // - // Delete VNICs in this order: - // - // - Oxide control VNICs - // - Guest VNICs over xde devices - let vnics = Dladm::get_vnics(VnicKind::OxideControl)? - .into_iter() - .chain(Dladm::get_vnics(VnicKind::Guest)?); - for vnic in vnics { + // Note that we don't currently delete the VNICs in any particular + // order. That should be OK, since we're definitely deleting the guest + // VNICs before the xde devices, which is the main constraint. + for vnic in Dladm::get_vnics()? { warn!( log, "Deleting existing VNIC"; "vnic_name" => &vnic, - "vnic_kind" => ?VnicKind::from_name(&vnic), + "vnic_kind" => ?VnicKind::from_name(&vnic).unwrap(), ); Dladm::delete_vnic(&vnic)?; }