From 11371b0f3743f8df5b047dc0edc2699f4bdf3927 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 28 Sep 2024 01:55:08 +0000 Subject: [PATCH] teach propolis-server to understand configurable boot order (#756) while we accept a list of boot options here, note that Nexus intentionally has a more reductive view, maintaining only a single boot_disk. this is heavily based on what i've learned writing tests for how OVMF+guests behave with different mixes of boot device orders and sources of boot configuration. this conversation on RFD 470: https://github.com/oxidecomputer/rfd/pull/751#discussion_r1612632313 gets at the crux of it - we don't have like a platform NVRAM device for guests to persist EFI variables in, so boot configuration is persisted in the EFI system partition (so, associated with a disk, not an instance). because we're not going to go modify the EFI system partition in user disks to effect boot order preferences, we're using the QEMU-style fw_cfg mechanism to indicate boot order. this, in turn, means that guest settings are blown away and replaced with what OVMF determines at boot time. taken together, this isn't an ideal spot to be in if we were to support more complex boot behavior logic, and it's probably not how we want to work multi-device orderings into propolis-server. it also means that guest-managed boot ordering just don't reliably persist if the instance is configured with a specific boot device! in the future with managed nonvolatile storage for guest firmware to persist settings in, we'll be in a somewhat better position to extend boot logic, and so this PR should leave us able to do so without much API hassle. that will probably be more interesting on the control plane side, which is why i'm permissive on the Propolis side, but restrictive on the Nexus side. along the way, this change brings PHD device/backend names in line with propolis-server. --- Cargo.lock | 2 + bin/propolis-cli/src/main.rs | 4 + bin/propolis-server/src/lib/initializer.rs | 86 ++- bin/propolis-server/src/lib/server.rs | 6 + .../src/lib/spec/api_spec_v0.rs | 6 + bin/propolis-server/src/lib/spec/builder.rs | 22 +- bin/propolis-server/src/lib/spec/mod.rs | 6 + bin/propolis-standalone/src/main.rs | 29 +- .../src/instance_spec/v0.rs | 5 + crates/propolis-api-types/src/lib.rs | 15 + lib/propolis-client/src/instance_spec.rs | 28 + lib/propolis-client/src/lib.rs | 2 + openapi/propolis-server-falcon.json | 44 ++ openapi/propolis-server.json | 44 ++ phd-tests/framework/src/disk/crucible.rs | 31 +- phd-tests/framework/src/disk/file.rs | 27 +- phd-tests/framework/src/disk/in_memory.rs | 24 +- phd-tests/framework/src/disk/mod.rs | 52 +- phd-tests/framework/src/test_vm/config.rs | 154 +++-- phd-tests/framework/src/test_vm/mod.rs | 1 + phd-tests/framework/src/test_vm/spec.rs | 4 +- phd-tests/tests/Cargo.toml | 2 + phd-tests/tests/src/boot_order.rs | 529 ++++++++++++++++++ phd-tests/tests/src/boot_order/efi_utils.rs | 518 +++++++++++++++++ phd-tests/tests/src/disk.rs | 1 + phd-tests/tests/src/lib.rs | 1 + phd-tests/tests/src/migrate.rs | 11 +- 27 files changed, 1555 insertions(+), 99 deletions(-) create mode 100644 phd-tests/tests/src/boot_order.rs create mode 100644 phd-tests/tests/src/boot_order/efi_utils.rs diff --git a/Cargo.lock b/Cargo.lock index de72fe926..cc401daab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3855,6 +3855,8 @@ dependencies = [ name = "phd-tests" version = "0.1.0" dependencies = [ + "anyhow", + "byteorder", "futures", "http 1.1.0", "phd-testcase", diff --git a/bin/propolis-cli/src/main.rs b/bin/propolis-cli/src/main.rs index e3224950b..4cf9ac85d 100644 --- a/bin/propolis-cli/src/main.rs +++ b/bin/propolis-cli/src/main.rs @@ -266,6 +266,7 @@ async fn new_instance( // TODO: Allow specifying NICs nics: vec![], disks, + boot_settings: None, migrate: None, cloud_init_bytes, }; @@ -517,6 +518,9 @@ async fn migrate_instance( // TODO: Handle migrating NICs nics: vec![], disks, + // TODO: Handle retaining boot settings? Or extant boot settings + // forwarded along outside InstanceEnsure anyway. + boot_settings: None, migrate: Some(InstanceMigrateInitiateRequest { migration_id: Uuid::new_v4(), src_addr: src_addr.to_string(), diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 58b156d49..c0fd10b49 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -11,7 +11,7 @@ use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use crate::serial::Serial; -use crate::spec::{self, Spec, StorageBackend}; +use crate::spec::{self, Spec, StorageBackend, StorageDevice}; use crate::stats::{ track_network_interface_kstats, track_vcpu_kstats, VirtualDiskProducer, VirtualMachine, @@ -34,7 +34,11 @@ use propolis::hw::ibmpc; use propolis::hw::pci; use propolis::hw::ps2::ctrl::PS2Ctrl; use propolis::hw::qemu::pvpanic::QemuPvpanic; -use propolis::hw::qemu::{debug::QemuDebugPort, fwcfg, ramfb}; +use propolis::hw::qemu::{ + debug::QemuDebugPort, + fwcfg::{self, Entry}, + ramfb, +}; use propolis::hw::uart::LpcUart; use propolis::hw::{nvme, virtio}; use propolis::intr_pins; @@ -1001,6 +1005,80 @@ impl<'a> MachineInitializer<'a> { smb_tables.commit() } + fn generate_bootorder(&self) -> Result, Error> { + info!( + self.log, + "Generating bootorder with order: {:?}", + self.spec.boot_order.as_ref() + ); + let Some(boot_names) = self.spec.boot_order.as_ref() else { + return Ok(None); + }; + + let mut order = fwcfg::formats::BootOrder::new(); + + let parse_bdf = + |pci_path: &propolis_api_types::instance_spec::PciPath| { + let bdf: Result = + pci_path.to_owned().try_into().map_err(|e| { + Error::new( + ErrorKind::InvalidInput, + format!( + "Couldn't get PCI BDF for {}: {}", + pci_path, e + ), + ) + }); + + bdf + }; + + for boot_entry in boot_names.iter() { + // Theoretically we could support booting from network devices by + // matching them here and adding their PCI paths, but exactly what + // would happen is ill-understood. So, only check disks here. + if let Some(spec) = self.spec.disks.get(boot_entry.name.as_str()) { + match &spec.device_spec { + StorageDevice::Virtio(disk) => { + let bdf = parse_bdf(&disk.pci_path)?; + if bdf.bus.get() != 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "Boot device currently must be on PCI bus 0", + )); + } + + order.add_disk(bdf.location); + } + StorageDevice::Nvme(disk) => { + let bdf = parse_bdf(&disk.pci_path)?; + if bdf.bus.get() != 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "Boot device currently must be on PCI bus 0", + )); + } + + // TODO: separately, propolis-standalone passes an eui64 + // of 0, so do that here too. is that.. ok? + order.add_nvme(bdf.location, 0); + } + }; + } else { + // This should be unreachable - we check that the boot disk is + // valid when constructing the spec we're initializing from. + let message = format!( + "Instance spec included boot entry which does not refer \ + to an existing disk: `{}`", + boot_entry.name.as_str(), + ); + return Err(Error::new(ErrorKind::InvalidInput, message)); + } + } + + Ok(Some(order.finish())) + } + /// Initialize qemu `fw_cfg` device, and populate it with data including CPU /// count, SMBIOS tables, and attached RAM-FB device. /// @@ -1032,6 +1110,10 @@ impl<'a> MachineInitializer<'a> { ) .unwrap(); + if let Some(boot_order) = self.generate_bootorder()? { + fwcfg.insert_named("bootorder", boot_order).unwrap(); + } + let ramfb = ramfb::RamFb::create( self.log.new(slog::o!("component" => "ramfb")), ); diff --git a/bin/propolis-server/src/lib/server.rs b/bin/propolis-server/src/lib/server.rs index 9ef4ebc7b..ae44f8248 100644 --- a/bin/propolis-server/src/lib/server.rs +++ b/bin/propolis-server/src/lib/server.rs @@ -129,6 +129,12 @@ fn instance_spec_from_request( spec_builder.add_disk_from_request(disk)?; } + if let Some(boot_settings) = request.boot_settings.as_ref() { + for item in boot_settings.order.iter() { + spec_builder.add_boot_option(item)?; + } + } + if let Some(base64) = &request.cloud_init_bytes { spec_builder.add_cloud_init_from_request(base64.clone())?; } diff --git a/bin/propolis-server/src/lib/spec/api_spec_v0.rs b/bin/propolis-server/src/lib/spec/api_spec_v0.rs index 67aef2e05..804b4749e 100644 --- a/bin/propolis-server/src/lib/spec/api_spec_v0.rs +++ b/bin/propolis-server/src/lib/spec/api_spec_v0.rs @@ -247,6 +247,12 @@ impl TryFrom for Spec { return Err(ApiSpecError::BackendNotUsed(backend.to_owned())); } + if let Some(boot_settings) = value.devices.boot_settings.as_ref() { + for item in boot_settings.order.iter() { + builder.add_boot_option(item)?; + } + } + for (name, serial_port) in value.devices.serial_ports { builder.add_serial_port(name, serial_port.num)?; } diff --git a/bin/propolis-server/src/lib/spec/builder.rs b/bin/propolis-server/src/lib/spec/builder.rs index fa4b3b830..3357d8b43 100644 --- a/bin/propolis-server/src/lib/spec/builder.rs +++ b/bin/propolis-server/src/lib/spec/builder.rs @@ -14,7 +14,7 @@ use propolis_api_types::{ }, PciPath, }, - DiskRequest, InstanceProperties, NetworkInterfaceRequest, + BootOrderEntry, DiskRequest, InstanceProperties, NetworkInterfaceRequest, }; use thiserror::Error; @@ -57,6 +57,9 @@ pub(crate) enum SpecBuilderError { #[error("pvpanic device already specified")] PvpanicInUse, + + #[error("Boot option {0} is not an attached device")] + BootOptionMissing(String), } #[derive(Debug, Default)] @@ -110,6 +113,23 @@ impl SpecBuilder { Ok(()) } + /// Add a boot option to the boot option list of the spec under construction. + pub fn add_boot_option( + &mut self, + item: &BootOrderEntry, + ) -> Result<(), SpecBuilderError> { + if !self.spec.disks.contains_key(item.name.as_str()) { + return Err(SpecBuilderError::BootOptionMissing(item.name.clone())); + } + + let boot_order = self.spec.boot_order.get_or_insert(Vec::new()); + + boot_order + .push(crate::spec::BootOrderEntry { name: item.name.clone() }); + + Ok(()) + } + /// Converts an HTTP API request to add a cloud-init disk to an instance /// into device/backend entries in the spec under construction. pub fn add_cloud_init_from_request( diff --git a/bin/propolis-server/src/lib/spec/mod.rs b/bin/propolis-server/src/lib/spec/mod.rs index 189463a6c..230943537 100644 --- a/bin/propolis-server/src/lib/spec/mod.rs +++ b/bin/propolis-server/src/lib/spec/mod.rs @@ -57,6 +57,7 @@ pub(crate) struct Spec { pub board: Board, pub disks: HashMap, pub nics: HashMap, + pub boot_order: Option>, pub serial: HashMap, @@ -67,6 +68,11 @@ pub(crate) struct Spec { pub softnpu: SoftNpu, } +#[derive(Clone, Debug, Default)] +pub(crate) struct BootOrderEntry { + pub name: String, +} + /// Describes the device half of a [`Disk`]. #[derive(Clone, Debug)] pub enum StorageDevice { diff --git a/bin/propolis-standalone/src/main.rs b/bin/propolis-standalone/src/main.rs index 1f210b661..a54bca1e2 100644 --- a/bin/propolis-standalone/src/main.rs +++ b/bin/propolis-standalone/src/main.rs @@ -959,8 +959,19 @@ fn generate_smbios(params: SmbiosParams) -> anyhow::Result { Ok(smb_tables.commit()) } -fn generate_bootorder(config: &config::Config) -> anyhow::Result { - let names = config.main.boot_order.as_ref().unwrap(); +fn generate_bootorder( + config: &config::Config, + log: &slog::Logger, +) -> anyhow::Result> { + let Some(names) = config.main.boot_order.as_ref() else { + return Ok(None); + }; + + slog::info!( + log, + "Bootorder declared as {:?}", + config.main.boot_order.as_ref() + ); let mut order = fwcfg::formats::BootOrder::new(); for name in names.iter() { @@ -994,7 +1005,7 @@ fn generate_bootorder(config: &config::Config) -> anyhow::Result { } } } - Ok(order.finish()) + Ok(Some(order.finish())) } fn setup_instance( @@ -1306,14 +1317,10 @@ fn setup_instance( // It is "safe" to generate bootorder (if requested) now, given that PCI // device configuration has been validated by preceding logic - if config.main.boot_order.is_some() { - fwcfg - .insert_named( - "bootorder", - generate_bootorder(&config) - .context("Failed to generate boot order")?, - ) - .unwrap(); + if let Some(boot_config) = generate_bootorder(&config, log) + .context("Failed to generate boot order")? + { + fwcfg.insert_named("bootorder", boot_config).unwrap(); } fwcfg.attach(pio, &machine.acc_mem); diff --git a/crates/propolis-api-types/src/instance_spec/v0.rs b/crates/propolis-api-types/src/instance_spec/v0.rs index 13b5cedbb..31451dcdc 100644 --- a/crates/propolis-api-types/src/instance_spec/v0.rs +++ b/crates/propolis-api-types/src/instance_spec/v0.rs @@ -61,6 +61,11 @@ pub struct DeviceSpecV0 { #[serde(skip_serializing_if = "Option::is_none")] pub qemu_pvpanic: Option, + // Same backwards compatibility reasoning as above. + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub boot_settings: Option, + #[cfg(feature = "falcon")] pub softnpu_pci_port: Option, #[cfg(feature = "falcon")] diff --git a/crates/propolis-api-types/src/lib.rs b/crates/propolis-api-types/src/lib.rs index 3afe4b530..593a2ec4b 100644 --- a/crates/propolis-api-types/src/lib.rs +++ b/crates/propolis-api-types/src/lib.rs @@ -52,6 +52,9 @@ pub struct InstanceEnsureRequest { #[serde(default)] pub disks: Vec, + #[serde(default)] + pub boot_settings: Option, + pub migrate: Option, // base64 encoded cloud-init ISO @@ -385,6 +388,18 @@ pub struct DiskAttachment { pub state: DiskAttachmentState, } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct BootSettings { + pub order: Vec, +} + +/// An entry in a list of boot options. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct BootOrderEntry { + /// The name of the device to attempt booting from. + pub name: String, +} + /// A stable index which is translated by Propolis /// into a PCI BDF, visible to the guest. #[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] diff --git a/lib/propolis-client/src/instance_spec.rs b/lib/propolis-client/src/instance_spec.rs index e99303cd3..8c75bee98 100644 --- a/lib/propolis-client/src/instance_spec.rs +++ b/lib/propolis-client/src/instance_spec.rs @@ -184,6 +184,34 @@ impl SpecBuilderV0 { } } + /// Sets a boot order. Names here refer to devices included in this spec. + /// + /// Permissible to not set this if the implicit boot order is desired, but + /// the implicit boot order may be unstable across device addition and + /// removal. + /// + /// If any devices named in this order are not actually present in the + /// constructed spec, Propolis will return an error when the spec is + /// provided. + /// + /// XXX: this should certainly return `&mut Self` - all the builders here + /// should. check if any of these are chained..? + pub fn set_boot_order( + &mut self, + boot_order: Vec, + ) -> Result<&Self, SpecBuilderError> { + let boot_order = boot_order + .into_iter() + .map(|name| crate::types::BootOrderEntry { name }) + .collect(); + + let settings = crate::types::BootSettings { order: boot_order }; + + self.spec.devices.boot_settings = Some(settings); + + Ok(self) + } + /// Yields the completed spec, consuming the builder. pub fn finish(self) -> InstanceSpecV0 { self.spec diff --git a/lib/propolis-client/src/lib.rs b/lib/propolis-client/src/lib.rs index 3fc2d6f9c..e0c769347 100644 --- a/lib/propolis-client/src/lib.rs +++ b/lib/propolis-client/src/lib.rs @@ -18,6 +18,8 @@ progenitor::generate_api!( // Some Crucible-related bits are re-exported through simulated // sled-agent and thus require JsonSchema + BootOrderEntry = { derives = [schemars::JsonSchema] }, + BootSettings = { derives = [schemars::JsonSchema] }, DiskRequest = { derives = [schemars::JsonSchema] }, VolumeConstructionRequest = { derives = [schemars::JsonSchema] }, CrucibleOpts = { derives = [schemars::JsonSchema] }, diff --git a/openapi/propolis-server-falcon.json b/openapi/propolis-server-falcon.json index acb4a3357..baf6dc95d 100644 --- a/openapi/propolis-server-falcon.json +++ b/openapi/propolis-server-falcon.json @@ -529,6 +529,33 @@ ], "additionalProperties": false }, + "BootOrderEntry": { + "description": "An entry in a list of boot options.", + "type": "object", + "properties": { + "name": { + "description": "The name of the device to attempt booting from.", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "BootSettings": { + "type": "object", + "properties": { + "order": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BootOrderEntry" + } + } + }, + "required": [ + "order" + ] + }, "Chipset": { "description": "A kind of virtual chipset.", "oneOf": [ @@ -631,6 +658,14 @@ "board": { "$ref": "#/components/schemas/Board" }, + "boot_settings": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/BootSettings" + } + ] + }, "network_devices": { "type": "object", "additionalProperties": { @@ -876,6 +911,15 @@ "InstanceEnsureRequest": { "type": "object", "properties": { + "boot_settings": { + "nullable": true, + "default": null, + "allOf": [ + { + "$ref": "#/components/schemas/BootSettings" + } + ] + }, "cloud_init_bytes": { "nullable": true, "type": "string" diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index 9df76ffb7..41f3fb5a2 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -529,6 +529,33 @@ ], "additionalProperties": false }, + "BootOrderEntry": { + "description": "An entry in a list of boot options.", + "type": "object", + "properties": { + "name": { + "description": "The name of the device to attempt booting from.", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "BootSettings": { + "type": "object", + "properties": { + "order": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BootOrderEntry" + } + } + }, + "required": [ + "order" + ] + }, "Chipset": { "description": "A kind of virtual chipset.", "oneOf": [ @@ -631,6 +658,14 @@ "board": { "$ref": "#/components/schemas/Board" }, + "boot_settings": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/BootSettings" + } + ] + }, "network_devices": { "type": "object", "additionalProperties": { @@ -845,6 +880,15 @@ "InstanceEnsureRequest": { "type": "object", "properties": { + "boot_settings": { + "nullable": true, + "default": null, + "allOf": [ + { + "$ref": "#/components/schemas/BootSettings" + } + ] + }, "cloud_init_bytes": { "nullable": true, "type": "string" diff --git a/phd-tests/framework/src/disk/crucible.rs b/phd-tests/framework/src/disk/crucible.rs index 04d71b595..c8b543450 100644 --- a/phd-tests/framework/src/disk/crucible.rs +++ b/phd-tests/framework/src/disk/crucible.rs @@ -21,7 +21,9 @@ use tracing::{error, info}; use uuid::Uuid; use super::BlockSize; -use crate::{guest_os::GuestOsKind, server_log_mode::ServerLogMode}; +use crate::{ + disk::DeviceName, guest_os::GuestOsKind, server_log_mode::ServerLogMode, +}; /// An RAII wrapper around a directory containing Crucible data files. Deletes /// the directory and its contents when dropped. @@ -60,8 +62,8 @@ impl Drop for Downstairs { /// An RAII wrapper around a Crucible disk. #[derive(Debug)] pub struct CrucibleDisk { - /// The name of the backend to use in instance specs that include this disk. - backend_name: String, + /// The name to use in instance specs that include this disk. + device_name: DeviceName, /// The UUID to insert into this disk's `VolumeConstructionRequest`s. id: Uuid, @@ -97,7 +99,7 @@ impl CrucibleDisk { /// `data_dir`. #[allow(clippy::too_many_arguments)] pub(crate) fn new( - backend_name: String, + device_name: DeviceName, min_disk_size_gib: u64, block_size: BlockSize, downstairs_binary_path: &impl AsRef, @@ -251,7 +253,7 @@ impl CrucibleDisk { } Ok(Self { - backend_name, + device_name, id: disk_uuid, block_size, blocks_per_extent, @@ -280,7 +282,11 @@ impl CrucibleDisk { } impl super::DiskConfig for CrucibleDisk { - fn backend_spec(&self) -> (String, StorageBackendV0) { + fn device_name(&self) -> &DeviceName { + &self.device_name + } + + fn backend_spec(&self) -> StorageBackendV0 { let gen = self.generation.load(Ordering::Relaxed); let downstairs_addrs = self .downstairs_instances @@ -318,14 +324,11 @@ impl super::DiskConfig for CrucibleDisk { }), }; - ( - self.backend_name.clone(), - StorageBackendV0::Crucible(CrucibleStorageBackend { - request_json: serde_json::to_string(&vcr) - .expect("VolumeConstructionRequest should serialize"), - readonly: false, - }), - ) + StorageBackendV0::Crucible(CrucibleStorageBackend { + request_json: serde_json::to_string(&vcr) + .expect("VolumeConstructionRequest should serialize"), + readonly: false, + }) } fn guest_os(&self) -> Option { diff --git a/phd-tests/framework/src/disk/file.rs b/phd-tests/framework/src/disk/file.rs index 76eb409b6..3fbf05308 100644 --- a/phd-tests/framework/src/disk/file.rs +++ b/phd-tests/framework/src/disk/file.rs @@ -9,7 +9,9 @@ use propolis_client::types::{FileStorageBackend, StorageBackendV0}; use tracing::{debug, error, warn}; use uuid::Uuid; -use crate::{guest_os::GuestOsKind, zfs::ClonedFile as ZfsClonedFile}; +use crate::{ + disk::DeviceName, guest_os::GuestOsKind, zfs::ClonedFile as ZfsClonedFile, +}; /// Describes the method used to create the backing file for a file-backed disk. #[derive(Debug)] @@ -80,7 +82,7 @@ impl Drop for BackingFile { #[derive(Debug)] pub struct FileBackedDisk { /// The name to use for instance spec backends that refer to this disk. - backend_name: String, + device_name: DeviceName, /// The backing file for this disk. file: BackingFile, @@ -94,7 +96,7 @@ impl FileBackedDisk { /// Creates a new file-backed disk whose initial contents are copied from /// the specified artifact on the host file system. pub(crate) fn new_from_artifact( - backend_name: String, + device_name: DeviceName, artifact_path: &impl AsRef, data_dir: &impl AsRef, guest_os: Option, @@ -116,19 +118,20 @@ impl FileBackedDisk { permissions.set_readonly(false); disk_file.set_permissions(permissions)?; - Ok(Self { backend_name, file: artifact, guest_os }) + Ok(Self { device_name, file: artifact, guest_os }) } } impl super::DiskConfig for FileBackedDisk { - fn backend_spec(&self) -> (String, StorageBackendV0) { - ( - self.backend_name.clone(), - StorageBackendV0::File(FileStorageBackend { - path: self.file.path().to_string(), - readonly: false, - }), - ) + fn device_name(&self) -> &DeviceName { + &self.device_name + } + + fn backend_spec(&self) -> StorageBackendV0 { + StorageBackendV0::File(FileStorageBackend { + path: self.file.path().to_string(), + readonly: false, + }) } fn guest_os(&self) -> Option { diff --git a/phd-tests/framework/src/disk/in_memory.rs b/phd-tests/framework/src/disk/in_memory.rs index 51a57f680..5300c4f68 100644 --- a/phd-tests/framework/src/disk/in_memory.rs +++ b/phd-tests/framework/src/disk/in_memory.rs @@ -7,11 +7,12 @@ use propolis_client::types::{BlobStorageBackend, StorageBackendV0}; use super::DiskConfig; +use crate::disk::DeviceName; /// A disk with an in-memory backend. #[derive(Debug)] pub struct InMemoryDisk { - backend_name: String, + device_name: DeviceName, contents: Vec, readonly: bool, } @@ -20,28 +21,29 @@ impl InMemoryDisk { /// Creates an in-memory backend that will present the supplied `contents` /// to the guest. pub fn new( - backend_name: String, + device_name: DeviceName, contents: Vec, readonly: bool, ) -> Self { - Self { backend_name, contents, readonly } + Self { device_name, contents, readonly } } } impl DiskConfig for InMemoryDisk { - fn backend_spec(&self) -> (String, StorageBackendV0) { + fn device_name(&self) -> &DeviceName { + &self.device_name + } + + fn backend_spec(&self) -> StorageBackendV0 { let base64 = base64::Engine::encode( &base64::engine::general_purpose::STANDARD, self.contents.as_slice(), ); - ( - self.backend_name.clone(), - StorageBackendV0::Blob(BlobStorageBackend { - base64, - readonly: self.readonly, - }), - ) + StorageBackendV0::Blob(BlobStorageBackend { + base64, + readonly: self.readonly, + }) } fn guest_os(&self) -> Option { diff --git a/phd-tests/framework/src/disk/mod.rs b/phd-tests/framework/src/disk/mod.rs index e3803e3da..35ec84af4 100644 --- a/phd-tests/framework/src/disk/mod.rs +++ b/phd-tests/framework/src/disk/mod.rs @@ -67,10 +67,54 @@ impl BlockSize { } } +/// The name for the device implementing a disk. This is the name provided for a +/// disk in constructing a VM spec for PHD tests. The disk by this name likely +/// also has a [`BackendName`] derived from this device name. +/// +/// TODO: This exists largely to ensure that PHD matches the same spec +/// construction conventions as `propolis-server` when handling API requests: it +/// is another piece of glue that could reasonably be deleted if/when PHD and +/// sled-agent use the same code to build InstanceEnsureRequest. Until then, +/// carefully match the relationship between names with these newtypes. +/// +/// Alternatively, `DeviceName` and `BackendName` could be pulled into +/// `propolis-api-types`. +#[derive(Clone, Debug)] +pub struct DeviceName(String); + +impl DeviceName { + pub fn new(name: String) -> Self { + DeviceName(name) + } + + pub fn into_backend_name(self) -> BackendName { + // This must match `api_request.rs`' `parse_disk_from_request`. + BackendName(format!("{}-backend", self.0)) + } + + pub fn into_string(self) -> String { + self.0 + } +} + +/// The name for a backend implementing storage for a disk. This is derived +/// exclusively from a corresponding [`DeviceName`]. +#[derive(Clone, Debug)] +pub struct BackendName(String); + +impl BackendName { + pub fn into_string(self) -> String { + self.0 + } +} + /// A trait for functions exposed by all disk backends (files, Crucible, etc.). pub trait DiskConfig: std::fmt::Debug + Send + Sync { + /// Yields the device name for this disk. + fn device_name(&self) -> &DeviceName; + /// Yields the backend spec for this disk's storage backend. - fn backend_spec(&self) -> (String, StorageBackendV0); + fn backend_spec(&self) -> StorageBackendV0; /// Yields the guest OS kind of the guest image the disk was created from, /// or `None` if the disk was not created from a guest image. @@ -182,7 +226,7 @@ impl DiskFactory { /// by `source`. pub(crate) async fn create_file_backed_disk<'d>( &self, - name: String, + name: DeviceName, source: &DiskSource<'d>, ) -> Result, DiskError> { let artifact_name = match source { @@ -226,7 +270,7 @@ impl DiskFactory { /// - block_size: The disk's block size. pub(crate) async fn create_crucible_disk<'d>( &self, - name: String, + name: DeviceName, source: &DiskSource<'d>, mut min_disk_size_gib: u64, block_size: BlockSize, @@ -278,7 +322,7 @@ impl DiskFactory { pub(crate) async fn create_in_memory_disk<'d>( &self, - name: String, + name: DeviceName, source: &DiskSource<'d>, readonly: bool, ) -> Result, DiskError> { diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index 77673132d..ddfdb42f8 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -16,7 +16,7 @@ use propolis_client::{ use uuid::Uuid; use crate::{ - disk::{DiskConfig, DiskSource}, + disk::{DeviceName, DiskConfig, DiskSource}, test_vm::spec::VmSpec, Framework, }; @@ -37,6 +37,7 @@ pub enum DiskBackend { #[derive(Clone, Debug)] struct DiskRequest<'a> { + name: &'a str, interface: DiskInterface, backend: DiskBackend, source: DiskSource<'a>, @@ -48,8 +49,8 @@ pub struct VmConfig<'dr> { cpus: u8, memory_mib: u64, bootrom_artifact: String, - boot_disk: DiskRequest<'dr>, - data_disks: Vec>, + boot_order: Option>, + disks: Vec>, devices: BTreeMap, } @@ -63,22 +64,24 @@ impl<'dr> VmConfig<'dr> { bootrom: &str, guest_artifact: &'dr str, ) -> Self { - let boot_disk = DiskRequest { - interface: DiskInterface::Nvme, - backend: DiskBackend::File, - source: DiskSource::Artifact(guest_artifact), - pci_device_num: 4, - }; - - Self { + let mut config = Self { vm_name: vm_name.to_owned(), cpus, memory_mib, bootrom_artifact: bootrom.to_owned(), - boot_disk, - data_disks: Vec::new(), + boot_order: None, + disks: Vec::new(), devices: BTreeMap::new(), - } + }; + + config.boot_disk( + guest_artifact, + DiskInterface::Nvme, + DiskBackend::File, + 4, + ); + + config } pub fn cpus(&mut self, cpus: u8) -> &mut Self { @@ -119,6 +122,21 @@ impl<'dr> VmConfig<'dr> { self } + pub fn boot_order(&mut self, disks: Vec<&'dr str>) -> &mut Self { + self.boot_order = Some(disks); + self + } + + pub fn clear_boot_order(&mut self) -> &mut Self { + self.boot_order = None; + self + } + + /// Add a new disk to the VM config, and add it to the front of the VM's + /// boot order. + /// + /// The added disk will have the name `boot-disk`, and replace the previous + /// existing `boot-disk`. pub fn boot_disk( &mut self, artifact: &'dr str, @@ -126,24 +144,42 @@ impl<'dr> VmConfig<'dr> { backend: DiskBackend, pci_device_num: u8, ) -> &mut Self { - self.boot_disk = DiskRequest { + let boot_order = self.boot_order.get_or_insert(Vec::new()); + if let Some(prev_boot_item) = + boot_order.iter().position(|d| *d == "boot-disk") + { + boot_order.remove(prev_boot_item); + } + + if let Some(prev_boot_disk) = + self.disks.iter().position(|d| d.name == "boot-disk") + { + self.disks.remove(prev_boot_disk); + } + + boot_order.insert(0, "boot-disk"); + + self.data_disk( + "boot-disk", + DiskSource::Artifact(artifact), interface, backend, - source: DiskSource::Artifact(artifact), pci_device_num, - }; + ); self } pub fn data_disk( &mut self, + name: &'dr str, source: DiskSource<'dr>, interface: DiskInterface, backend: DiskBackend, pci_device_num: u8, ) -> &mut Self { - self.data_disks.push(DiskRequest { + self.disks.push(DiskRequest { + name, interface, backend, source, @@ -172,7 +208,38 @@ impl<'dr> VmConfig<'dr> { }) .context("serializing Propolis server config")?; - let DiskSource::Artifact(boot_artifact) = self.boot_disk.source else { + // The first disk in the boot list might not be the disk a test + // *actually* expects to boot. + // + // If there are multiple bootable disks in the boot order, we'll assume + // they're all the same guest OS kind. So look for `boot-disk` - if + // there isn't a disk named `boot-disk` then fall back to hoping the + // first disk in the boot order is a bootable disk, and if *that* isn't + // a bootable disk, maybe the first disk is. + // + // TODO: theoretically we might want to accept configuration of a + // specific guest OS adapter and avoid the guessing games. So far the + // above supports existing tests and makes them "Just Work", but a more + // complicated test may want more control here. + let boot_disk = self + .disks + .iter() + .find(|d| d.name == "boot-disk") + .or_else(|| { + if let Some(boot_order) = self.boot_order.as_ref() { + boot_order.first().and_then(|name| { + self.disks.iter().find(|d| &d.name == name) + }) + } else { + None + } + }) + .or_else(|| self.disks.first()) + .expect("VM config includes at least one disk"); + + // XXX: assuming all bootable images are equivalent to the first, or at + // least the same guest OS kind. + let DiskSource::Artifact(boot_artifact) = boot_disk.source else { unreachable!("boot disks always use artifacts as sources"); }; @@ -183,16 +250,11 @@ impl<'dr> VmConfig<'dr> { .context("getting guest OS kind for boot disk")?; let mut disk_handles = Vec::new(); - disk_handles.push( - make_disk("boot-disk".to_owned(), framework, &self.boot_disk) - .await - .context("creating boot disk")?, - ); - for (idx, data_disk) in self.data_disks.iter().enumerate() { + for disk in self.disks.iter() { disk_handles.push( - make_disk(format!("data-disk-{}", idx), framework, data_disk) + make_disk(disk.name.to_owned(), framework, disk) .await - .context("creating data disk")?, + .context("creating disk")?, ); } @@ -203,32 +265,30 @@ impl<'dr> VmConfig<'dr> { // elements for all of them. This assumes the disk handles were created // in the correct order: boot disk first, then in the data disks' // iteration order. - let all_disks = [&self.boot_disk] - .into_iter() - .chain(self.data_disks.iter()) - .zip(disk_handles.iter()); - for (idx, (req, hdl)) in all_disks.enumerate() { - let device_name = format!("disk-device{}", idx); + let all_disks = self.disks.iter().zip(disk_handles.iter()); + for (req, hdl) in all_disks { let pci_path = PciPath::new(0, req.pci_device_num, 0).unwrap(); - let (backend_name, backend_spec) = hdl.backend_spec(); + let backend_spec = hdl.backend_spec(); + let device_name = hdl.device_name().clone(); + let backend_name = device_name.clone().into_backend_name(); let device_spec = match req.interface { DiskInterface::Virtio => { StorageDeviceV0::VirtioDisk(VirtioDisk { - backend_name: backend_name.clone(), + backend_name: backend_name.clone().into_string(), pci_path, }) } DiskInterface::Nvme => StorageDeviceV0::NvmeDisk(NvmeDisk { - backend_name: backend_name.clone(), + backend_name: backend_name.clone().into_string(), pci_path, }), }; spec_builder .add_storage_device( - device_name, + device_name.into_string(), device_spec, - backend_name, + backend_name.into_string(), backend_spec, ) .context("adding storage device to spec")?; @@ -238,6 +298,14 @@ impl<'dr> VmConfig<'dr> { .add_serial_port(SerialPortNumber::Com1) .context("adding serial port to spec")?; + if let Some(boot_order) = self.boot_order.as_ref() { + spec_builder + .set_boot_order( + boot_order.iter().map(|x| x.to_string()).collect(), + ) + .context("adding boot order to spec")?; + } + let instance_spec = spec_builder.finish(); // Generate random identifiers for this instance's timeseries metadata. @@ -263,14 +331,16 @@ impl<'dr> VmConfig<'dr> { } async fn make_disk<'req>( - name: String, + device_name: String, framework: &Framework, req: &DiskRequest<'req>, ) -> anyhow::Result> { + let device_name = DeviceName::new(device_name); + Ok(match req.backend { DiskBackend::File => framework .disk_factory - .create_file_backed_disk(name, &req.source) + .create_file_backed_disk(device_name, &req.source) .await .with_context(|| { format!("creating new file-backed disk from {:?}", req.source,) @@ -278,7 +348,7 @@ async fn make_disk<'req>( DiskBackend::Crucible { min_disk_size_gib, block_size } => framework .disk_factory .create_crucible_disk( - name, + device_name, &req.source, min_disk_size_gib, block_size, @@ -293,7 +363,7 @@ async fn make_disk<'req>( as Arc, DiskBackend::InMemory { readonly } => framework .disk_factory - .create_in_memory_disk(name, &req.source, readonly) + .create_in_memory_disk(device_name, &req.source, readonly) .await .with_context(|| { format!("creating new in-memory disk from {:?}", req.source) diff --git a/phd-tests/framework/src/test_vm/mod.rs b/phd-tests/framework/src/test_vm/mod.rs index 2153fa863..fccc439d6 100644 --- a/phd-tests/framework/src/test_vm/mod.rs +++ b/phd-tests/framework/src/test_vm/mod.rs @@ -357,6 +357,7 @@ impl TestVm { disks: disk_reqs.clone().unwrap(), migrate: migrate.clone(), nics: vec![], + boot_settings: None, properties: properties.clone(), }; diff --git a/phd-tests/framework/src/test_vm/spec.rs b/phd-tests/framework/src/test_vm/spec.rs index 841a05b7d..0d25dc271 100644 --- a/phd-tests/framework/src/test_vm/spec.rs +++ b/phd-tests/framework/src/test_vm/spec.rs @@ -46,7 +46,9 @@ impl VmSpec { continue; }; - let (backend_name, backend_spec) = disk.backend_spec(); + let backend_spec = disk.backend_spec(); + let backend_name = + disk.device_name().clone().into_backend_name().into_string(); match self .instance_spec .backends diff --git a/phd-tests/tests/Cargo.toml b/phd-tests/tests/Cargo.toml index e02b23c56..731dbf444 100644 --- a/phd-tests/tests/Cargo.toml +++ b/phd-tests/tests/Cargo.toml @@ -8,6 +8,8 @@ test = false doctest = false [dependencies] +anyhow.workspace = true +byteorder.workspace = true futures.workspace = true http.workspace = true propolis-client.workspace = true diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs new file mode 100644 index 000000000..569a72458 --- /dev/null +++ b/phd-tests/tests/src/boot_order.rs @@ -0,0 +1,529 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::{bail, Error}; +use phd_framework::{ + disk::{fat::FatFilesystem, DiskSource}, + test_vm::{DiskBackend, DiskInterface}, +}; +use phd_testcase::*; +use std::io::Cursor; +use tracing::warn; + +mod efi_utils; + +use efi_utils::{ + bootvar, discover_boot_option_numbers, efipath, find_option_in_boot_order, + read_efivar, remove_boot_entry, write_efivar, EfiLoadOption, + BOOT_CURRENT_VAR, BOOT_ORDER_VAR, EDK2_EFI_SHELL_GUID, + EDK2_FIRMWARE_VOL_GUID, EDK2_UI_APP_GUID, +}; + +pub(crate) async fn run_long_command( + vm: &phd_framework::TestVm, + cmd: &str, +) -> Result { + // Ok, this is a bit whacky: something about the line wrapping for long + // commands causes `run_shell_command` to hang instead of ever actually + // seeing a response prompt. + // + // I haven't gone and debugged that; instead, chunk the input command up + // into segments short enough to not wrap when input, put them all in a + // file, then run the file. + vm.run_shell_command("rm cmd").await?; + let mut offset = 0; + // Escape any internal `\`. This isn't comprehensive escaping (doesn't + // handle \n, for example).. + let cmd = cmd.replace("\\", "\\\\"); + while offset < cmd.len() { + let lim = std::cmp::min(cmd.len() - offset, 50); + let chunk = &cmd[offset..][..lim]; + offset += lim; + + // Catch this before it causes weird issues in half-executed commands. + // + // Could escape these here, but right now that's not really necessary. + assert!(!chunk.contains("\n")); + + vm.run_shell_command(&format!("echo -n \'{}\' >>cmd", chunk)).await?; + } + vm.run_shell_command(". cmd").await +} + +// This test checks that with a specified boot order, the guest boots whichever +// disk we wanted to come first. This is simple enough, until you want to know +// "what you booted from".. +// +// For live CDs, such as Alpine's, the system boots into a tmpfs loaded from a +// boot disk, but there's no clear line to what disk that live image *came +// from*. If you had two Alpine 3.20.3 images attached to one VM, you'd +// ceretainly boot into Alpine 3.20.3, but I don't see a way to tell *which +// disk* that Alpine would be sourced from, from Alpine alone. +// +// So instead, check EFI variables. To do this, then, we have to.. parse EFI +// variables. That is what this test does below, but it's probably not fully +// robust to what we might do with PCI devices in the future. +// +// A more "future-proof" setup might be to just boot an OS, see that we ended up +// in the OS we expected, and check some attribute about it like that the kernel +// version is what we expected the booted OS to be. That's still a good fallback +// if we discover that parsing EFI variables is difficult to stick with for any +// reason. It has a downside though: we'd have to keep a specific image around +// with a specific kernel version as either the "we expect to boot into this" +// image or the "we expected to boot into not this" cases. +// +// The simplest case: show that we can configure the guest's boot order from +// outside the machine. This is the most likely common case, where Propolis is +// told what the boot order should be by Nexus and we simply make it happen. +// +// Unlike later tests, this test does not manipulate boot configuration from +// inside the guest OS. +#[phd_testcase] +async fn configurable_boot_order(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("configurable_boot_order"); + + // Create a second disk backed by the same artifact as the default + // `boot-disk`. This way we'll boot to the same environment regardless of + // which disk is used; we'll check EFI variables to figure out if the right + // disk was booted. + cfg.data_disk( + "alt-boot", + DiskSource::Artifact(ctx.default_guest_os_artifact()), + DiskInterface::Virtio, + DiskBackend::File, + 24, + ); + + // We haven't specified a boot order. So, we'll expect that we boot to the + // lower-numbered PCI device (4) and end up in Alpine 3.20. + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + let boot_num_bytes = read_efivar(&vm, BOOT_CURRENT_VAR).await?; + + let boot_num: u16 = u16::from_le_bytes(boot_num_bytes.try_into().unwrap()); + + let boot_option_bytes = read_efivar(&vm, &bootvar(boot_num)).await?; + + let mut cursor = Cursor::new(boot_option_bytes.as_slice()); + + let load_option = EfiLoadOption::parse_from(&mut cursor).unwrap(); + + assert!(load_option.path.matches_pci_device_function(4, 0)); + + // Now specify a boot order and do the whole thing again. Note that this + // order puts the later PCI device first, so this changes the boot order! + cfg.boot_order(vec!["alt-boot", "boot-disk"]); + + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + let boot_num_bytes = read_efivar(&vm, BOOT_CURRENT_VAR).await?; + + let boot_num: u16 = u16::from_le_bytes(boot_num_bytes.try_into().unwrap()); + + let boot_option_bytes = read_efivar(&vm, &bootvar(boot_num)).await?; + + let mut cursor = Cursor::new(boot_option_bytes.as_slice()); + + let load_option = EfiLoadOption::parse_from(&mut cursor).unwrap(); + + // If we were going to test the PCI bus number too, we'd check the AHCI + // Device Path entry that precedes these PCI values. But we only use PCI bus + // 0 today, and the mapping from an AHCI Device Path to a PCI root is not + // immediately obvious? + assert!(load_option.path.matches_pci_device_function(24, 0)); +} + +// This is very similar to the `in_memory_backend_smoke_test` test, but +// specifically asserts that the unbootable disk is first in the boot order; the +// system booting means that boot order is respected and a non-bootable disk +// does not wedge startup. +#[phd_testcase] +async fn unbootable_disk_skipped(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("unbootable_disk_skipped"); + + cfg.data_disk( + "unbootable", + DiskSource::FatFilesystem(FatFilesystem::new()), + DiskInterface::Virtio, + DiskBackend::InMemory { readonly: true }, + 16, + ); + + // `boot-disk` is the implicitly-created boot disk made from the default + // guest OS artifact. + // + // explicitly boot from it later, so OVMF has to try and fail to boot + // `unbootable`. + cfg.boot_order(vec!["unbootable", "boot-disk"]); + + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + let boot_num_bytes = read_efivar(&vm, BOOT_CURRENT_VAR).await?; + + let boot_num: u16 = u16::from_le_bytes(boot_num_bytes.try_into().unwrap()); + + let boot_option_bytes = read_efivar(&vm, &bootvar(boot_num)).await?; + + let mut cursor = Cursor::new(boot_option_bytes.as_slice()); + + let load_option = EfiLoadOption::parse_from(&mut cursor).unwrap(); + + // Device 4 is the implicitly-used `boot-disk` PCI device number. This is + // not 16, for example, as we expect to not boot `unbootable`. + assert_eq!(load_option.pci_device_function(), (4, 0)); + + let boot_order_bytes = read_efivar(&vm, BOOT_ORDER_VAR).await?; + + // Interestingly, when we specify a boot order via fwcfg, OVMF includes two + // additional entries: + // * "UiApp", which I can't find much about + // * "EFI Internal Shell", the EFI shell the system drops into if no disks + // are bootable + // + // Exactly where these end up in the boot order is not entirely important; + // we really just need to make sure that the boot order we specified comes + // first (and before "EFI Internal Shell") + #[derive(Debug, PartialEq, Eq)] + enum TestState { + SeekingUnbootable, + FoundUnbootable, + AfterBootOrder, + } + + let mut state = TestState::SeekingUnbootable; + + for item in boot_order_bytes.chunks(2) { + let option_num: u16 = u16::from_le_bytes(item.try_into().unwrap()); + + let option_bytes = read_efivar(&vm, &bootvar(option_num)).await?; + + let mut cursor = Cursor::new(option_bytes.as_slice()); + + let load_option = EfiLoadOption::parse_from(&mut cursor).unwrap(); + + match state { + TestState::SeekingUnbootable => { + if load_option.path.matches_pci_device_function(16, 0) { + state = TestState::FoundUnbootable; + continue; + } else if load_option + .path + .matches_fw_file(EDK2_FIRMWARE_VOL_GUID, EDK2_UI_APP_GUID) + { + // `UiApp`. Ignore it and continue. + continue; + } else { + bail!( + "Did not expect to find {:?} yet (test state = {:?})", + load_option, + state + ); + } + } + TestState::FoundUnbootable => { + if load_option.path.matches_pci_device_function(4, 0) { + state = TestState::AfterBootOrder; + continue; + } else { + bail!( + "Did not expect to find {:?} (test state = {:?})", + load_option, + state + ); + } + } + TestState::AfterBootOrder => { + let is_ui_app = load_option + .path + .matches_fw_file(EDK2_FIRMWARE_VOL_GUID, EDK2_UI_APP_GUID); + let is_efi_shell = load_option.path.matches_fw_file( + EDK2_FIRMWARE_VOL_GUID, + EDK2_EFI_SHELL_GUID, + ); + if !is_ui_app && !is_efi_shell { + bail!( + "Did not expect to find {:?} (test state = {:?})", + load_option, + state + ); + } + } + } + } + + assert_eq!(state, TestState::AfterBootOrder); +} + +// Start with the boot order being `["boot-disk", "unbootable"]`, then change it +// so that next boot we'll boot from `unbootable` first. Then reboot and verify +// that the boot order is still "boot-disk" first. +#[phd_testcase] +async fn guest_can_adjust_boot_order(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("guest_can_adjust_boot_order"); + + cfg.data_disk( + "unbootable", + DiskSource::FatFilesystem(FatFilesystem::new()), + DiskInterface::Virtio, + DiskBackend::InMemory { readonly: true }, + 16, + ); + + cfg.boot_order(vec!["boot-disk", "unbootable"]); + + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + // If the guest doesn't have an EFI partition then there's no way for boot + // order preferences to be persisted. + let mountline = vm.run_shell_command("mount | grep efivarfs").await?; + + if !mountline.starts_with("efivarfs on ") { + warn!( + "guest doesn't have an efivarfs, cannot manage boot order! \ + exiting test WITHOUT VALIDATING ANYTHING" + ); + return Ok(()); + } + + // Try adding a few new boot options, then add them to the boot order, + // reboot, and make sure they're all as we set them. + if !run_long_command(&vm, &format!("ls {}", efipath(&bootvar(0xffff)))) + .await? + .is_empty() + { + warn!( + "guest environment already has a BootFFFF entry; \ + is this not a fresh image?" + ); + } + + let boot_num: u16 = { + let bytes = read_efivar(&vm, BOOT_CURRENT_VAR).await?; + u16::from_le_bytes(bytes.try_into().unwrap()) + }; + + // The entry we booted from is clearly valid, so we should be able to insert + // a few duplicate entries. We won't boot into them, but if something bogus + // happens and we did boot one of these, at least it'll work and we can + // detect the misbehavior. + // + // But here's a weird one: if we just append these to the end, on reboot + // they'll be moved somewhat up the boot order. This occurrs both if setting + // variables through `efibootmgr` or by writing to + // /sys/firmware/efi/efivars/BootOrder-* directly. As an example, say we had + // a boot order of "0004,0001,0003,0000" where boot options were as follows: + // * 0000: UiApp + // * 0001: PCI device 4, function 0 + // * 0003: EFI shell (Firmware volume+file) + // * 0004: Ubuntu (HD partition 15, GPT formatted) + // + // If we duplicate entry 4 to new options FFF0 and FFFF, reset the boot + // order to "0004,0001,0003,0000,FFF0,FFFF", then reboot the VM, the boot + // order when it comes back up will be "0004,0001,FFF0,FFFF,0003,0000". + // + // This almost makes sense, but with other devices in the mix I've seen + // reorderings like `0004,0001,,0003,0000,FFF0,FFFF` turning into + // `0004,0001,FFF0,FFFF,,0003,0000`. This is particularly strange + // in that the new options were reordered around some other PCI device. It's + // not the boot order we set! + // + // So, to at least confirm we *can* modify the boot order in a stable way, + // make a somewhat less ambitious change: insert the duplicate boot options + // in the order directly after the option they are duplicates of. This seems + // to not get reordered. + let boot_option_bytes = read_efivar(&vm, &bootvar(boot_num)).await?; + + // Finally, seeing a read-write `efivarfs` is not sufficient to know that + // writes to EFI variables will actually stick. For example, an Alpine live + // image backed by an ISO 9660 filesystem may have an EFI System Partition + // and `efivarfs`, but certainly cannot persist state and will drop writes + // to EFI variables. + // + // Check for this condition and exit early if the guest OS configuration + // will not let us perform a useful test. + write_efivar(&vm, &bootvar(0xfff0), &boot_option_bytes).await?; + let reread = read_efivar(&vm, &bootvar(0xfff0)).await?; + if reread.is_empty() { + phd_skip!("Guest environment drops EFI variable writes"); + } else { + assert_eq!( + boot_option_bytes, + read_efivar(&vm, &bootvar(0xfff0)).await?, + "EFI variable write wrote something, but not what we expected?" + ); + } + + let boot_order_bytes = read_efivar(&vm, BOOT_ORDER_VAR).await?; + + let mut new_boot_order = Vec::new(); + new_boot_order.extend_from_slice(&boot_order_bytes); + + let mut new_boot_order = boot_order_bytes.clone(); + let booted_idx = new_boot_order + .chunks(2) + .enumerate() + .find(|(_i, chunk)| *chunk == boot_num.to_le_bytes()) + .map(|(i, _chunk)| i) + .expect("booted entry exists"); + let suffix = new_boot_order.split_off((booted_idx + 1) * 2); + new_boot_order.extend_from_slice(&[0xf0, 0xff]); + new_boot_order.extend_from_slice(&[0xff, 0xff]); + new_boot_order.extend_from_slice(&suffix); + + write_efivar(&vm, &bootvar(0xfff0), &boot_option_bytes).await?; + assert_eq!(boot_option_bytes, read_efivar(&vm, &bootvar(0xfff0)).await?); + write_efivar(&vm, &bootvar(0xffff), &boot_option_bytes).await?; + assert_eq!(boot_option_bytes, read_efivar(&vm, &bootvar(0xffff)).await?); + + write_efivar(&vm, BOOT_ORDER_VAR, &new_boot_order).await?; + let written_boot_order = read_efivar(&vm, BOOT_ORDER_VAR).await?; + assert_eq!(new_boot_order, written_boot_order); + + // Now, reboot and check that the settings stuck. + vm.run_shell_command("reboot").await?; + vm.wait_to_boot().await?; + + let boot_order_after_reboot = read_efivar(&vm, BOOT_ORDER_VAR).await?; + assert_eq!(new_boot_order, boot_order_after_reboot); + + let boot_num_after_reboot: u16 = { + let bytes = read_efivar(&vm, BOOT_CURRENT_VAR).await?; + u16::from_le_bytes(bytes.try_into().unwrap()) + }; + assert_eq!(boot_num, boot_num_after_reboot); + + let boot_option_bytes_after_reboot = + read_efivar(&vm, &bootvar(boot_num)).await?; + assert_eq!(boot_option_bytes, boot_option_bytes_after_reboot); +} + +// This test is less demonstrating specific desired behavior, and more the +// observed behavior of OVMF with configuration we can offer today. If Propolis +// or other changes break this test, the test may well be what needs changing. +// +// If a `bootorder` file is present in fwcfg, there two relevant consequences +// demonstrated here: * The order of devices in `bootorder` is the order that +// will be used; on reboot any persisted configuration will be replaced with one +// derived from `bootorder` and corresponding OVMF logic. * Guests cannot +// meaningfully change boot order. If an entry is in `bootorder`, that +// determines its' order. If it is not in `bootorder` but is retained for +// booting, it is appended to the end of the boot order in what seems to be the +// order that OVMF discovers the device. +// +// If `bootorder` is removed for subsequent reboots, the EFI System Partition's +// store of NvVar variables is the source of boot order, and guests can control +// their boot fates. +#[phd_testcase] +async fn boot_order_source_priority(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("boot_order_source_priority"); + + cfg.data_disk( + "unbootable", + DiskSource::FatFilesystem(FatFilesystem::new()), + DiskInterface::Virtio, + DiskBackend::InMemory { readonly: true }, + 16, + ); + + cfg.data_disk( + "unbootable-2", + DiskSource::FatFilesystem(FatFilesystem::new()), + DiskInterface::Virtio, + DiskBackend::InMemory { readonly: true }, + 20, + ); + + // For the first stage of this test, we want to leave the boot procedure up + // to whatever the guest firmware will do. + cfg.clear_boot_order(); + + let mut vm_no_bootorder = ctx.spawn_vm(&cfg, None).await?; + vm_no_bootorder.launch().await?; + vm_no_bootorder.wait_to_boot().await?; + + let boot_option_numbers = discover_boot_option_numbers( + &vm_no_bootorder, + &[ + ((4, 0), "boot-disk"), + ((16, 0), "unbootable"), + ((20, 0), "unbootable-2"), + ], + ) + .await?; + + // `unbootable` should be somewhere in the middle of the boot order: + // definitely between `boot-disk` and `unbootable-2`, for the options + // enumerated from PCI devices. + let unbootable_num = boot_option_numbers["unbootable"]; + + let unbootable_idx = remove_boot_entry(&vm_no_bootorder, unbootable_num) + .await? + .expect("unbootable was in the boot order"); + + vm_no_bootorder.run_shell_command("reboot").await?; + vm_no_bootorder.wait_to_boot().await?; + + let reloaded_order = read_efivar(&vm_no_bootorder, BOOT_ORDER_VAR).await?; + + // Somewhat unexpected, but where OVMF gets us: `unbootable` is back in the + // boot order, but at the end of the list. One might hope it would be + // entirely removed from the boot order now, but no such luck. The good news + // is that we can in fact influence the boot order. + let unbootable_idx_after_reboot = + find_option_in_boot_order(&reloaded_order, unbootable_num) + .expect("unbootable is back in the order"); + + let last_boot_option = &reloaded_order[reloaded_order.len() - 2..]; + assert_eq!(last_boot_option, &unbootable_num.to_le_bytes()); + + // But this new position for `unbootable` definitely should be different + // from before. + assert_ne!(unbootable_idx, unbootable_idx_after_reboot); + + // And if we do the whole dance again with an explicit boot order provided + // to the guest, we'll get different results! + drop(vm_no_bootorder); + cfg.boot_order(vec!["boot-disk", "unbootable", "unbootable-2"]); + + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + let boot_option_numbers = discover_boot_option_numbers( + &vm, + &[ + ((4, 0), "boot-disk"), + ((16, 0), "unbootable"), + ((20, 0), "unbootable-2"), + ], + ) + .await?; + + let unbootable_num = boot_option_numbers["unbootable"]; + + // Try removing a fw_cfg-defined boot option. + let unbootable_idx = remove_boot_entry(&vm, unbootable_num) + .await? + .expect("unbootable was in the boot order"); + + vm.run_shell_command("reboot").await?; + vm.wait_to_boot().await?; + + let reloaded_order = read_efivar(&vm, BOOT_ORDER_VAR).await?; + + // The option will be back in the boot order, where it was before! This is + // because fwcfg still has a `bootorder` file. + assert_eq!( + find_option_in_boot_order(&reloaded_order, unbootable_num), + Some(unbootable_idx) + ); +} diff --git a/phd-tests/tests/src/boot_order/efi_utils.rs b/phd-tests/tests/src/boot_order/efi_utils.rs new file mode 100644 index 000000000..a57f4e3c4 --- /dev/null +++ b/phd-tests/tests/src/boot_order/efi_utils.rs @@ -0,0 +1,518 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! EFI variable parsing and manipulation utilities. +//! +//! Conceptually, this would be a separate crate. Something like `uefi`, or +//! maybe more accurately, `uefi-raw`. Those crates are very oriented towards +//! *being* the platform firmware though - it's not clear how to use them to +//! parse a boot option into a device path, for example, though they clearly are +//! able to support processing device paths. +//! +//! So instead, this is enough supporting logic for our tests in Propolis. + +use anyhow::{bail, Error}; +use byteorder::{LittleEndian, ReadBytesExt}; +use phd_testcase::*; +use std::collections::HashMap; +use std::fmt::Write; +use std::io::{Cursor, Read}; +use tracing::{info, trace, warn}; + +use super::run_long_command; + +// First, some GUIDs. These GUIDs come from EDK2, and OVMF reuses them. Notably +// these are the raw bytes of the GUID: textual values will have slightly +// different ordering of bytes. +// +// Some source references, as you won't find these GUIDs in a UEFI or related +// spec document.. The firmware volume is identified by what seems to be the DXE +// firmware volume: +// https://github.com/tianocore/edk2/blob/712797c/OvmfPkg/OvmfPkgIa32.fdf#L181 +// introduced in +// https://github.com/tianocore/edk2/commit/16f26de663967b5a64140b6abba2c145ea50194c, +// note this is the DXEFV entry. +// +// The *files* we'll care about in this test are identified by other GUIDs in +// the above *volume*. +// +// EFI Internal Shell: +// https://github.com/tianocore/edk2/blob/a445e1a/ShellPkg/ShellPkg.dec#L59-L60 +// UiApp: +// https://github.com/tianocore/edk2/blob/a445e1a/MdeModulePkg/Application/UiApp/UiApp.inf#L13 +pub(crate) const EDK2_FIRMWARE_VOL_GUID: &[u8; 16] = &[ + 0xc9, 0xbd, 0xb8, 0x7c, 0xeb, 0xf8, 0x34, 0x4f, 0xaa, 0xea, 0x3e, 0xe4, + 0xaf, 0x65, 0x16, 0xa1, +]; +pub(crate) const EDK2_UI_APP_GUID: &[u8; 16] = &[ + 0x21, 0xaa, 0x2c, 0x46, 0x14, 0x76, 0x03, 0x45, 0x83, 0x6e, 0x8a, 0xb6, + 0xf4, 0x66, 0x23, 0x31, +]; +pub(crate) const EDK2_EFI_SHELL_GUID: &[u8; 16] = &[ + 0x83, 0xa5, 0x04, 0x7c, 0x3e, 0x9e, 0x1c, 0x4f, 0xad, 0x65, 0xe0, 0x52, + 0x68, 0xd0, 0xb4, 0xd1, +]; + +// The variable namespace `8be4df61-93ca-11d2-aa0d-00e098032b8c` comes from +// UEFI, as do the variable names here. The presentation as +// `{varname}-{namespace}`, and at a path like `/sys/firmware/efi/efivars/`, are +// both Linux `efivars`-isms. +// +// These tests likely will not pass when run with other guest OSes. +pub(crate) const BOOT_CURRENT_VAR: &str = + "BootCurrent-8be4df61-93ca-11d2-aa0d-00e098032b8c"; +pub(crate) const BOOT_ORDER_VAR: &str = + "BootOrder-8be4df61-93ca-11d2-aa0d-00e098032b8c"; + +pub(crate) fn bootvar(num: u16) -> String { + format!("Boot{:04X}-8be4df61-93ca-11d2-aa0d-00e098032b8c", num) +} + +pub(crate) fn efipath(varname: &str) -> String { + format!("/sys/firmware/efi/efivars/{}", varname) +} + +/// A (very limited) parse of an `EFI_LOAD_OPTION` descriptor. +#[derive(Debug)] +pub(crate) struct EfiLoadOption { + pub description: String, + pub path: EfiLoadPath, +} + +#[derive(Debug)] +pub(crate) enum EfiLoadPath { + Device { acpi_root: DevicePath, pci_device: DevicePath }, + FirmwareFile { volume: DevicePath, file: DevicePath }, +} + +impl EfiLoadPath { + pub fn matches_fw_file( + &self, + fw_vol: &[u8; 16], + fw_file: &[u8; 16], + ) -> bool { + if let EfiLoadPath::FirmwareFile { + volume: DevicePath::FirmwareVolume { guid: vol_gid }, + file: DevicePath::FirmwareFile { guid: vol_file }, + } = self + { + vol_gid == fw_vol && vol_file == fw_file + } else { + false + } + } + + pub fn matches_pci_device_function( + &self, + pci_device: u8, + pci_function: u8, + ) -> bool { + if let EfiLoadPath::Device { + acpi_root: DevicePath::Acpi { .. }, + pci_device: DevicePath::Pci { device, function }, + } = self + { + pci_device == *device && pci_function == *function + } else { + false + } + } + + pub fn as_pci_device_function(&self) -> Option<(u8, u8)> { + if let EfiLoadPath::Device { + acpi_root: DevicePath::Acpi { .. }, + pci_device: DevicePath::Pci { device, function }, + } = self + { + Some((*device, *function)) + } else { + None + } + } +} + +// The `Acpi` fields are not explicitly used (yet), but are useful for `Debug` +// purposes. +#[allow(dead_code)] +#[derive(Debug, Clone, Copy)] +pub(crate) enum DevicePath { + Acpi { hid: u32, uid: u32 }, + Pci { device: u8, function: u8 }, + + // These two are described in sections 8.2 and 8.3 of the UEFI PI spec, + // respectively. Version 1.6 can be found at + // https://uefi.org/sites/default/files/resources/PI_Spec_1.6.pdf + FirmwareVolume { guid: [u8; 16] }, + FirmwareFile { guid: [u8; 16] }, +} + +impl DevicePath { + fn parse_from(bytes: &mut Cursor<&[u8]>) -> Result { + let ty = bytes.read_u8()?; + let subtype = bytes.read_u8()?; + + macro_rules! check_size { + ($desc:expr, $size: expr, $expect:expr) => { + if $size != $expect { + bail!( + "{} size is wrong (was {:#04x}, not {:#04x}", + $desc, + $size, + $expect, + ); + } + }; + } + + match (ty, subtype) { + (2, 1) => { + // ACPI Device Path + let size = bytes.read_u16::()?; + check_size!("ACPI Device Path", size, 0xc); + let hid = bytes.read_u32::().unwrap(); + let uid = bytes.read_u32::().unwrap(); + Ok(DevicePath::Acpi { hid, uid }) + } + (1, 1) => { + // PCI device path + let size = bytes.read_u16::()?; + check_size!("PCI Device Path", size, 0x6); + let function = bytes.read_u8().unwrap(); + let device = bytes.read_u8().unwrap(); + Ok(DevicePath::Pci { device, function }) + } + (4, 6) => { + // "PIWG Firmware File" aka "Firmware File" in UEFI PI spec + let size = bytes.read_u16::()?; + check_size!("Firmware File", size, 0x14); + let mut guid = [0u8; 16]; + bytes.read_exact(&mut guid)?; + Ok(DevicePath::FirmwareFile { guid }) + } + (4, 7) => { + // "PIWG Firmware Volume" aka "Firmware Volume" in UEFI PI spec + let size = bytes.read_u16::()?; + check_size!("Firmware Volume", size, 0x14); + let mut guid = [0u8; 16]; + bytes.read_exact(&mut guid)?; + Ok(DevicePath::FirmwareVolume { guid }) + } + (ty, subtype) => { + bail!( + "Device path type/subtype unsupported: ({:#02x}/{:#02x})", + ty, + subtype + ); + } + } + } +} + +impl EfiLoadOption { + // parsing here brought to you by rereading + // * https://uefi.org/specs/UEFI/2.10/10_Protocols_Device_Path_Protocol.html + // * https://uefi.org/specs/UEFI/2.10/03_Boot_Manager.html + pub(crate) fn parse_from( + bytes: &mut Cursor<&[u8]>, + ) -> Result { + let _attributes = bytes.read_u32::()?; + let file_path_list_length = bytes.read_u16::()?; + + // The `Description` field is a null-terminated string of char16. + let mut description_chars: Vec = Vec::new(); + + loop { + let c = bytes.read_u16::()?; + if c == 0 { + break; + } + description_chars.push(c); + } + + let description = String::from_utf16(&description_chars) + .expect("description is valid utf16"); + + let mut device_path_cursor = Cursor::new( + &bytes.get_ref()[bytes.position() as usize..] + [..file_path_list_length as usize], + ); + + let path_entry = DevicePath::parse_from(&mut device_path_cursor) + .map_err(|e| { + anyhow::anyhow!("unable to parse device path element: {:?}", e) + })?; + let load_path = match path_entry { + acpi_root @ DevicePath::Acpi { .. } => { + let pci_device = + DevicePath::parse_from(&mut device_path_cursor) + .expect("can read device path element"); + if !matches!(pci_device, DevicePath::Pci { .. }) { + bail!( + "expected ACPI Device Path entry to be followed by \ + a PCI Device Path, but was {:?}", + pci_device + ); + } + + EfiLoadPath::Device { acpi_root, pci_device } + } + volume @ DevicePath::FirmwareVolume { .. } => { + let file = DevicePath::parse_from(&mut device_path_cursor) + .expect("can read device path element"); + if !matches!(file, DevicePath::FirmwareFile { .. }) { + bail!( + "expected Firmware Volume entry to be followed by \ + a Firmware File, but was {:?}", + file + ); + } + + EfiLoadPath::FirmwareFile { volume, file } + } + other => { + bail!("unexpected root EFI Load Option path item: {:?}", other); + } + }; + + // Not strictly necessary, but advance `bytes` by the number of bytes we + // read from `device_path_cursor`. To callers, this keeps it as if we + // had just been reading `bytes` all along. + bytes.set_position(bytes.position() + device_path_cursor.position()); + + Ok(EfiLoadOption { description, path: load_path }) + } + + pub fn pci_device_function(&self) -> (u8, u8) { + let EfiLoadPath::Device { + pci_device: DevicePath::Pci { device, function }, + .. + } = self.path + else { + panic!( + "expected load path to be an ACPI/PCI pair, but was {:?}", + self.path + ); + }; + (device, function) + } +} + +fn unhex(s: &str) -> Vec { + let s = s.replace("\n", ""); + trace!("unhexing {}", s); + let mut res = Vec::new(); + for chunk in s.as_bytes().chunks(2) { + assert_eq!(chunk.len(), 2); + + let s = std::str::from_utf8(chunk).expect("valid string"); + + let b = u8::from_str_radix(s, 16).expect("can parse"); + + res.push(b); + } + res +} + +/// Read the EFI variable `varname` from inside the VM, and return the data +/// therein as a byte array. +pub(crate) async fn read_efivar( + vm: &phd_framework::TestVm, + varname: &str, +) -> Result, Error> { + // Linux's `efivarfs` prepends 4 bytes of attributes to EFI variables. + let cmd = format!( + "dd status=none if={} bs=1 skip=4 | xxd -p -", + efipath(varname) + ); + + let hex = run_long_command(vm, &cmd).await?; + + Ok(unhex(&hex)) +} + +/// Write the provided bytes to the EFI variable `varname`. +/// +/// For Linux guests: variables automatically have their prior attributes +/// prepended. Provide only the variable's data. +pub(crate) async fn write_efivar( + vm: &phd_framework::TestVm, + varname: &str, + data: &[u8], +) -> Result<(), Error> { + let attr_cmd = format!( + "dd status=none if={} bs=1 count=4 | xxd -p -", + efipath(varname) + ); + + let attr_read_bytes = run_long_command(vm, &attr_cmd).await?; + let attrs = if attr_read_bytes.ends_with(": No such file or directory") { + // Default attributes if the variable does not exist yet. We expect it + // to be non-volatile because we are writing it, we expect it to be + // available to boot services (not strictly required, but for boot + // configuration we need it), and we expect it to be available at + // runtime (e.g. where we are reading and writing it). + // + // so: + // NON_VOLATILE | BOOTSERVICE_ACCESS | RUNTIME_ACCESS + const FRESH_ATTRS: u32 = 0x00_00_00_07; + FRESH_ATTRS.to_le_bytes().to_vec() + } else { + unhex(&attr_read_bytes) + }; + + let mut new_value = attrs; + new_value.extend_from_slice(data); + + // The command to write this data back out will be, roughtly: + // ``` + // printf "\xAA\xAA\xAA\xAA\xDD\xDD\xDD\xDD" > /sys/firmware/efi/efivars/... + // ``` + // where AAAAAAAA are the attribute bytes and DDDDDDDD are caller-provided + // data. + let escaped: String = + new_value.into_iter().fold(String::new(), |mut out, b| { + write!(out, "\\x{:02x}", b).expect("can append to String"); + out + }); + + let cmd = format!("printf \"{}\" > {}", escaped, efipath(varname)); + + let res = run_long_command(vm, &cmd).await?; + // If something went sideways and the write failed with something like + // `invalid argument`... + if !res.is_empty() { + bail!("writing efi produced unexpected output: {}", res); + } + + Ok(()) +} + +/// Learn the boot option numbers associated with various boot options that may +/// or should exist. +/// +/// The fundamental wrinkle here is that we don't necessarily know what +/// `Boot####` entries exist, or which numbers they have, because NvVar is +/// handled through persistence in guest disks. This means a guest image may +/// have some prior NvVar state with `Boot####` entries that aren't removed, and +/// cause entries reflecting the current system to have later numbers than a +/// fully blank initial set of variables. +pub(crate) async fn discover_boot_option_numbers( + vm: &phd_framework::TestVm, + device_names: &[((u8, u8), &'static str)], +) -> Result> { + let mut option_mappings: HashMap = HashMap::new(); + + let boot_order_bytes = read_efivar(vm, BOOT_ORDER_VAR).await?; + info!("Initial boot order var: {:?}", boot_order_bytes); + + for chunk in boot_order_bytes.chunks(2) { + assert_eq!(chunk.len(), 2); + let option_num = u16::from_le_bytes(chunk.try_into().unwrap()); + + let option_bytes = read_efivar(vm, &bootvar(option_num)).await?; + + let mut cursor = Cursor::new(option_bytes.as_slice()); + + let load_option = match EfiLoadOption::parse_from(&mut cursor) { + Ok(option) => option, + Err(e) => { + warn!("Unhandled boot option: {:?}", e); + continue; + } + }; + + if load_option + .path + .matches_fw_file(EDK2_FIRMWARE_VOL_GUID, EDK2_UI_APP_GUID) + { + let prev = option_mappings.insert("uiapp".to_string(), option_num); + assert_eq!(prev, None); + } else if load_option + .path + .matches_fw_file(EDK2_FIRMWARE_VOL_GUID, EDK2_EFI_SHELL_GUID) + { + let prev = + option_mappings.insert("efi shell".to_string(), option_num); + assert_eq!(prev, None); + } else if let Some((device, function)) = + load_option.path.as_pci_device_function() + { + let description = device_names.iter().find_map(|(path, desc)| { + if path.0 == device && path.1 == function { + Some(desc) + } else { + None + } + }); + + if let Some(description) = description { + option_mappings.insert(description.to_string(), option_num); + } else { + warn!("Unknown PCI boot device {:#x}.{:#x}", device, function); + } + } else { + warn!("Unknown boot option: {:?}", load_option); + + let prev = option_mappings + .insert(load_option.description.to_string(), option_num); + assert_eq!(prev, None); + } + } + + info!("Found boot options: {:?}", option_mappings); + + Ok(option_mappings) +} + +pub(crate) fn find_option_in_boot_order( + order: &[u8], + option: u16, +) -> Option { + let option = option.to_le_bytes(); + order + .chunks(2) + .enumerate() + .find(|(_i, chunk)| *chunk == option) + .map(|(i, _chunk)| i) +} + +/// Remove the boot option from `vm`'s EFI BootOrder variable. `boot_option_num` +/// is assumed to refer to a boot option named like +/// `format!("Boot{boot_option_num:4X}-*")`. +/// +/// If the boot order was actually modified, returns the index that +/// `boot_option_num` was removed at. +pub(crate) async fn remove_boot_entry( + vm: &phd_framework::TestVm, + boot_option_num: u16, +) -> Result> { + let mut without_option = read_efivar(vm, BOOT_ORDER_VAR).await?; + + let Some(option_idx) = + find_option_in_boot_order(&without_option, boot_option_num) + else { + return Ok(None); + }; + + info!( + "Removing Boot{:4X} from the boot order. It was at index {}", + boot_option_num, option_idx + ); + + without_option.remove(option_idx * 2); + without_option.remove(option_idx * 2); + + // Technically it's fine if an option is present multiple times, but + // typically an option is present only once. This function intends to remove + // all copies of the specified option, so assert that we have done so in the + // new order. + assert_eq!( + find_option_in_boot_order(&without_option, boot_option_num), + None + ); + + write_efivar(vm, BOOT_ORDER_VAR, &without_option).await?; + + Ok(Some(option_idx)) +} diff --git a/phd-tests/tests/src/disk.rs b/phd-tests/tests/src/disk.rs index 11a6e90c7..1db141c90 100644 --- a/phd-tests/tests/src/disk.rs +++ b/phd-tests/tests/src/disk.rs @@ -17,6 +17,7 @@ async fn in_memory_backend_smoke_test(ctx: &Framework) { let mut data = FatFilesystem::new(); data.add_file_from_str("hello_oxide.txt", HELLO_MSG)?; cfg.data_disk( + "data-disk-0", DiskSource::FatFilesystem(data), DiskInterface::Virtio, DiskBackend::InMemory { readonly: true }, diff --git a/phd-tests/tests/src/lib.rs b/phd-tests/tests/src/lib.rs index 60ae76091..574089f1f 100644 --- a/phd-tests/tests/src/lib.rs +++ b/phd-tests/tests/src/lib.rs @@ -4,6 +4,7 @@ pub use phd_testcase; +mod boot_order; mod crucible; mod disk; mod ensure_api; diff --git a/phd-tests/tests/src/migrate.rs b/phd-tests/tests/src/migrate.rs index ba4bb9abb..78438be79 100644 --- a/phd-tests/tests/src/migrate.rs +++ b/phd-tests/tests/src/migrate.rs @@ -90,7 +90,16 @@ mod from_base { let mut env = ctx.environment_builder(); env.propolis(artifacts::BASE_PROPOLIS_ARTIFACT); - let cfg = ctx.vm_config_builder(name); + let mut cfg = ctx.vm_config_builder(name); + // TODO: not strictly necessary, but as of #756 PHD began adding a + // `boot_settings` key by default to new instances. This is not + // understood by older Propolis, so migration tests would fail because + // the test changed, rather than a migration issue. + // + // At some point after landing #756, stop clearing the boot order, + // because a newer base Propolis will understand `boot_settings` just + // fine. + cfg.clear_boot_order(); ctx.spawn_vm(&cfg, Some(&env)).await } }