Skip to content

Commit

Permalink
[antlir2][vm] implement PCI bridge
Browse files Browse the repository at this point in the history
Summary:
The default PCI bus can only handle a limited number of devices, which
is not enough to test machines with large disk arrays. Add PCI bridge support
for such use cases and attach disks to their bridge instead. This is the same as
what the antlir1 VM did; I only realized why the default setting is not
enough after encountering the specific test.

This requires an update to our initrd host config as it changes the location of
root disks.

Test Plan:
`metalos/imaging_initrd/vmtest` covers all disk related stuff pretty
well.

Differential Revision: D49851480

fbshipit-source-id: 84e2dbb72b67a08570d67dd138b8a7bfb70ac34c
  • Loading branch information
wujj123456 authored and facebook-github-bot committed Oct 5, 2023
1 parent 84eff44 commit 4863a72
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 8 deletions.
19 changes: 13 additions & 6 deletions antlir/antlir2/antlir2_vm/src/disk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use tracing::debug;

use crate::isolation::IsolationError;
use crate::isolation::Platform;
use crate::pci::PCIBridge;
use crate::runtime::get_runtime;
use crate::types::QCow2DiskOpts;
use crate::utils::log_command;
Expand All @@ -28,6 +29,8 @@ use crate::utils::run_command_capture_output;
pub(crate) struct QCow2Disk {
/// Disk property specified by clients
opts: QCow2DiskOpts,
/// The PCI bridge to attach on
pci_bridge: PCIBridge,
/// Name prefix
#[builder(default = "\"vd\".to_string()")]
prefix: String,
Expand Down Expand Up @@ -139,15 +142,18 @@ impl QCow2Disk {
)
.into(),
];
let mut bus = self.pci_bridge.name();
// Create AHCI controller for SATA drives
if self.opts.interface == "ide-hd" {
args.push("-device".into());
args.push(format!("ahci,id=ahci-{}", self.name()).into());
args.push(format!("ahci,id=ahci-{},bus={}", self.name(), bus).into());
bus = format!("ahci-{}.0", self.name());
}
args.push("-device".into());
args.push(format!(
"{driver},drive={name},serial={serial},physical_block_size={pbs},logical_block_size={lbs}",
"{driver},bus={bus},drive={name},serial={serial},physical_block_size={pbs},logical_block_size={lbs}",
driver = self.opts.interface,
bus = bus,
name = self.name(),
serial = self.serial(),
pbs = self.opts.physical_block_size,
Expand Down Expand Up @@ -175,6 +181,7 @@ mod test {
let mut builder = QCow2DiskBuilder::default();
builder
.opts(opts)
.pci_bridge(PCIBridge::new(0, 1).expect("Failed to create PCI bridge"))
.prefix("test-device".to_string())
.id(3)
.state_dir(PathBuf::from("/tmp/test"));
Expand All @@ -191,7 +198,7 @@ mod test {
&disk.qemu_args().join(OsStr::new(" ")),
"-blockdev \
driver=qcow2,node-name=test-device3,file.driver=file,file.filename=/tmp/test/test-device3.qcow2 \
-device virtio-blk,drive=test-device3,serial=test-device3,\
-device virtio-blk,bus=pci0,drive=test-device3,serial=test-device3,\
physical_block_size=512,logical_block_size=512"
);

Expand All @@ -202,7 +209,7 @@ mod test {
&disk.qemu_args().join(OsStr::new(" ")),
"-blockdev \
driver=qcow2,node-name=test-device3,file.driver=file,file.filename=/tmp/test/test-device3.qcow2 \
-device virtio-blk,drive=test-device3,serial=serial,\
-device virtio-blk,bus=pci0,drive=test-device3,serial=serial,\
physical_block_size=512,logical_block_size=512"
);

Expand All @@ -212,8 +219,8 @@ mod test {
&disk.qemu_args().join(OsStr::new(" ")),
"-blockdev \
driver=qcow2,node-name=test-device3,file.driver=file,file.filename=/tmp/test/test-device3.qcow2 \
-device ahci,id=ahci-test-device3 \
-device ide-hd,drive=test-device3,serial=serial,\
-device ahci,id=ahci-test-device3,bus=pci0 \
-device ide-hd,bus=ahci-test-device3.0,drive=test-device3,serial=serial,\
physical_block_size=512,logical_block_size=512"
);
}
Expand Down
1 change: 1 addition & 0 deletions antlir/antlir2/antlir2_vm/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
mod disk;
mod isolation;
mod net;
mod pci;
mod runtime;
mod share;
mod ssh;
Expand Down
74 changes: 74 additions & 0 deletions antlir/antlir2/antlir2_vm/src/pci.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

use std::ffi::OsString;

use thiserror::Error;

/// Number of devices that are placed on a single [`PCIBridge`].
// NOTE(review): presumably mirrors the slot count of a QEMU `pci-bridge`; confirm
// whether any slot is reserved by the bridge itself.
pub(crate) const DEVICE_PER_BRIDGE: usize = 32;

/// PCI Bridge, rendered as a QEMU `pci-bridge` device that other devices can
/// attach to by name. Each bridge can attach 32 devices
/// (see [`DEVICE_PER_BRIDGE`]).
#[derive(Debug, Clone)]
pub(crate) struct PCIBridge {
/// The ID of the bridge, starting from 0. The bridge name is `pci{id}`.
id: usize,
/// Chassis ID, emitted as the `chassis_nr` property of the bridge device.
chassis_id: u8,
}

/// Errors that can occur while constructing a [`PCIBridge`].
#[derive(Error, Debug)]
pub(crate) enum PCIBridgeError {
/// The requested chassis ID does not fit into a `u8`; carries the rejected value.
#[error("Chassis ID must fit into a u8. Got: {0}")]
ChassisIDExceededError(usize),
}
/// Module-local result type whose error is always a [`PCIBridgeError`].
type Result<T> = std::result::Result<T, PCIBridgeError>;

impl PCIBridge {
    /// Build a bridge with the given `id` and `chassis_id`.
    ///
    /// # Errors
    ///
    /// Returns [`PCIBridgeError::ChassisIDExceededError`] carrying the rejected
    /// value when `chassis_id` cannot be represented as a `u8`.
    // NOTE(review): QEMU may reject `chassis_nr=0`; this constructor accepts it.
    // Confirm whether zero should be refused here as well.
    pub(crate) fn new(id: usize, chassis_id: usize) -> Result<Self> {
        match u8::try_from(chassis_id) {
            Ok(chassis_id) => Ok(Self { id, chassis_id }),
            Err(_) => Err(PCIBridgeError::ChassisIDExceededError(chassis_id)),
        }
    }

    /// Identifier of this bridge (`pci` followed by the bridge ID). Other
    /// devices reference it to attach themselves to the bridge.
    pub(crate) fn name(&self) -> String {
        let mut name = String::from("pci");
        name.push_str(&self.id.to_string());
        name
    }

    /// QEMU command line arguments that create this bridge: a `-device` flag
    /// followed by the `pci-bridge` device description.
    pub(crate) fn qemu_args(&self) -> Vec<OsString> {
        let device = format!(
            "pci-bridge,id={},chassis_nr={}",
            self.name(),
            self.chassis_id
        );
        vec![OsString::from("-device"), OsString::from(device)]
    }
}

#[cfg(test)]
mod test {
    use std::ffi::OsStr;

    use super::*;

    /// Covers chassis ID validation, bridge naming and the rendered QEMU arguments.
    #[test]
    fn test_pcibridge() {
        // A chassis ID far beyond the u8 range must be rejected.
        let oversized = PCIBridge::new(1, 10000000);
        assert!(oversized.is_err());

        let bridge = PCIBridge::new(0, 1).expect("failed to create PCI bridge");
        assert_eq!(bridge.name(), "pci0");

        // Join the argument list the same way it would appear on a command line.
        let rendered = bridge.qemu_args().join(OsStr::new(" "));
        assert_eq!(&rendered, "-device pci-bridge,id=pci0,chassis_nr=1");
    }
}
35 changes: 33 additions & 2 deletions antlir/antlir2/antlir2_vm/src/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ use crate::disk::QCow2DiskError;
use crate::isolation::Platform;
use crate::net::VirtualNIC;
use crate::net::VirtualNICError;
use crate::pci::PCIBridge;
use crate::pci::PCIBridgeError;
use crate::pci::DEVICE_PER_BRIDGE;
use crate::runtime::get_runtime;
use crate::share::ShareError;
use crate::share::Shares;
Expand All @@ -55,6 +58,8 @@ pub(crate) struct VM {
machine: MachineOpts,
/// VM execution behavior
args: VMArgs,
/// List of PCI bridges
pci_bridges: Vec<PCIBridge>,
/// List of writable drives created for the VM. We need to hold the ownership
/// to prevent the temporary disks from getting cleaned up prematurely.
disks: Vec<QCow2Disk>,
Expand All @@ -73,6 +78,8 @@ pub(crate) enum VMError {
#[error("Failed to create directory for VM states")]
StateDirError(std::io::Error),
#[error(transparent)]
PCIBridgeError(#[from] PCIBridgeError),
#[error(transparent)]
DiskInitError(#[from] QCow2DiskError),
#[error(transparent)]
ShareInitError(#[from] ShareError),
Expand Down Expand Up @@ -104,7 +111,8 @@ impl VM {
/// Create a new VM along with its virtual resources
pub(crate) fn new(machine: MachineOpts, args: VMArgs) -> Result<Self> {
let state_dir = Self::create_state_dir()?;
let disks = Self::create_disks(&machine, &state_dir)?;
let pci_bridges = Self::create_pci_bridges(&machine)?;
let disks = Self::create_disks(&machine, &pci_bridges, &state_dir)?;
let shares = Self::create_shares(
Self::get_all_shares_opts(&args.get_vm_output_dirs()),
&state_dir,
Expand All @@ -115,6 +123,7 @@ impl VM {
Ok(VM {
machine,
args,
pci_bridges,
disks,
shares,
nics,
Expand All @@ -140,15 +149,28 @@ impl VM {
Ok(PathBuf::from(STATE_DIR))
}

/// Create as many PCI bridges as are needed to attach every disk in `opts`,
/// with [`DEVICE_PER_BRIDGE`] disks per bridge.
///
/// Bridge `i` is created with chassis ID `i + 1`. The first bridge that fails
/// to construct aborts the whole operation with its error.
fn create_pci_bridges(opts: &MachineOpts) -> Result<Vec<PCIBridge>> {
    let disk_count = opts.disks.len();
    // Ceiling division: a partially filled bridge still has to exist.
    let num_bridges =
        disk_count / DEVICE_PER_BRIDGE + usize::from(disk_count % DEVICE_PER_BRIDGE != 0);

    let mut bridges = Vec::with_capacity(num_bridges);
    for id in 0..num_bridges {
        bridges.push(PCIBridge::new(id, id + 1)?);
    }
    Ok(bridges)
}

/// Create all writable disks
fn create_disks(opts: &MachineOpts, state_dir: &Path) -> Result<Vec<QCow2Disk>> {
fn create_disks(
opts: &MachineOpts,
pci_bridges: &[PCIBridge],
state_dir: &Path,
) -> Result<Vec<QCow2Disk>> {
opts.disks
.iter()
.enumerate()
.map(|(i, x)| {
Ok(QCow2DiskBuilder::default()
.opts(x.clone())
.id(i)
.pci_bridge(pci_bridges[i / DEVICE_PER_BRIDGE].clone())
.state_dir(state_dir.to_path_buf())
.build()?)
})
Expand Down Expand Up @@ -326,6 +348,7 @@ impl VM {
fn spawn_vm(&self) -> Result<Child> {
let mut args = self.common_qemu_args();
args.extend(self.non_disk_boot_qemu_args());
args.extend(self.pci_bridge_qemu_args());
args.extend(self.disk_qemu_args());
args.extend(self.share_qemu_args());
args.extend(self.nic_qemu_args());
Expand Down Expand Up @@ -579,6 +602,13 @@ impl VM {
.collect()
}

/// Concatenate the QEMU arguments of every PCI bridge, in bridge order.
fn pci_bridge_qemu_args(&self) -> Vec<OsString> {
    let mut args = Vec::new();
    for bridge in &self.pci_bridges {
        args.extend(bridge.qemu_args());
    }
    args
}

/// Concatenate the QEMU arguments of every disk, in disk order.
fn disk_qemu_args(&self) -> Vec<OsString> {
    let mut args = Vec::new();
    for disk in &self.disks {
        args.extend(disk.qemu_args());
    }
    args
}
Expand Down Expand Up @@ -655,6 +685,7 @@ mod test {
VM {
machine,
args,
pci_bridges: vec![],
disks: vec![],
shares: Shares::new(vec![share], 1024, PathBuf::from("/state/units"))
.expect("Failed to create Shares"),
Expand Down

0 comments on commit 4863a72

Please sign in to comment.