Skip to content

Commit

Permalink
agent: Add VFIO-AP device handling
Browse files Browse the repository at this point in the history
Initial VFIO-AP support (kata-containers#578) was simple, but somewhat hacky; a
different code path would be chosen for performing the hotplug, and
agent-side device handling was bound to knowing the assigned queue
numbers (APQNs) through some other means; plus the code for awaiting
them was written for the Go agent and never released. This code also
artificially increased the hotplug timeout to wait for the (relatively
expensive, thus limited to 5 seconds at the quickest) AP rescan, which
is impractical for e.g. common k8s timeouts.

Since then, the general handling logic was improved (kata-containers#1190), but it
assumed PCI in several places.

In the runtime, introduce and parse AP devices. Annotate them as such
when passing them to the agent, and include information about the
enabled APQNs.

The agent awaits the passed APQNs through uevents and triggers a
rescan directly.

Fixes: kata-containers#3678
Signed-off-by: Jakob Naucke <[email protected]>
  • Loading branch information
Jakob-Naucke committed Mar 8, 2022
1 parent 821c756 commit 44ed889
Show file tree
Hide file tree
Showing 10 changed files with 277 additions and 20 deletions.
79 changes: 79 additions & 0 deletions src/agent/src/ap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright (c) IBM Corp. 2022
//
// SPDX-License-Identifier: Apache-2.0
//

use std::fmt;
use std::str::FromStr;

use anyhow::{anyhow, Context};

// IBM Adjunct Processor (AP) is the bus used by IBM Crypto Express hardware security modules on
// IBM Z & LinuxONE (s390x)
// AP bus IDs follow the format <xx>.<xxxx> [1, p. 476], where
// - <xx> is the adapter ID, i.e. the card and
// - <xxxx> is the adapter domain.
// [1] https://www.ibm.com/docs/en/linuxonibm/pdf/lku5dd05.pdf

/// Address of a device on the AP bus, written `<xx>.<xxxx>` in hexadecimal.
///
/// The type is plain data (two small integers), so it is cheap to copy and can be compared
/// directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Address {
    /// Adapter ID (`<xx>`), i.e. the card.
    pub adapter_id: u8,
    /// Adapter domain (`<xxxx>`).
    pub adapter_domain: u16,
}

impl Address {
pub fn new(adapter_id: u8, adapter_domain: u16) -> Address {
Address {
adapter_id,
adapter_domain,
}
}
}

impl FromStr for Address {
type Err = anyhow::Error;

fn from_str(s: &str) -> anyhow::Result<Self> {
let split: Vec<&str> = s.split('.').collect();
if split.len() != 2 {
return Err(anyhow!(
"Wrong AP bus format. It needs to be in the form <xx>.<xxxx>, got {:?}",
s
));
}

let adapter_id = u8::from_str_radix(split[0], 16).context(format!(
"Wrong AP bus format. AP ID needs to be in the form <xx>, got {:?}",
split[0]
))?;
let adapter_domain = u16::from_str_radix(split[1], 16).context(format!(
"Wrong AP bus format. AP domain needs to be in the form <xxxx>, got {:?}",
split[1]
))?;

Ok(Address::new(adapter_id, adapter_domain))
}
}

impl fmt::Display for Address {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{:02x}.{:04x}", self.adapter_id, self.adapter_domain)
}
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_from_str() {
        // Short components are accepted and come back zero-padded when displayed.
        let parsed: Address = "a.1".parse().unwrap();
        assert_eq!(parsed.to_string(), "0a.0001");

        // Empty input, empty components, too many components, a non-hex digit, and a domain
        // that does not fit into 16 bits.
        let malformed = ["", ".", "0.0.0", "0g.0000", "0a.10000"];
        for input in malformed.iter() {
            assert!(
                Address::from_str(input).is_err(),
                "{:?} should have been rejected",
                input
            );
        }
    }
}
89 changes: 89 additions & 0 deletions src/agent/src/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,13 @@ pub const DRIVER_VFIO_PCI_GK_TYPE: &str = "vfio-pci-gk";
// VFIO PCI device to be bound to vfio-pci and made available inside the
// container as a VFIO device node
pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci";
pub const DRIVER_VFIO_AP_TYPE: &str = "vfio-ap";
pub const DRIVER_OVERLAYFS_TYPE: &str = "overlayfs";
pub const FS_TYPE_HUGETLB: &str = "hugetlbfs";

cfg_if! {
if #[cfg(target_arch = "s390x")] {
use crate::ap;
use crate::ccw;
}
}
Expand Down Expand Up @@ -406,6 +408,39 @@ async fn get_vfio_device_name(sandbox: &Arc<Mutex<Sandbox>>, grp: IommuGroup) ->
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
}

/// Uevent matcher for a single AP device, identified by the devpath its uevent is expected
/// to carry.
#[cfg(target_arch = "s390x")]
#[derive(Debug)]
struct ApMatcher {
/// Expected uevent devpath; `is_match` compares it against `Uevent::devpath` for equality.
syspath: String,
}

#[cfg(target_arch = "s390x")]
impl ApMatcher {
    /// Builds the matcher for `address`.
    ///
    /// The expected devpath is `<AP_ROOT_BUS_PATH>/card<xx>/<xx>.<xxxx>`, where `<xx>` is the
    /// adapter ID as two hex digits and `<xx>.<xxxx>` is the address in its display form.
    fn new(address: ap::Address) -> ApMatcher {
        let card_dir = format!("card{:02x}", address.adapter_id);
        let syspath = format!("{}/{}/{}", AP_ROOT_BUS_PATH, card_dir, address);
        ApMatcher { syspath }
    }
}

#[cfg(target_arch = "s390x")]
impl UeventMatcher for ApMatcher {
    // A uevent matches when it is an "add" event whose devpath equals the expected path.
    fn is_match(&self, uev: &Uevent) -> bool {
        if uev.action != "add" {
            return false;
        }
        uev.devpath == self.syspath
    }
}

/// Waits for the uevent that announces the AP device at `address`.
#[cfg(target_arch = "s390x")]
#[instrument]
async fn wait_for_ap_device(sandbox: &Arc<Mutex<Sandbox>>, address: ap::Address) -> Result<()> {
    // The matched uevent itself carries nothing the caller needs, so it is dropped.
    let _uevent = wait_for_uevent(sandbox, ApMatcher::new(address)).await?;
    Ok(())
}

/// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
#[instrument]
fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
Expand Down Expand Up @@ -772,6 +807,28 @@ async fn vfio_pci_device_handler(
})
}

// The VFIO AP (Adjunct Processor) device handler forces a rescan of the AP bus and then awaits
// all the APQNs provided as device options, one after the other. An option that cannot be
// parsed as an AP bus ID aborts the handler with an error. No changes to the spec are
// requested; the returned update is the default one.
#[cfg(target_arch = "s390x")]
#[instrument]
async fn vfio_ap_device_handler(
    device: &Device,
    sandbox: &Arc<Mutex<Sandbox>>,
) -> Result<SpecUpdate> {
    // Force AP bus rescan
    fs::write(AP_SCANS_PATH, "1")?;

    for apqn in device.options.iter() {
        let address = ap::Address::from_str(apqn)?;
        wait_for_ap_device(sandbox, address).await?;
    }

    Ok(SpecUpdate::default())
}

// Stand-in for architectures other than s390x: AP devices cannot be handled there, so the
// request is always rejected.
#[cfg(not(target_arch = "s390x"))]
async fn vfio_ap_device_handler(
    _device: &Device,
    _sandbox: &Arc<Mutex<Sandbox>>,
) -> Result<SpecUpdate> {
    let unsupported = anyhow!("AP is only supported on s390x");
    Err(unsupported)
}

#[instrument]
pub async fn add_devices(
devices: &[Device],
Expand Down Expand Up @@ -840,6 +897,7 @@ async fn add_device(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) -> Result<Sp
DRIVER_VFIO_PCI_GK_TYPE | DRIVER_VFIO_PCI_TYPE => {
vfio_pci_device_handler(device, sandbox).await
}
DRIVER_VFIO_AP_TYPE => vfio_ap_device_handler(device, sandbox).await,
_ => Err(anyhow!("Unknown device type {}", device.field_type)),
}
}
Expand Down Expand Up @@ -1583,4 +1641,35 @@ mod tests {
// Test dev2
assert!(pci_iommu_group(&syspci, dev2).is_err());
}

#[cfg(target_arch = "s390x")]
#[tokio::test]
async fn test_vfio_ap_matcher() {
    let card = "0a";
    let queue = format!("{}.0001", card);
    // devpath of a queue that sits on `card`
    let devpath_of = |queue: &str| format!("{}/card{}/{}", AP_ROOT_BUS_PATH, card, queue);

    let matcher = ApMatcher::new(ap::Address::from_str(&queue).unwrap());

    // An add event for exactly the awaited queue matches.
    let mut uev = Uevent::default();
    uev.action = U_EVENT_ACTION_ADD.to_string();
    uev.subsystem = "ap".to_string();
    uev.devpath = devpath_of(&queue);
    assert!(matcher.is_match(&uev));

    // Same device, but removed rather than added.
    let mut uev_remove = uev.clone();
    uev_remove.action = U_EVENT_ACTION_REMOVE.to_string();
    assert!(!matcher.is_match(&uev_remove));

    // Another queue on the same card.
    let other_queue = format!("{}.0002", card);
    let mut uev_other_device = uev.clone();
    uev_other_device.devpath = devpath_of(&other_queue);
    assert!(!matcher.is_match(&uev_other_device));
}
}
2 changes: 2 additions & 0 deletions src/agent/src/linux_abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ pub fn create_pci_root_bus_path() -> String {
// Paths that only exist in s390x builds.
cfg_if! {
if #[cfg(target_arch = "s390x")] {
pub const CCW_ROOT_BUS_PATH: &str = "/devices/css0";
// Prefix of the uevent devpath of AP devices; the AP uevent matcher appends
// "/card<xx>/<xx>.<xxxx>" to it.
pub const AP_ROOT_BUS_PATH: &str = "/devices/ap";
// sysfs file of the AP bus; the VFIO-AP device handler writes "1" to it to force a rescan.
pub const AP_SCANS_PATH: &str = "/sys/bus/ap/scans";
}
}

Expand Down
1 change: 1 addition & 0 deletions src/agent/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ mod tracer;

cfg_if! {
if #[cfg(target_arch = "s390x")] {
mod ap;
mod ccw;
}
}
Expand Down
29 changes: 29 additions & 0 deletions src/runtime/virtcontainers/device/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,9 @@ const (

// VFIOPCIDeviceMediatedType is a VFIO PCI mediated device type
VFIOPCIDeviceMediatedType

// VFIOAPDeviceMediatedType is a VFIO AP mediated device type
VFIOAPDeviceMediatedType
)

type VFIODev interface {
Expand Down Expand Up @@ -285,6 +288,32 @@ func (d VFIOPCIDev) GetSysfsDev() *string {
return &d.SysfsDev
}

// VFIOAPDev represents a VFIO AP (Adjunct Processor) mediated device
type VFIOAPDev struct {
// ID is used to identify this device in the hypervisor options.
ID string

// sysfsdev of VFIO mediated device
SysfsDev string

// Type of VFIO device
Type VFIODeviceType

// APDevices are the Adjunct Processor devices (APQNs) assigned to the mdev
APDevices []string
}

// GetID returns a pointer to the ID of the device. The receiver is passed by
// value, so the pointer refers to a copy and not to the caller's struct.
func (d VFIOAPDev) GetID() *string {
	id := &d.ID
	return id
}

// GetType returns the VFIO device type stored in the device.
func (d VFIOAPDev) GetType() VFIODeviceType {
	deviceType := d.Type
	return deviceType
}

// GetSysfsDev returns a pointer to the sysfsdev of the mediated device. The
// receiver is passed by value, so the pointer refers to a copy and not to the
// caller's struct.
func (d VFIOAPDev) GetSysfsDev() *string {
	sysfsDev := &d.SysfsDev
	return sysfsDev
}

// RNGDev represents a random number generator device
type RNGDev struct {
// ID is used to identify the device in the hypervisor options.
Expand Down
27 changes: 27 additions & 0 deletions src/runtime/virtcontainers/device/drivers/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,5 +103,32 @@ func GetVFIODeviceType(deviceFileName string) (vfioDeviceType config.VFIODeviceT
return vfioDeviceType, fmt.Errorf("Incorrect tokens found while parsing VFIO details: %s", deviceFileName)
}

deviceSysfsDev, err := GetSysfsDev(deviceFileName)
if err != nil {
return vfioDeviceType, err
}

if strings.HasPrefix(deviceSysfsDev, vfioAPSysfsDir) {
return config.VFIOAPDeviceMediatedType, err
}

return config.VFIOPCIDeviceMediatedType, err
}

// GetSysfsDev returns the sysfsdev of mediated device
// Expected input string format is absolute path to the sysfs dev node
// eg. /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4
func GetSysfsDev(sysfsDevStr string) (string, error) {
return filepath.EvalSymlinks(sysfsDevStr)
}

// GetAPVFIODevices retrieves all APQNs associated with a mediated VFIO-AP
// device
func GetAPVFIODevices(sysfsdev string) ([]string, error) {
data, err := os.ReadFile(filepath.Join(sysfsdev, "matrix"))
if err != nil {
return []string{}, err
}
// Split by newlines, omitting final newline
return strings.Split(string(data[:len(data)-1]), "\n"), nil
}
45 changes: 30 additions & 15 deletions src/runtime/virtcontainers/device/drivers/vfio.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const (
iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group"
vfioDevPath = "/dev/vfio/%s"
pcieRootPortPrefix = "rp"
vfioAPSysfsDir = "/sys/devices/vfio_ap"
)

var (
Expand Down Expand Up @@ -107,6 +108,17 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
AllPCIeDevs[deviceBDF] = true
}
vfio = vfioPCI
case config.VFIOAPDeviceMediatedType:
devices, err := GetAPVFIODevices(deviceSysfsDev)
if err != nil {
return err
}
vfio = config.VFIOAPDev{
ID: id,
SysfsDev: deviceSysfsDev,
Type: config.VFIOAPDeviceMediatedType,
APDevices: devices,
}
}
device.VfioDevs = append(device.VfioDevs, &vfio)
}
Expand Down Expand Up @@ -212,13 +224,23 @@ func (device *VFIODevice) Load(ds persistapi.DeviceState) {
device.GenericDevice.Load(ds)

for _, dev := range ds.VFIODevs {
var vfioDev config.VFIODev = config.VFIOPCIDev{
ID: dev.ID,
Type: config.VFIODeviceType(dev.Type),
BDF: dev.BDF,
SysfsDev: dev.SysfsDev,
var vfio config.VFIODev

if (*device.VfioDevs[0]).GetType() == config.VFIOAPDeviceMediatedType {
vfio = config.VFIOAPDev{
ID: dev.ID,
SysfsDev: dev.SysfsDev,
}
} else {
vfio = config.VFIOPCIDev{
ID: dev.ID,
Type: config.VFIODeviceType(dev.Type),
BDF: dev.BDF,
SysfsDev: dev.SysfsDev,
}
}
device.VfioDevs = append(device.VfioDevs, &vfioDev)

device.VfioDevs = append(device.VfioDevs, &vfio)
}
}

Expand All @@ -237,9 +259,9 @@ func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceS
deviceBDF = getBDF(deviceFileName)
// Get sysfs path used by cloud-hypervisor
deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName)
case config.VFIOPCIDeviceMediatedType:
case config.VFIOPCIDeviceMediatedType, config.VFIOAPDeviceMediatedType:
// Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
deviceSysfsDev, err = getSysfsDev(sysfsDevStr)
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
default:
err = fmt.Errorf("Unknown VFIO device type: %v", vfioDeviceType)
}
Expand All @@ -254,13 +276,6 @@ func getBDF(deviceSysStr string) string {
return tokens[1]
}

// getSysfsDev returns the sysfsdev of mediated device
// Expected input string format is absolute path to the sysfs dev node
// eg. /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4
func getSysfsDev(sysfsDevStr string) (string, error) {
return filepath.EvalSymlinks(sysfsDevStr)
}

// BindDevicetoVFIO binds the device to vfio driver after unbinding from host.
// Will be called by a network interface or a generic pcie device.
func BindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) (string, error) {
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/virtcontainers/device/drivers/vfio_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func TestGetVFIODetails(t *testing.T) {
switch vfioDeviceType {
case config.VFIOPCIDeviceNormalType:
assert.Equal(t, d.expectedStr, deviceBDF)
case config.VFIOPCIDeviceMediatedType:
case config.VFIOPCIDeviceMediatedType, config.VFIOAPDeviceMediatedType:
assert.Equal(t, d.expectedStr, deviceSysfsDev)
default:
assert.NotNil(t, err)
Expand Down
Loading

0 comments on commit 44ed889

Please sign in to comment.