From 5ba47b6e0d6b136076cb8ecc4d538103e0595591 Mon Sep 17 00:00:00 2001 From: Matias Ezequiel Vara Larsen Date: Fri, 9 Aug 2024 04:50:26 -0400 Subject: [PATCH] Support ARM CCA feature Enable building confidential guests using ARM CCA (Confidential Computing Architecture). This work relies on the v7 series for Linux and the v5 series for KVM. It has only been tested on the corresponding FVP model simulator. Testing requires specific kvm-ioctls and kvm-bindings crates. Signed-off-by: Matias Ezequiel Vara Larsen --- Makefile | 3 + src/arch/Cargo.toml | 6 +- src/arch/src/aarch64/fdt.rs | 3 + src/arch/src/aarch64/linux/regs.rs | 4 +- src/cpuid/Cargo.toml | 4 +- src/devices/Cargo.toml | 2 + src/devices/src/virtio/console/device.rs | 15 +- src/devices/src/virtio/console/mod.rs | 1 + src/devices/src/virtio/fs/device.rs | 13 +- src/devices/src/virtio/rng/device.rs | 7 +- src/libkrun/Cargo.toml | 2 + src/libkrun/src/lib.rs | 68 +++++++++ src/vmm/Cargo.toml | 8 +- src/vmm/src/builder.rs | 176 ++++++++++++++++++++--- src/vmm/src/lib.rs | 3 + src/vmm/src/linux/vstate.rs | 124 ++++++++++++++-- src/vmm/src/vmm_config/boot_source.rs | 2 +- 17 files changed, 393 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index 0df02907..fe085abf 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,9 @@ ifeq ($(SEV),1) INIT_SRC += $(SNP_INIT_SRC) BUILD_INIT = 0 endif +ifeq ($(CCA), 1) + FEATURE_FLAGS := --features cca +endif ifeq ($(GPU),1) FEATURE_FLAGS += --features gpu endif diff --git a/src/arch/Cargo.toml b/src/arch/Cargo.toml index baaedda5..cb183142 100644 --- a/src/arch/Cargo.toml +++ b/src/arch/Cargo.toml @@ -5,6 +5,8 @@ authors = ["The Chromium OS Authors"] edition = "2021" [features] +default = ["cca"] +cca = [] tee = [] amd-sev = [ "tee" ] efi = [] @@ -18,8 +20,8 @@ smbios = { path = "../smbios" } utils = { path = "../utils" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" 
+kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] , git = "https://github.com/virtee/kvm-bindings", branch = "add_bindings_for_realms" } +kvm-ioctls = { version = ">=0.17", git = "https://github.com/virtee/kvm-ioctls", branch = "cca" } [target.'cfg(target_arch = "aarch64")'.dependencies] vm-fdt = ">= 0.2.0" diff --git a/src/arch/src/aarch64/fdt.rs b/src/arch/src/aarch64/fdt.rs index 02b45112..4e051506 100644 --- a/src/arch/src/aarch64/fdt.rs +++ b/src/arch/src/aarch64/fdt.rs @@ -285,7 +285,10 @@ fn create_psci_node(fdt: &mut FdtWriter) -> Result<()> { // Two methods available: hvc and smc. // As per documentation, PSCI calls between a guest and hypervisor may use the HVC conduit instead of SMC. // So, since we are using kvm, we need to use hvc. + #[cfg(not(feature = "cca"))] fdt.property_string("method", "hvc")?; + #[cfg(feature = "cca")] + fdt.property_string("method", "smc")?; fdt.end_node(node)?; Ok(()) diff --git a/src/arch/src/aarch64/linux/regs.rs b/src/arch/src/aarch64/linux/regs.rs index 81146b8a..71dffe5b 100644 --- a/src/arch/src/aarch64/linux/regs.rs +++ b/src/arch/src/aarch64/linux/regs.rs @@ -125,8 +125,10 @@ arm64_sys_reg!(MPIDR_EL1, 3, 0, 0, 0, 5); /// * `boot_ip` - Starting instruction pointer. /// * `mem` - Reserved DRAM for current VM. pub fn setup_regs(vcpu: &VcpuFd, cpu_id: u8, boot_ip: u64, mem: &GuestMemoryMmap) -> Result<()> { - // Get the register index of the PSTATE (Processor State) register. + // PSTATE cannot be accessed from the host in CCA + #[cfg(not(feature = "cca"))] #[allow(deref_nullptr)] + // Get the register index of the PSTATE (Processor State) register. 
vcpu.set_one_reg(arm64_core_reg!(pstate), &PSTATE_FAULT_BITS_64.to_le_bytes()) .map_err(Error::SetCoreRegister)?; diff --git a/src/cpuid/Cargo.toml b/src/cpuid/Cargo.toml index 41c53aee..d1e3214e 100644 --- a/src/cpuid/Cargo.toml +++ b/src/cpuid/Cargo.toml @@ -8,5 +8,5 @@ edition = "2021" vmm-sys-util = ">=0.11" [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] , git = "https://github.com/virtee/kvm-bindings", branch = "add_bindings_for_realms" } +kvm-ioctls = { version = ">=0.17", git = "https://github.com/virtee/kvm-ioctls", branch = "cca" } diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index 9508ce1f..33079f5b 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -5,7 +5,9 @@ authors = ["The Chromium OS Authors"] edition = "2021" [features] +default = ["cca"] tee = [] +cca = [] amd-sev = ["blk", "tee"] net = [] blk = [] diff --git a/src/devices/src/virtio/console/device.rs b/src/devices/src/virtio/console/device.rs index e1193ec6..56535afa 100644 --- a/src/devices/src/virtio/console/device.rs +++ b/src/devices/src/virtio/console/device.rs @@ -30,9 +30,18 @@ use crate::virtio::{PortDescription, VmmExitObserver}; pub(crate) const CONTROL_RXQ_INDEX: usize = 2; pub(crate) const CONTROL_TXQ_INDEX: usize = 3; -pub(crate) const AVAIL_FEATURES: u64 = 1 << uapi::VIRTIO_CONSOLE_F_SIZE as u64 - | 1 << uapi::VIRTIO_CONSOLE_F_MULTIPORT as u64 - | 1 << uapi::VIRTIO_F_VERSION_1 as u64; +// CCA requires VIRTIO_F_ACCESS_PLATFORM to ensure DMA-APIs +// are triggered for virtio in Linux +pub(crate) const AVAIL_FEATURES: u64 = if cfg!(feature = "cca") { + 1 << uapi::VIRTIO_CONSOLE_F_SIZE as u64 + | 1 << uapi::VIRTIO_CONSOLE_F_MULTIPORT as u64 + | 1 << uapi::VIRTIO_F_VERSION_1 as u64 + | 1 << uapi::VIRTIO_F_ACCESS_PLATFORM as u64 +} else { + 1 << uapi::VIRTIO_CONSOLE_F_SIZE as u64 + | 1 << 
uapi::VIRTIO_CONSOLE_F_MULTIPORT as u64 + | 1 << uapi::VIRTIO_F_VERSION_1 as u64 +}; #[repr(C)] #[derive(Default)] diff --git a/src/devices/src/virtio/console/mod.rs b/src/devices/src/virtio/console/mod.rs index bbaba4dd..c6d0fb9d 100644 --- a/src/devices/src/virtio/console/mod.rs +++ b/src/devices/src/virtio/console/mod.rs @@ -22,6 +22,7 @@ mod defs { pub const VIRTIO_CONSOLE_F_MULTIPORT: u32 = 1; pub const VIRTIO_F_VERSION_1: u32 = 32; pub const VIRTIO_ID_CONSOLE: u32 = 3; + pub const VIRTIO_F_ACCESS_PLATFORM: u32 = 33; } #[allow(dead_code)] diff --git a/src/devices/src/virtio/fs/device.rs b/src/devices/src/virtio/fs/device.rs index fa9ffc8f..ed18b44d 100644 --- a/src/devices/src/virtio/fs/device.rs +++ b/src/devices/src/virtio/fs/device.rs @@ -9,7 +9,10 @@ use std::thread::JoinHandle; #[cfg(target_os = "macos")] use hvf::MemoryMapping; use utils::eventfd::{EventFd, EFD_NONBLOCK}; -use virtio_bindings::{virtio_config::VIRTIO_F_VERSION_1, virtio_ring::VIRTIO_RING_F_EVENT_IDX}; +use virtio_bindings::{ + virtio_config::VIRTIO_F_ACCESS_PLATFORM, virtio_config::VIRTIO_F_VERSION_1, + virtio_ring::VIRTIO_RING_F_EVENT_IDX, +}; use vm_memory::{ByteValued, GuestMemoryMmap}; use super::super::{ @@ -70,7 +73,13 @@ impl Fs { .push(EventFd::new(utils::eventfd::EFD_NONBLOCK).map_err(FsError::EventFd)?); } - let avail_features = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX); + let avail_features = if cfg!(feature = "cca") { + (1u64 << VIRTIO_F_VERSION_1) + | (1u64 << VIRTIO_RING_F_EVENT_IDX) + | (1 << VIRTIO_F_ACCESS_PLATFORM as u64) + } else { + (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX) + }; let tag = fs_id.into_bytes(); let mut config = VirtioFsConfig::default(); diff --git a/src/devices/src/virtio/rng/device.rs b/src/devices/src/virtio/rng/device.rs index d87f696d..75ea6cd4 100644 --- a/src/devices/src/virtio/rng/device.rs +++ b/src/devices/src/virtio/rng/device.rs @@ -13,12 +13,17 @@ use super::super::{ use super::{defs, defs::uapi}; 
use crate::legacy::Gic; use crate::Error as DeviceError; +use virtio_bindings::virtio_config::VIRTIO_F_ACCESS_PLATFORM; // Request queue. pub(crate) const REQ_INDEX: usize = 0; // Supported features. -pub(crate) const AVAIL_FEATURES: u64 = 1 << uapi::VIRTIO_F_VERSION_1 as u64; +pub(crate) const AVAIL_FEATURES: u64 = if cfg!(feature = "cca") { + 1 << uapi::VIRTIO_F_VERSION_1 as u64 | 1 << VIRTIO_F_ACCESS_PLATFORM as u64 +} else { + 1 << uapi::VIRTIO_F_VERSION_1 as u64 +}; #[derive(Copy, Clone, Debug, Default)] #[repr(C, packed)] diff --git a/src/libkrun/Cargo.toml b/src/libkrun/Cargo.toml index 5a6c71eb..8bea2aba 100644 --- a/src/libkrun/Cargo.toml +++ b/src/libkrun/Cargo.toml @@ -16,12 +16,14 @@ snd = [] virgl_resource_map2 = [] [dependencies] +vm-memory = { version = ">=0.13", features = ["backend-mmap"] } crossbeam-channel = "0.5" env_logger = "0.9.0" libc = ">=0.2.39" log = "0.4.0" once_cell = "1.4.1" +kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] , git = "https://github.com/virtee/kvm-bindings", branch = "add_bindings_for_realms" } devices = { path = "../devices" } polly = { path = "../polly" } utils = { path = "../utils" } diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 77c84d5d..cea9bb95 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -1,6 +1,13 @@ #[macro_use] extern crate log; +use crossbeam_channel::unbounded; +use kvm_bindings::kvm_memory_attributes; +use libc::fallocate; +use libc::madvise; +use libc::FALLOC_FL_KEEP_SIZE; +use libc::FALLOC_FL_PUNCH_HOLE; +use libc::MADV_DONTNEED; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::convert::TryInto; @@ -11,10 +18,13 @@ use std::ffi::CString; #[cfg(target_os = "linux")] use std::os::fd::AsRawFd; use std::os::fd::RawFd; +use std::os::raw::c_void; use std::path::PathBuf; use std::slice; use std::sync::atomic::{AtomicI32, Ordering}; use std::sync::Mutex; +use vm_memory::GuestMemoryRegion; +use vm_memory::{Address, 
GuestMemory}; #[cfg(target_os = "macos")] use crossbeam_channel::unbounded; @@ -1154,9 +1164,12 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { #[cfg(target_os = "macos")] let (sender, receiver) = unbounded(); + let (io_sender, receiver) = unbounded(); + let _vmm = match vmm::builder::build_microvm( &ctx_cfg.vmr, &mut event_manager, + io_sender, ctx_cfg.shutdown_efd, #[cfg(target_os = "macos")] sender, @@ -1171,6 +1184,61 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { #[cfg(target_os = "macos")] let mapper_vmm = _vmm.clone(); + let vm = _vmm.lock().unwrap().kvm_vm().fd.clone(); + let guest_mem = _vmm.lock().unwrap().guest_memory().clone(); + let guest_memfd = _vmm.lock().unwrap().guest_memfd_vec.clone(); + + std::thread::spawn(move || loop { + match receiver.recv() { + Err(e) => error!("Error in receiver: {:?}", e), + Ok(m) => { + let _ret = vm + .lock() + .unwrap() + .set_memory_attributes(kvm_memory_attributes { + address: m.addr, + size: m.size, + attributes: m.attributes as u64, + flags: 0, + }); + + // from private to shared + if m.attributes == 0 { + for (index, region) in guest_mem.iter().enumerate() { + // this supposes that m.addr + m.size < region.start + region.size + // which may be false + if (region.start_addr().raw_value() + region.size() as u64) > m.addr { + let offset = m.addr - region.start_addr().raw_value(); + unsafe { + let _ret = fallocate( + *guest_memfd.get(index).unwrap(), + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + offset as i64, + m.size as i64, + ); + } + } + } + // from shared to private + } else { + for (_index, region) in guest_mem.iter().enumerate() { + if (region.start_addr().raw_value() + region.size() as u64) > m.addr { + let offset = m.addr - region.start_addr().raw_value(); + let host_startaddr = m.addr + offset; + unsafe { + let _ret = madvise( + host_startaddr as *mut c_void, + m.size.try_into().unwrap(), + MADV_DONTNEED, + ); + } + } + } + } + } + } + }); + #[cfg(target_os = "macos")] 
std::thread::Builder::new() .name("mapping worker".into()) diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 25ed38d7..70d49e98 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -5,8 +5,10 @@ authors = ["Amazon Firecracker team "] edition = "2021" [features] +default = ["cca"] tee = [] amd-sev = [ "blk", "tee", "codicon", "kbs-types", "procfs", "rdrand", "serde", "serde_json", "sev", "curl" ] +cca = [] net = [] blk = [] efi = [ "blk", "net" ] @@ -37,12 +39,14 @@ sev = { version = "4.0.0", features = ["openssl"], optional = true } curl = { version = "0.4", optional = true } nix = "0.24.1" +cca = { git = "https://github.com/virtee/cca" } + [target.'cfg(target_arch = "x86_64")'.dependencies] cpuid = { path = "../cpuid" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.10", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] , git = "https://github.com/virtee/kvm-bindings", branch = "add_bindings_for_realms" } +kvm-ioctls = { version = ">=0.17", git = "https://github.com/virtee/kvm-ioctls", branch = "cca" } [target.'cfg(target_os = "macos")'.dependencies] hvf = { path = "../hvf" } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index bf40d8df..d12d4dc8 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -3,13 +3,17 @@ //! Enables pre-boot setup, instantiation and booting of a Firecracker VMM. 
+use crate::vstate::MemProperties; +use cca::Algo; #[cfg(target_os = "macos")] use crossbeam_channel::{unbounded, Sender}; +use std::cmp::max; use std::fmt::{Display, Formatter}; use std::fs::File; use std::io; #[cfg(target_os = "linux")] use std::os::fd::AsRawFd; +use std::os::fd::RawFd; use std::path::PathBuf; use std::sync::{Arc, Mutex}; @@ -46,7 +50,7 @@ use crate::vmm_config::boot_source::DEFAULT_KERNEL_CMDLINE; use crate::vmm_config::fs::FsDeviceConfig; #[cfg(target_os = "linux")] use crate::vstate::KvmContext; -#[cfg(all(target_os = "linux", feature = "tee"))] +#[cfg(all(target_os = "linux", any(feature = "tee", feature = "cca")))] use crate::vstate::MeasuredRegion; use crate::vstate::{Error as VstateError, Vcpu, VcpuConfig, Vm}; use arch::ArchMemoryInfo; @@ -56,6 +60,8 @@ use device_manager::shm::ShmManager; #[cfg(not(feature = "tee"))] use devices::virtio::{fs::ExportTable, VirtioShmRegion}; #[cfg(feature = "tee")] +use kbs_types::Tee; +#[cfg(feature = "tee")] use kvm_bindings::KVM_MAX_CPUID_ENTRIES; use libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO}; use nix::unistd::isatty; @@ -71,6 +77,11 @@ use vm_memory::Bytes; use vm_memory::GuestRegionMmap; use vm_memory::{GuestAddress, GuestMemory, GuestMemoryMmap}; +use vm_memory::GuestMemoryRegion; + +use crossbeam_channel::Sender; +use kvm_bindings::KVM_ARM_VCPU_REC; + #[cfg(feature = "efi")] static EDK2_BINARY: &[u8] = include_bytes!("../../../edk2/KRUN_EFI.silent.fd"); @@ -351,6 +362,7 @@ enum Payload { pub fn build_microvm( vm_resources: &super::resources::VmResources, event_manager: &mut EventManager, + io_sender: Sender, _shutdown_efd: Option, #[cfg(target_os = "macos")] _map_sender: Sender, ) -> std::result::Result>, StartMicrovmError> { @@ -412,9 +424,11 @@ pub fn build_microvm( Some(s) => kernel_cmdline.insert_str(s).unwrap(), }; + let mut guest_memfd: Vec = vec![]; + #[cfg(not(feature = "tee"))] #[allow(unused_mut)] - let mut vm = setup_vm(&guest_memory)?; + let mut vm = setup_vm(&guest_memory, &mut 
guest_memfd)?; #[cfg(feature = "tee")] let (kvm, mut vm) = { @@ -484,19 +498,59 @@ pub fn build_microvm( m }; + #[cfg(feature = "cca")] + let measured_regions = { + let m = vec![ + MeasuredRegion { + guest_addr: kernel_bundle.guest_addr, + // TODO: remove host_addr? + host_addr: guest_memory + .get_host_address(GuestAddress(kernel_bundle.guest_addr)) + .unwrap() as u64, + size: kernel_bundle.size, + populate: true, + }, + MeasuredRegion { + guest_addr: kernel_bundle.guest_addr + kernel_bundle.size as u64, + host_addr: guest_memory + .get_host_address(GuestAddress( + kernel_bundle.guest_addr + kernel_bundle.size as u64, + )) + .unwrap() as u64, + size: (vm_resources.vm_config().mem_size_mib.unwrap() << 20) - kernel_bundle.size, + populate: false, + }, + // The region used for the FDT must be populated. However, we only know the addr and the size after + // configure_system() but at that point guest_memory is already shared. For the moment, hardcode the + // fdt addr and size. + MeasuredRegion { + guest_addr: 0x2DFE00000, + host_addr: guest_memory + .get_host_address(GuestAddress(0x2DFE00000)) + .unwrap() as u64, + // size must be page aligned + size: 0x1000, + populate: true, + }, + ]; + + m + }; + // On x86_64 always create a serial device, // while on aarch64 only create it if 'console=' is specified in the boot args. - let serial_device = if cfg!(feature = "efi") { + // TODO: to comment this + let serial_device = //if cfg!(feature = "efi") { Some(setup_serial_device( event_manager, None, - None, + //None, // Uncomment this to get EFI output when debugging EDK2. - // Some(Box::new(io::stdout())), - )?) 
- } else { - None - }; + Some(Box::new(io::stdout())), + )?); + //} else { + // None + //}; let exit_evt = EventFd::new(utils::eventfd::EFD_NONBLOCK) .map_err(Error::EventFd) @@ -568,16 +622,19 @@ pub fn build_microvm( &guest_memory, GuestAddress(kernel_bundle.guest_addr), &exit_evt, + io_sender, ) .map_err(StartMicrovmError::Internal)?; setup_interrupt_controller(&mut vm, vcpu_config.vcpu_count)?; + /* + This makes the kernel block while parsing it; the cause is not yet known attach_legacy_devices( &vm, &mut mmio_device_manager, &mut kernel_cmdline, serial_device, - )?; + )?; */ } #[cfg(all(target_arch = "aarch64", target_os = "macos"))] @@ -619,6 +676,7 @@ pub fn build_microvm( exit_observers: Vec::new(), vm, mmio_device_manager, + guest_memfd_vec: guest_memfd, #[cfg(target_arch = "x86_64")] pio_device_manager, }; @@ -671,7 +729,7 @@ pub fn build_microvm( if let Some(vsock) = vm_resources.vsock.get() { attach_unixsock_vsock_device(&mut vmm, vsock, event_manager, intc.clone())?; #[cfg(not(feature = "net"))] - vmm.kernel_cmdline.insert_str("tsi_hijack")?; + //vmm.kernel_cmdline.insert_str("tsi_hijack")?; #[cfg(feature = "net")] if vm_resources .net_builder @@ -680,7 +738,7 @@ pub fn build_microvm( .is_empty() { // Only enable TSI if we don't have any network devices. - vmm.kernel_cmdline.insert_str("tsi_hijack")?; + //vmm.kernel_cmdline.insert_str("tsi_hijack")?; } } #[cfg(feature = "net")] @@ -744,6 +802,55 @@ pub fn build_microvm( println!("Starting TEE/microVM."); } + // after this point guest memory and regs are not accessible anymore + #[cfg(feature = "cca")] + { + let _ = vmm + .kvm_vm() + .realm + .configure_measurement(&vmm.kvm_vm().fd.lock().unwrap(), Algo::AlgoSha256); + + vmm.kvm_vm() + .realm + .create_realm_descriptor(&vmm.kvm_vm().fd.lock().unwrap()) + .unwrap(); + + println!("Injecting and measuring memory regions. 
This may take a while."); + + for region in measured_regions.iter() { + if region.populate { + vmm.kvm_vm() + .realm + .populate( + &vmm.kvm_vm().fd.lock().unwrap(), + region.guest_addr, + region.size.try_into().unwrap(), + ) + .unwrap(); + } else { + vmm.kvm_vm() + .realm + .initiate( + &vmm.kvm_vm().fd.lock().unwrap(), + region.guest_addr, + region.size.try_into().unwrap(), + ) + .unwrap(); + } + } + + let feature = KVM_ARM_VCPU_REC as i32; + + for vcpu in vcpus.iter() { + vcpu.fd.vcpu_finalize(&feature).unwrap(); + } + + vmm.kvm_vm() + .realm + .activate(&vmm.kvm_vm().fd.lock().unwrap()) + .unwrap(); + } + vmm.start_vcpus(vcpus) .map_err(StartMicrovmError::Internal)?; @@ -891,7 +998,7 @@ fn load_cmdline(vmm: &Vmm) -> std::result::Result<(), StartMicrovmError> { .map_err(StartMicrovmError::LoadCommandline) } -#[cfg(all(target_os = "linux", not(feature = "tee")))] +#[cfg(all(target_os = "linux", not(feature = "tee"), not(feature = "cca")))] pub(crate) fn setup_vm( guest_memory: &GuestMemoryMmap, ) -> std::result::Result { @@ -906,6 +1013,28 @@ pub(crate) fn setup_vm( .map_err(StartMicrovmError::Internal)?; Ok(vm) } +#[cfg(all(target_os = "linux", feature = "cca"))] +pub(crate) fn setup_vm( + guest_memory: &GuestMemoryMmap, + guest_memfd: &mut Vec, +) -> std::result::Result { + let kvm: KvmContext = KvmContext::new() + .map_err(Error::KvmContext) + .map_err(StartMicrovmError::Internal)?; + + // calculate max_addr for max_ipa + let mut vm = Vm::new( + kvm.fd(), + (guest_memory.last_addr().raw_value() * 2) as usize, + ) + .map_err(Error::Vm) + .map_err(StartMicrovmError::Internal)?; + + vm.memory_init(guest_memory, kvm.max_memslots(), guest_memfd, true) + .map_err(Error::Vm) + .map_err(StartMicrovmError::Internal)?; + Ok(vm) +} #[cfg(all(target_os = "linux", feature = "tee"))] pub(crate) fn setup_vm( kvm: &KvmContext, @@ -1014,13 +1143,13 @@ fn attach_legacy_devices( ) -> std::result::Result<(), StartMicrovmError> { if let Some(serial) = serial { mmio_device_manager 
- .register_mmio_serial(vm.fd(), kernel_cmdline, serial) + .register_mmio_serial(&vm.fd.lock().unwrap(), kernel_cmdline, serial) .map_err(Error::RegisterMMIODevice) .map_err(StartMicrovmError::Internal)?; } mmio_device_manager - .register_mmio_rtc(vm.fd()) + .register_mmio_rtc(&vm.fd.lock().unwrap()) .map_err(Error::RegisterMMIODevice) .map_err(StartMicrovmError::Internal)?; @@ -1100,17 +1229,19 @@ fn create_vcpus_aarch64( guest_mem: &GuestMemoryMmap, entry_addr: GuestAddress, exit_evt: &EventFd, + sender_io: Sender, ) -> super::Result> { let mut vcpus = Vec::with_capacity(vcpu_config.vcpu_count as usize); for cpu_index in 0..vcpu_config.vcpu_count { - let mut vcpu = Vcpu::new_aarch64( + let mut vcpu: Vcpu = Vcpu::new_aarch64( cpu_index, - vm.fd(), + &vm.fd.lock().unwrap(), exit_evt.try_clone().map_err(Error::EventFd)?, + sender_io.clone(), ) .map_err(Error::Vcpu)?; - vcpu.configure_aarch64(vm.fd(), guest_mem, entry_addr) + vcpu.configure_aarch64(&vm.fd.lock().unwrap(), guest_mem, entry_addr) .map_err(Error::Vcpu)?; vcpus.push(vcpu); @@ -1174,9 +1305,12 @@ fn attach_mmio_device( let _cmdline = &mut vmm.kernel_cmdline; #[cfg(target_os = "linux")] - let (_mmio_base, _irq) = - vmm.mmio_device_manager - .register_mmio_device(vmm.vm.fd(), device, type_id, id)?; + let (_mmio_base, _irq) = vmm.mmio_device_manager.register_mmio_device( + &vmm.vm.fd.lock().unwrap(), + device, + type_id, + id, + )?; #[cfg(target_os = "macos")] let (_mmio_base, _irq) = vmm .mmio_device_manager diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 0e680a84..22394833 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -39,6 +39,7 @@ use macos::vstate; use std::fmt::{Display, Formatter}; use std::io; +use std::os::fd::RawFd; use std::os::unix::io::AsRawFd; use std::sync::{Arc, Mutex}; #[cfg(target_os = "linux")] @@ -188,6 +189,8 @@ pub struct Vmm { guest_memory: GuestMemoryMmap, arch_memory_info: ArchMemoryInfo, + pub guest_memfd_vec: Vec, + kernel_cmdline: KernelCmdline, 
vcpus_handles: Vec, diff --git a/src/vmm/src/linux/vstate.rs b/src/vmm/src/linux/vstate.rs index f86cdd47..7b325d11 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -8,13 +8,17 @@ use crossbeam_channel::{unbounded, Receiver, Sender, TryRecvError}; use libc::{c_int, c_void, siginfo_t}; use std::cell::Cell; +use std::cmp::max; use std::fmt::{Display, Formatter}; use std::io; use std::os::fd::RawFd; +use std::sync::Arc; +use std::sync::Mutex; #[cfg(feature = "tee")] use std::os::unix::io::RawFd; +use kvm_ioctls::VcpuExit::Unsupported; use std::result; use std::sync::atomic::{fence, Ordering}; #[cfg(not(test))] @@ -47,8 +51,10 @@ use kvm_bindings::{ KVM_MAX_CPUID_ENTRIES, KVM_PIT_SPEAKER_DUMMY, }; use kvm_bindings::{ - kvm_create_guest_memfd, kvm_userspace_memory_region, kvm_userspace_memory_region2, - KVM_API_VERSION, KVM_MEM_GUEST_MEMFD, + kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region, + kvm_userspace_memory_region2, KVM_API_VERSION, KVM_MEMORY_ATTRIBUTE_PRIVATE, + KVM_MEMORY_EXIT_FLAG_PRIVATE, KVM_MEM_GUEST_MEMFD, KVM_VM_TYPE_ARM_IPA_SIZE_MASK, + KVM_VM_TYPE_ARM_REALM, }; use kvm_ioctls::*; use utils::eventfd::EventFd; @@ -64,6 +70,9 @@ use sev::launch::sev as sev_launch; #[cfg(feature = "amd-sev")] use sev::launch::snp; +#[cfg(feature = "cca")] +use cca::Realm; + /// Signal number (SIGRTMIN) used to kick Vcpus. pub(crate) const VCPU_RTSIG_OFFSET: i32 = 0; @@ -405,12 +414,13 @@ impl Display for Error { pub type Result = result::Result; -#[cfg(feature = "tee")] +#[cfg(any(feature = "tee", feature = "cca"))] #[derive(Debug)] pub struct MeasuredRegion { pub guest_addr: u64, pub host_addr: u64, pub size: usize, + pub populate: bool, } /// Describes a KVM context that gets attached to the microVM. @@ -464,7 +474,7 @@ impl KvmContext { /// A wrapper around creating and using a VM. pub struct Vm { - fd: VmFd, + pub fd: Arc>, next_mem_slot: u32, // X86 specific fields. 
@@ -486,11 +496,14 @@ pub struct Vm { #[cfg(feature = "amd-sev")] pub tee: Tee, + + #[cfg(feature = "cca")] + pub realm: Realm, } impl Vm { /// Constructs a new `Vm` using the given `Kvm` instance. - #[cfg(not(feature = "tee"))] + #[cfg(all(not(feature = "tee"), not(feature = "cca")))] pub fn new(kvm: &Kvm) -> Result { //create fd for interacting with kvm-vm specific functions let vm_fd = kvm.create_vm().map_err(Error::VmFd)?; @@ -515,6 +528,27 @@ impl Vm { }) } + #[cfg(feature = "cca")] + pub fn new(kvm: &Kvm, max_ipa: usize) -> Result { + //create fd for interacting with kvm-vm specific functions + let ipa_bits = max(64u32 - max_ipa.leading_zeros() - 1, 32) + 1; + let vm_fd = kvm + .create_vm_with_type( + (KVM_VM_TYPE_ARM_REALM | (ipa_bits & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)).into(), + ) + .map_err(Error::VmFd)?; + + let realm = Realm::new(); + + Ok(Vm { + next_mem_slot: 0, + fd: Arc::new(Mutex::new(vm_fd)), + #[cfg(target_arch = "aarch64")] + irqchip_handle: None, + realm, + }) + } + #[cfg(feature = "amd-sev")] pub fn new(kvm: &Kvm, tee_config: &TeeConfig) -> Result { //create fd for interacting with kvm-vm specific functions @@ -564,6 +598,7 @@ impl Vm { &mut self, guest_mem: &GuestMemoryMmap, kvm_max_memslots: usize, + guest_memfd: &mut Vec, require_guest_memfd: bool, ) -> Result<()> { if guest_mem.num_regions() > kvm_max_memslots { @@ -583,9 +618,13 @@ impl Vm { let id: RawFd = self .fd + .lock() + .unwrap() .create_guest_memfd(gmem) .map_err(Error::CreateGuestMemfd)?; + guest_memfd.push(id); + let memory_region = kvm_userspace_memory_region2 { slot: self.next_mem_slot as u32, flags: KVM_MEM_GUEST_MEMFD, @@ -602,9 +641,22 @@ impl Vm { // are not overlapping. 
unsafe { self.fd + .lock() + .unwrap() .set_user_memory_region2(memory_region) .map_err(Error::SetUserMemoryRegion2)?; }; + + // set private by default when using guestmemfd + // this imitates QEMU behavior + let attr = kvm_memory_attributes { + address: region.start_addr().raw_value(), + size: region.len(), + attributes: KVM_MEMORY_ATTRIBUTE_PRIVATE as u64, + flags: 0, + }; + + self.fd.lock().unwrap().set_memory_attributes(attr).unwrap(); } else { let memory_region = kvm_userspace_memory_region { slot: self.next_mem_slot as u32, @@ -617,6 +669,8 @@ impl Vm { // are not overlapping. unsafe { self.fd + .lock() + .unwrap() .set_user_memory_region(memory_region) .map_err(Error::SetUserMemoryRegion)?; }; @@ -706,7 +760,8 @@ impl Vm { #[cfg(target_arch = "aarch64")] pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<()> { self.irqchip_handle = Some( - arch::aarch64::gic::create_gic(&self.fd, vcpu_count.into()).map_err(Error::SetupGIC)?, + arch::aarch64::gic::create_gic(&self.fd.lock().unwrap(), vcpu_count.into()) + .map_err(Error::SetupGIC)?, ); Ok(()) } @@ -719,9 +774,9 @@ impl Vm { } /// Gets a reference to the kvm file descriptor owned by this VM. - pub fn fd(&self) -> &VmFd { - &self.fd - } + //pub fn fd(&self) -> &VmFd { + // &self.fd + // } #[allow(unused)] #[cfg(target_arch = "x86_64")] @@ -812,9 +867,14 @@ pub struct VcpuConfig { // Using this for easier explicit type-casting to help IDEs interpret the code. type VcpuCell = Cell>; +pub struct MemProperties { + pub addr: u64, + pub size: u64, + pub attributes: u32, +} /// A wrapper around creating and using a kvm-based VCPU. pub struct Vcpu { - fd: VcpuFd, + pub fd: VcpuFd, id: u8, mmio_bus: Option, #[allow(dead_code)] @@ -831,6 +891,9 @@ pub struct Vcpu { #[cfg(target_arch = "aarch64")] mpidr: u64, + #[cfg(feature = "cca")] + sender_io: Sender, + // The receiving end of events channel owned by the vcpu side. 
event_receiver: Receiver, // The transmitting end of the events channel which will be given to the handler. @@ -972,7 +1035,12 @@ impl Vcpu { /// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits. /// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime. #[cfg(target_arch = "aarch64")] - pub fn new_aarch64(id: u8, vm_fd: &VmFd, exit_evt: EventFd) -> Result { + pub fn new_aarch64( + id: u8, + vm_fd: &VmFd, + exit_evt: EventFd, + sender_io: Sender, + ) -> Result { let kvm_vcpu = vm_fd.create_vcpu(id as u64).map_err(Error::VcpuFd)?; let (event_sender, event_receiver) = unbounded(); let (response_sender, response_receiver) = unbounded(); @@ -987,6 +1055,7 @@ impl Vcpu { event_sender: Some(event_sender), response_receiver: Some(response_receiver), response_sender, + sender_io, }) } @@ -1273,12 +1342,37 @@ impl Vcpu { info!("Received KVM_EXIT_SHUTDOWN signal"); Ok(VcpuEmulation::Stopped) } + VcpuExit::MemoryFault { flags, gpa, size } => { + // TODO: flags can be private or shared; any other bit is rejected (mask is widened to u64 before negating) + if flags & !(KVM_MEMORY_EXIT_FLAG_PRIVATE as u64) != 0 { + error!("KVM_EXIT_MEMORY_FAULT: Unknown flag {}", flags); + Err(Error::VcpuUnhandledKvmExit) + } else { + // from private to shared + let mut attr = 0; + // from shared to private + if flags & KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 + == KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 + { + attr = KVM_MEMORY_ATTRIBUTE_PRIVATE; + }; + + let _ = self.sender_io.try_send(MemProperties { + addr: gpa, + size, + attributes: attr, + }); + Ok(VcpuEmulation::Handled) + } + } // Documentation specifies that below kvm exits are considered // errors. 
VcpuExit::FailEntry(reason, vcpu) => { error!("Received KVM_EXIT_FAIL_ENTRY signal: reason={reason}, vcpu={vcpu}"); Err(Error::VcpuUnhandledKvmExit) } + // TODO: to remove this + Unsupported(39) => Ok(VcpuEmulation::Handled), VcpuExit::InternalError => { error!("Received KVM_EXIT_INTERNAL_ERROR signal"); Err(Error::VcpuUnhandledKvmExit) @@ -1610,7 +1704,9 @@ mod tests { // Create valid memory region and test that the initialization is successful. let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap(); - assert!(vm.memory_init(&gm, kvm_context.max_memslots(), false).is_ok()); + assert!(vm + .memory_init(&gm, kvm_context.max_memslots(), false) + .is_ok()); // Set the maximum number of memory slots to 1 in KvmContext to check the error // path of memory_init. Create 2 non-overlapping memory slots. @@ -1620,7 +1716,9 @@ mod tests { (GuestAddress(0x1001), 0x2000), ]) .unwrap(); - assert!(vm.memory_init(&gm, kvm_context.max_memslots(), false).is_err()); + assert!(vm + .memory_init(&gm, kvm_context.max_memslots(), false) + .is_err()); } #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/src/vmm_config/boot_source.rs b/src/vmm/src/vmm_config/boot_source.rs index 9c70d5b1..83408f5a 100644 --- a/src/vmm/src/vmm_config/boot_source.rs +++ b/src/vmm/src/vmm_config/boot_source.rs @@ -18,7 +18,7 @@ use std::fmt::{Display, Formatter, Result}; #[cfg(all(target_os = "linux", not(feature = "tee")))] pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=-1 panic_print=0 nomodule console=hvc0 \ - rootfstype=virtiofs rw quiet no-kvmapf"; + rootfstype=virtiofs rw no-kvmapf"; //ignore_loglevel=1 initcall_debug"; #[cfg(feature = "amd-sev")] pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=-1 panic_print=0 nomodule console=hvc0 \