From e7e2a906327b904a4c5742ba68c0dbf94623db5b Mon Sep 17 00:00:00 2001 From: Matias Ezequiel Vara Larsen Date: Fri, 9 Aug 2024 04:50:26 -0400 Subject: [PATCH] Add CCA feature This is WIP Signed-off-by: Matias Ezequiel Vara Larsen --- Makefile | 3 + src/arch/src/aarch64/fdt.rs | 3 + src/arch/src/aarch64/linux/regs.rs | 4 +- src/arch/src/aarch64/mod.rs | 2 +- src/devices/src/virtio/console/device.rs | 15 ++- src/devices/src/virtio/console/mod.rs | 1 + src/libkrun/src/lib.rs | 55 ++++++++ src/vmm/src/builder.rs | 165 +++++++++++++++++++---- src/vmm/src/device_manager/kvm/mmio.rs | 5 +- src/vmm/src/lib.rs | 8 ++ src/vmm/src/linux/vstate.rs | 129 ++++++++++++++++-- src/vmm/src/vmm_config/boot_source.rs | 2 +- 12 files changed, 348 insertions(+), 44 deletions(-) diff --git a/Makefile b/Makefile index 3dcb2ad8..f0fec397 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,9 @@ ifeq ($(SEV),1) INIT_SRC += $(SNP_INIT_SRC) BUILD_INIT = 0 endif +ifeq ($(CCA), 1) + FEATURE_FLAGS := --features cca +endif ifeq ($(GPU),1) FEATURE_FLAGS += --features gpu endif diff --git a/src/arch/src/aarch64/fdt.rs b/src/arch/src/aarch64/fdt.rs index 02b45112..4e051506 100644 --- a/src/arch/src/aarch64/fdt.rs +++ b/src/arch/src/aarch64/fdt.rs @@ -285,7 +285,10 @@ fn create_psci_node(fdt: &mut FdtWriter) -> Result<()> { // Two methods available: hvc and smc. // As per documentation, PSCI calls between a guest and hypervisor may use the HVC conduit instead of SMC. // So, since we are using kvm, we need to use hvc. + #[cfg(not(feature = "cca"))] fdt.property_string("method", "hvc")?; + #[cfg(feature = "cca")] + fdt.property_string("method", "smc")?; fdt.end_node(node)?; Ok(()) diff --git a/src/arch/src/aarch64/linux/regs.rs b/src/arch/src/aarch64/linux/regs.rs index 81146b8a..71dffe5b 100644 --- a/src/arch/src/aarch64/linux/regs.rs +++ b/src/arch/src/aarch64/linux/regs.rs @@ -125,8 +125,10 @@ arm64_sys_reg!(MPIDR_EL1, 3, 0, 0, 0, 5); /// * `boot_ip` - Starting instruction pointer. 
/// * `mem` - Reserved DRAM for current VM. pub fn setup_regs(vcpu: &VcpuFd, cpu_id: u8, boot_ip: u64, mem: &GuestMemoryMmap) -> Result<()> { - // Get the register index of the PSTATE (Processor State) register. + // PSTATE cannot be accessed from the host in CCA + #[cfg(not(feature = "cca"))] #[allow(deref_nullptr)] + // Get the register index of the PSTATE (Processor State) register. vcpu.set_one_reg(arm64_core_reg!(pstate), &PSTATE_FAULT_BITS_64.to_le_bytes()) .map_err(Error::SetCoreRegister)?; diff --git a/src/arch/src/aarch64/mod.rs b/src/arch/src/aarch64/mod.rs index 9450b94d..23116ea5 100644 --- a/src/arch/src/aarch64/mod.rs +++ b/src/arch/src/aarch64/mod.rs @@ -69,7 +69,7 @@ pub fn arch_memory_regions(size: usize) -> (ArchMemoryInfo, Vec<(GuestAddress, u } else { vec![ (GuestAddress(layout::DRAM_MEM_START), dram_size), - (GuestAddress(shm_start_addr), MMIO_SHM_SIZE as usize), + //(GuestAddress(shm_start_addr), MMIO_SHM_SIZE as usize), ] }; diff --git a/src/devices/src/virtio/console/device.rs b/src/devices/src/virtio/console/device.rs index e1193ec6..56535afa 100644 --- a/src/devices/src/virtio/console/device.rs +++ b/src/devices/src/virtio/console/device.rs @@ -30,9 +30,18 @@ use crate::virtio::{PortDescription, VmmExitObserver}; pub(crate) const CONTROL_RXQ_INDEX: usize = 2; pub(crate) const CONTROL_TXQ_INDEX: usize = 3; -pub(crate) const AVAIL_FEATURES: u64 = 1 << uapi::VIRTIO_CONSOLE_F_SIZE as u64 - | 1 << uapi::VIRTIO_CONSOLE_F_MULTIPORT as u64 - | 1 << uapi::VIRTIO_F_VERSION_1 as u64; +// CCA requires VIRTIO_F_ACCESS_PLATFORM to ensure DMA-APIs +// are triggered for virtio in Linux +pub(crate) const AVAIL_FEATURES: u64 = if cfg!(feature = "cca") { + 1 << uapi::VIRTIO_CONSOLE_F_SIZE as u64 + | 1 << uapi::VIRTIO_CONSOLE_F_MULTIPORT as u64 + | 1 << uapi::VIRTIO_F_VERSION_1 as u64 + | 1 << uapi::VIRTIO_F_ACCESS_PLATFORM as u64 +} else { + 1 << uapi::VIRTIO_CONSOLE_F_SIZE as u64 + | 1 << uapi::VIRTIO_CONSOLE_F_MULTIPORT as u64 + | 1 << 
uapi::VIRTIO_F_VERSION_1 as u64 +}; #[repr(C)] #[derive(Default)] diff --git a/src/devices/src/virtio/console/mod.rs b/src/devices/src/virtio/console/mod.rs index bbaba4dd..c6d0fb9d 100644 --- a/src/devices/src/virtio/console/mod.rs +++ b/src/devices/src/virtio/console/mod.rs @@ -22,6 +22,7 @@ mod defs { pub const VIRTIO_CONSOLE_F_MULTIPORT: u32 = 1; pub const VIRTIO_F_VERSION_1: u32 = 32; pub const VIRTIO_ID_CONSOLE: u32 = 3; + pub const VIRTIO_F_ACCESS_PLATFORM: u32 = 33; } #[allow(dead_code)] diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 6cd2b156..a6567739 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -1,6 +1,11 @@ #[macro_use] extern crate log; +use crossbeam_channel::unbounded; +use kvm_bindings::kvm_memory_attributes; +use libc::fallocate; +use libc::FALLOC_FL_KEEP_SIZE; +use libc::FALLOC_FL_PUNCH_HOLE; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::convert::TryInto; @@ -15,6 +20,9 @@ use std::path::PathBuf; use std::slice; use std::sync::atomic::{AtomicI32, Ordering}; use std::sync::Mutex; +use vm_memory::GuestMemoryMmap; +use vm_memory::GuestMemoryRegion; +use vm_memory::{Address, GuestMemory}; #[cfg(target_os = "macos")] use crossbeam_channel::unbounded; @@ -1077,9 +1085,12 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { #[cfg(target_os = "macos")] let (sender, receiver) = unbounded(); + let (io_sender, receiver) = unbounded(); + let _vmm = match vmm::builder::build_microvm( &ctx_cfg.vmr, &mut event_manager, + io_sender, ctx_cfg.shutdown_efd, #[cfg(target_os = "macos")] sender, @@ -1094,6 +1105,50 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { #[cfg(target_os = "macos")] let mapper_vmm = _vmm.clone(); + let vm = _vmm.lock().unwrap().kvm_vm().fd.clone(); + let guest_mem = _vmm.lock().unwrap().guest_memory().clone(); + let guest_memfd = _vmm.lock().unwrap().guest_memfd_vec.clone(); + + std::thread::spawn(move || loop { + match receiver.recv() { + Err(e) => 
error!("Error in receiver: {:?}", e), + Ok(m) => { + let ret = vm + .lock() + .unwrap() + .set_memory_attributes(kvm_memory_attributes { + address: m.addr, + size: m.size, + attributes: m.attributes as u64, + flags: 0, + }); + + // from private to shared + // e.g., ram_block_discard_guest_memfd_range + if m.attributes == 0 { + for (index, region) in guest_mem.iter().enumerate() { + if (region.start_addr().raw_value() + region.size() as u64) > m.addr { + // the offset is a function of the mapping position + let offset = m.addr - region.start_addr().raw_value(); + unsafe { + let _ret = fallocate( + *guest_memfd.get(index).unwrap(), + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + offset as i64, + m.size as i64, + ); + } + } + } + // from shared to private + // e.g., ram_block_discard_range + } else { + // do something + } + } + } + }); + #[cfg(target_os = "macos")] std::thread::spawn(move || loop { match receiver.recv() { diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 627d7134..c18461ca 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -3,13 +3,17 @@ //! Enables pre-boot setup, instantiation and booting of a Firecracker VMM. 
+use crate::vstate::MemProperties; +use cca::Algo; #[cfg(target_os = "macos")] use crossbeam_channel::{unbounded, Sender}; +use std::cmp::max; use std::fmt::{Display, Formatter}; use std::fs::File; use std::io; #[cfg(target_os = "linux")] use std::os::fd::AsRawFd; +use std::os::fd::RawFd; use std::path::PathBuf; use std::sync::{Arc, Mutex}; @@ -29,9 +33,6 @@ use devices::virtio::{port_io, MmioTransport, PortDescription, Vsock}; #[cfg(target_os = "macos")] use hvf::MemoryMapping; -#[cfg(feature = "tee")] -use kbs_types::Tee; - use crate::device_manager; #[cfg(feature = "tee")] use crate::resources::TeeConfig; @@ -49,13 +50,15 @@ use crate::vmm_config::fs::FsBuilder; use crate::vmm_config::kernel_bundle::{InitrdBundle, QbootBundle}; #[cfg(target_os = "linux")] use crate::vstate::KvmContext; -#[cfg(all(target_os = "linux", feature = "tee"))] +#[cfg(all(target_os = "linux", any(feature = "tee", feature = "cca")))] use crate::vstate::MeasuredRegion; use crate::vstate::{Error as VstateError, Vcpu, VcpuConfig, Vm}; use arch::ArchMemoryInfo; #[cfg(feature = "tee")] use arch::InitrdConfig; #[cfg(feature = "tee")] +use kbs_types::Tee; +#[cfg(feature = "tee")] use kvm_bindings::KVM_MAX_CPUID_ENTRIES; use libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO}; use nix::unistd::isatty; @@ -68,7 +71,10 @@ use vm_memory::mmap::MmapRegion; #[cfg(any(target_arch = "aarch64", feature = "tee"))] use vm_memory::Bytes; use vm_memory::GuestMemory; -use vm_memory::{GuestAddress, GuestMemoryMmap}; +use vm_memory::{Address, GuestAddress, GuestMemoryMmap, GuestMemoryRegion}; + +use crossbeam_channel::Sender; +use kvm_bindings::KVM_ARM_VCPU_REC; #[cfg(feature = "efi")] static EDK2_BINARY: &[u8] = include_bytes!("../../../edk2/KRUN_EFI.silent.fd"); @@ -312,6 +318,7 @@ impl Display for StartMicrovmError { pub fn build_microvm( vm_resources: &super::resources::VmResources, event_manager: &mut EventManager, + io_sender: Sender, _shutdown_efd: Option, #[cfg(target_os = "macos")] _map_sender: Sender, 
) -> std::result::Result>, StartMicrovmError> { @@ -361,9 +368,11 @@ pub fn build_microvm( Some(s) => kernel_cmdline.insert_str(s).unwrap(), }; + let mut guest_memfd: Vec = vec![]; + #[cfg(not(feature = "tee"))] #[allow(unused_mut)] - let mut vm = setup_vm(&guest_memory)?; + let mut vm = setup_vm(&guest_memory, &mut guest_memfd)?; #[cfg(feature = "tee")] let (kvm, mut vm) = { @@ -433,19 +442,49 @@ pub fn build_microvm( m }; + #[cfg(feature = "cca")] + // TODO: to fix the regions that require measurement + let measured_regions = { + let m = vec![ + MeasuredRegion { + guest_addr: kernel_bundle.guest_addr, + // TODO: remove host_addr + host_addr: guest_memory + .get_host_address(GuestAddress(kernel_bundle.guest_addr)) + .unwrap() as u64, + size: vm_resources.vm_config().mem_size_mib.unwrap() << 20, + populate: true, + }, + /* + MeasuredRegion { + guest_addr: kernel_bundle.guest_addr + kernel_bundle.size as u64, + host_addr: guest_memory + .get_host_address(GuestAddress(kernel_bundle.guest_addr + kernel_bundle.size as u64)) + .unwrap() as u64, + // this is probably wrong + size: vm_resources.vm_config().mem_size_mib.unwrap() << 20 - kernel_bundle.size, + populate: false + }, + */ + ]; + + m + }; + // On x86_64 always create a serial device, // while on aarch64 only create it if 'console=' is specified in the boot args. - let serial_device = if cfg!(feature = "efi") { + // TODO: to comment this + let serial_device = //if cfg!(feature = "efi") { Some(setup_serial_device( event_manager, None, - None, + //None, // Uncomment this to get EFI output when debugging EDK2. - // Some(Box::new(io::stdout())), - )?) 
- } else { - None - }; + Some(Box::new(io::stdout())), + )?); + //} else { + // None + //}; let exit_evt = EventFd::new(utils::eventfd::EFD_NONBLOCK) .map_err(Error::EventFd) @@ -517,6 +556,7 @@ pub fn build_microvm( &guest_memory, GuestAddress(kernel_bundle.guest_addr), &exit_evt, + io_sender, ) .map_err(StartMicrovmError::Internal)?; @@ -559,7 +599,7 @@ pub fn build_microvm( )?; } - #[cfg(not(feature = "tee"))] + #[cfg(all(not(feature = "tee"), not(feature = "cca")))] let _shm_region = Some(VirtioShmRegion { host_addr: guest_memory .get_host_address(GuestAddress(arch_memory_info.shm_start_addr)) @@ -577,6 +617,7 @@ pub fn build_microvm( exit_observers: Vec::new(), vm, mmio_device_manager, + guest_memfd_vec: guest_memfd, #[cfg(target_arch = "x86_64")] pio_device_manager, }; @@ -683,6 +724,55 @@ pub fn build_microvm( println!("Starting TEE/microVM."); } + // after this point guest memory and regs are not accessible anymore + #[cfg(feature = "cca")] + { + let _ = vmm + .kvm_vm() + .realm + .configure_measurement(&vmm.kvm_vm().fd.lock().unwrap(), Algo::AlgoSha256); + + vmm.kvm_vm() + .realm + .create_realm_descriptor(&vmm.kvm_vm().fd.lock().unwrap()) + .unwrap(); + + println!("Injecting and measuring memory regions. 
This may take a while."); + + for region in measured_regions.iter() { + if region.populate { + vmm.kvm_vm() + .realm + .populate( + &vmm.kvm_vm().fd.lock().unwrap(), + region.guest_addr, + region.size.try_into().unwrap(), + ) + .unwrap(); + } else { + vmm.kvm_vm() + .realm + .initiate( + &vmm.kvm_vm().fd.lock().unwrap(), + region.guest_addr, + region.size.try_into().unwrap(), + ) + .unwrap(); + } + } + + let feature = KVM_ARM_VCPU_REC as i32; + + for vcpu in vcpus.iter() { + vcpu.fd.vcpu_finalize(&feature).unwrap(); + } + + vmm.kvm_vm() + .realm + .activate(&vmm.kvm_vm().fd.lock().unwrap()) + .unwrap(); + } + vmm.start_vcpus(vcpus) .map_err(StartMicrovmError::Internal)?; @@ -809,7 +899,7 @@ fn load_cmdline(vmm: &Vmm) -> std::result::Result<(), StartMicrovmError> { .map_err(StartMicrovmError::LoadCommandline) } -#[cfg(all(target_os = "linux", not(feature = "tee")))] +#[cfg(all(target_os = "linux", not(feature = "tee"), not(feature = "cca")))] pub(crate) fn setup_vm( guest_memory: &GuestMemoryMmap, ) -> std::result::Result { @@ -824,6 +914,30 @@ pub(crate) fn setup_vm( .map_err(StartMicrovmError::Internal)?; Ok(vm) } +#[cfg(all(target_os = "linux", feature = "cca"))] +pub(crate) fn setup_vm( + guest_memory: &GuestMemoryMmap, + guest_memfd: &mut Vec, +) -> std::result::Result { + let kvm = KvmContext::new() + .map_err(Error::KvmContext) + .map_err(StartMicrovmError::Internal)?; + + // calculate max_addr for max_ipa + let mut max_addr = 0; + for (_index, region) in guest_memory.iter().enumerate() { + max_addr = max(max_addr, region.start_addr().raw_value() + region.len() - 1); + } + + let mut vm = Vm::new(kvm.fd(), max_addr as usize) + .map_err(Error::Vm) + .map_err(StartMicrovmError::Internal)?; + + vm.memory_init(guest_memory, kvm.max_memslots(), guest_memfd, true) + .map_err(Error::Vm) + .map_err(StartMicrovmError::Internal)?; + Ok(vm) +} #[cfg(all(target_os = "linux", feature = "tee"))] pub(crate) fn setup_vm( kvm: &KvmContext, @@ -932,13 +1046,13 @@ fn 
attach_legacy_devices( ) -> std::result::Result<(), StartMicrovmError> { if let Some(serial) = serial { mmio_device_manager - .register_mmio_serial(vm.fd(), kernel_cmdline, serial) + .register_mmio_serial(&vm.fd.lock().unwrap(), kernel_cmdline, serial) .map_err(Error::RegisterMMIODevice) .map_err(StartMicrovmError::Internal)?; } mmio_device_manager - .register_mmio_rtc(vm.fd()) + .register_mmio_rtc(&vm.fd.lock().unwrap()) .map_err(Error::RegisterMMIODevice) .map_err(StartMicrovmError::Internal)?; @@ -1018,17 +1132,19 @@ fn create_vcpus_aarch64( guest_mem: &GuestMemoryMmap, entry_addr: GuestAddress, exit_evt: &EventFd, + sender_io: Sender, ) -> super::Result> { let mut vcpus = Vec::with_capacity(vcpu_config.vcpu_count as usize); for cpu_index in 0..vcpu_config.vcpu_count { - let mut vcpu = Vcpu::new_aarch64( + let mut vcpu: Vcpu = Vcpu::new_aarch64( cpu_index, - vm.fd(), + &vm.fd.lock().unwrap(), exit_evt.try_clone().map_err(Error::EventFd)?, + sender_io.clone(), ) .map_err(Error::Vcpu)?; - vcpu.configure_aarch64(vm.fd(), guest_mem, entry_addr) + vcpu.configure_aarch64(&vm.fd.lock().unwrap(), guest_mem, entry_addr) .map_err(Error::Vcpu)?; vcpus.push(vcpu); @@ -1092,9 +1208,12 @@ fn attach_mmio_device( let _cmdline = &mut vmm.kernel_cmdline; #[cfg(target_os = "linux")] - let (_mmio_base, _irq) = - vmm.mmio_device_manager - .register_mmio_device(vmm.vm.fd(), device, type_id, id)?; + let (_mmio_base, _irq) = vmm.mmio_device_manager.register_mmio_device( + &vmm.vm.fd.lock().unwrap(), + device, + type_id, + id, + )?; #[cfg(target_os = "macos")] let (_mmio_base, _irq) = vmm .mmio_device_manager diff --git a/src/vmm/src/device_manager/kvm/mmio.rs b/src/vmm/src/device_manager/kvm/mmio.rs index 1fa7fe61..c3aec6e4 100644 --- a/src/vmm/src/device_manager/kvm/mmio.rs +++ b/src/vmm/src/device_manager/kvm/mmio.rs @@ -179,7 +179,10 @@ impl MMIODeviceManager { .map_err(Error::BusError)?; cmdline - .insert("earlycon", &format!("uart,mmio,0x{:08x}", self.mmio_base)) + .insert( + 
"earlycon", + &format!("pl011,mmio32,0x{:08x}", self.mmio_base), + ) .map_err(Error::Cmdline)?; let ret = self.mmio_base; diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index ea3fdb38..b0d6e12f 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -39,6 +39,7 @@ use macos::vstate; use std::fmt::{Display, Formatter}; use std::io; +use std::os::fd::RawFd; use std::os::unix::io::AsRawFd; use std::sync::{Arc, Mutex}; #[cfg(target_os = "linux")] @@ -55,15 +56,20 @@ use crate::vstate::{Vcpu, VcpuHandle, VcpuResponse, Vm}; use arch::ArchMemoryInfo; use arch::DeviceType; use arch::InitrdConfig; +use cca::Algo; #[cfg(target_os = "macos")] use crossbeam_channel::Sender; use devices::virtio::VmmExitObserver; use devices::BusDevice; use kernel::cmdline::Cmdline as KernelCmdline; +use kvm_bindings::{kvm_memory_attributes, KVM_ARM_VCPU_REC}; use polly::event_manager::{self, EventManager, Subscriber}; use utils::epoll::{EpollEvent, EventSet}; use utils::eventfd::EventFd; +use vm_memory::Address; +use vm_memory::GuestMemory; use vm_memory::GuestMemoryMmap; +use vm_memory::GuestMemoryRegion; /// Success exit code. 
pub const FC_EXIT_CODE_OK: u8 = 0; @@ -190,6 +196,8 @@ pub struct Vmm { guest_memory: GuestMemoryMmap, arch_memory_info: ArchMemoryInfo, + pub guest_memfd_vec: Vec, + kernel_cmdline: KernelCmdline, vcpus_handles: Vec, diff --git a/src/vmm/src/linux/vstate.rs b/src/vmm/src/linux/vstate.rs index 439dd203..a9a60e7c 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -8,13 +8,17 @@ use crossbeam_channel::{unbounded, Receiver, Sender, TryRecvError}; use libc::{c_int, c_void, siginfo_t}; use std::cell::Cell; +use std::cmp::max; use std::fmt::{Display, Formatter}; use std::io; use std::os::fd::RawFd; +use std::sync::Arc; +use std::sync::Mutex; #[cfg(feature = "tee")] use std::os::unix::io::RawFd; +use kvm_ioctls::VcpuExit::Unsupported; use std::result; use std::sync::atomic::{fence, Ordering}; #[cfg(not(test))] @@ -48,8 +52,10 @@ use kvm_bindings::{ KVM_MAX_CPUID_ENTRIES, KVM_PIT_SPEAKER_DUMMY, }; use kvm_bindings::{ - kvm_create_guest_memfd, kvm_userspace_memory_region, kvm_userspace_memory_region2, - KVM_API_VERSION, KVM_MEM_GUEST_MEMFD, + kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region, + kvm_userspace_memory_region2, KVM_API_VERSION, KVM_MEMORY_ATTRIBUTE_PRIVATE, + KVM_MEMORY_EXIT_FLAG_PRIVATE, KVM_MEM_GUEST_MEMFD, KVM_VM_TYPE_ARM_IPA_SIZE_MASK, + KVM_VM_TYPE_ARM_REALM, }; use kvm_ioctls::*; use utils::eventfd::EventFd; @@ -65,6 +71,9 @@ use sev::launch::sev as sev_launch; #[cfg(feature = "amd-sev")] use sev::launch::snp; +#[cfg(feature = "cca")] +use cca::Realm; + /// Signal number (SIGRTMIN) used to kick Vcpus. pub(crate) const VCPU_RTSIG_OFFSET: i32 = 0; @@ -403,12 +412,13 @@ impl Display for Error { pub type Result = result::Result; -#[cfg(feature = "tee")] +#[cfg(any(feature = "tee", feature = "cca"))] #[derive(Debug)] pub struct MeasuredRegion { pub guest_addr: u64, pub host_addr: u64, pub size: usize, + pub populate: bool, } /// Describes a KVM context that gets attached to the microVM. 
@@ -462,7 +472,7 @@ impl KvmContext { /// A wrapper around creating and using a VM. pub struct Vm { - fd: VmFd, + pub fd: Arc>, // X86 specific fields. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -483,11 +493,14 @@ pub struct Vm { #[cfg(feature = "amd-sev")] pub tee: Tee, + + #[cfg(feature = "cca")] + pub realm: Realm, } impl Vm { /// Constructs a new `Vm` using the given `Kvm` instance. - #[cfg(not(feature = "tee"))] + #[cfg(all(not(feature = "tee"), not(feature = "cca")))] pub fn new(kvm: &Kvm) -> Result { //create fd for interacting with kvm-vm specific functions let vm_fd = kvm.create_vm().map_err(Error::VmFd)?; @@ -511,6 +524,26 @@ impl Vm { }) } + #[cfg(feature = "cca")] + pub fn new(kvm: &Kvm, max_ipa: usize) -> Result { + //create fd for interacting with kvm-vm specific functions + let ipa_bits = max(64u32 - max_ipa.leading_zeros() - 1, 32) + 1; + let vm_fd = kvm + .create_vm_with_type( + (KVM_VM_TYPE_ARM_REALM | (ipa_bits & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)).into(), + ) + .map_err(Error::VmFd)?; + + let realm = Realm::new().unwrap(); + + Ok(Vm { + fd: Arc::new(Mutex::new(vm_fd)), + #[cfg(target_arch = "aarch64")] + irqchip_handle: None, + realm, + }) + } + #[cfg(feature = "amd-sev")] pub fn new(kvm: &Kvm, tee_config: &TeeConfig) -> Result { //create fd for interacting with kvm-vm specific functions @@ -559,6 +592,7 @@ impl Vm { &mut self, guest_mem: &GuestMemoryMmap, kvm_max_memslots: usize, + guest_memfd: &mut Vec, require_guest_memfd: bool, ) -> Result<()> { if guest_mem.num_regions() > kvm_max_memslots { @@ -578,10 +612,14 @@ impl Vm { let id: RawFd = self .fd + .lock() + .unwrap() .create_guest_memfd(gmem) .map_err(Error::CreateGuestMemfd)?; - let memory_region = kvm_userspace_memory_region2 { + guest_memfd.push(id); + + let memory_region: kvm_userspace_memory_region2 = kvm_userspace_memory_region2 { slot: index as u32, flags: KVM_MEM_GUEST_MEMFD, guest_phys_addr: region.start_addr().raw_value(), @@ -597,9 +635,22 @@ impl Vm { // are not 
overlapping. unsafe { self.fd + .lock() + .unwrap() .set_user_memory_region2(memory_region) .map_err(Error::SetUserMemoryRegion2)?; }; + + // set private by default when using guestmemfd + // this imitates QEMU behavior + let attr = kvm_memory_attributes { + address: region.start_addr().raw_value(), + size: region.len(), + attributes: KVM_MEMORY_ATTRIBUTE_PRIVATE as u64, + flags: 0, + }; + + self.fd.lock().unwrap().set_memory_attributes(attr).unwrap(); } else { let memory_region = kvm_userspace_memory_region { slot: index as u32, @@ -612,6 +663,8 @@ impl Vm { // are not overlapping. unsafe { self.fd + .lock() + .unwrap() .set_user_memory_region(memory_region) .map_err(Error::SetUserMemoryRegion)?; }; @@ -700,7 +753,8 @@ impl Vm { #[cfg(target_arch = "aarch64")] pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<()> { self.irqchip_handle = Some( - arch::aarch64::gic::create_gic(&self.fd, vcpu_count.into()).map_err(Error::SetupGIC)?, + arch::aarch64::gic::create_gic(&self.fd.lock().unwrap(), vcpu_count.into()) + .map_err(Error::SetupGIC)?, ); Ok(()) } @@ -713,9 +767,9 @@ impl Vm { } /// Gets a reference to the kvm file descriptor owned by this VM. - pub fn fd(&self) -> &VmFd { - &self.fd - } + //pub fn fd(&self) -> &VmFd { + // &self.fd + // } #[allow(unused)] #[cfg(target_arch = "x86_64")] @@ -806,9 +860,14 @@ pub struct VcpuConfig { // Using this for easier explicit type-casting to help IDEs interpret the code. type VcpuCell = Cell>; +pub struct MemProperties { + pub addr: u64, + pub size: u64, + pub attributes: u32, +} /// A wrapper around creating and using a kvm-based VCPU. pub struct Vcpu { - fd: VcpuFd, + pub fd: VcpuFd, id: u8, mmio_bus: Option, #[allow(dead_code)] @@ -825,6 +884,9 @@ pub struct Vcpu { #[cfg(target_arch = "aarch64")] mpidr: u64, + #[cfg(feature = "cca")] + sender_io: Sender, + // The receiving end of events channel owned by the vcpu side. 
event_receiver: Receiver, // The transmitting end of the events channel which will be given to the handler. @@ -966,7 +1028,12 @@ impl Vcpu { /// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits. /// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime. #[cfg(target_arch = "aarch64")] - pub fn new_aarch64(id: u8, vm_fd: &VmFd, exit_evt: EventFd) -> Result { + pub fn new_aarch64( + id: u8, + vm_fd: &VmFd, + exit_evt: EventFd, + sender_io: Sender, + ) -> Result { let kvm_vcpu = vm_fd.create_vcpu(id as u64).map_err(Error::VcpuFd)?; let (event_sender, event_receiver) = unbounded(); let (response_sender, response_receiver) = unbounded(); @@ -981,6 +1048,7 @@ impl Vcpu { event_sender: Some(event_sender), response_receiver: Some(response_receiver), response_sender, + sender_io, }) } @@ -1267,12 +1335,40 @@ impl Vcpu { info!("Received KVM_EXIT_SHUTDOWN signal"); Ok(VcpuEmulation::Stopped) } + VcpuExit::MemoryFault { flags, gpa, size } => { + // TODO: flags can be private or shared + if flags & !KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 != 0 { + error!("KVM_EXIT_MEMORY_FAULT: Unknown flag {}", flags); + Err(Error::VcpuUnhandledKvmExit) + } else { + // from private to shared + let mut attr = 0; + // from shared to private + if flags & KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 + == KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 + { + attr = KVM_MEMORY_ATTRIBUTE_PRIVATE; + }; + + let _ = self.sender_io.try_send(MemProperties { + addr: gpa, + size: size, + attributes: attr, + }); + Ok(VcpuEmulation::Handled) + } + } // Documentation specifies that below kvm exits are considered // errors. 
VcpuExit::FailEntry(reason, vcpu) => { error!("Received KVM_EXIT_FAIL_ENTRY signal: reason={reason}, vcpu={vcpu}"); Err(Error::VcpuUnhandledKvmExit) } + // TODO: to remove this + Unsupported(39) => { + println!("memory fault!"); + Ok(VcpuEmulation::Handled) + } VcpuExit::InternalError => { error!("Received KVM_EXIT_INTERNAL_ERROR signal"); Err(Error::VcpuUnhandledKvmExit) @@ -1280,6 +1376,7 @@ impl Vcpu { r => { // TODO: Are we sure we want to finish running a vcpu upon // receiving a vm exit that is not necessarily an error? + println!("error! {:?}", r); error!("Unexpected exit reason on vcpu run: {:?}", r); Err(Error::VcpuUnhandledKvmExit) } @@ -1605,7 +1702,9 @@ mod tests { // Create valid memory region and test that the initialization is successful. let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap(); - assert!(vm.memory_init(&gm, kvm_context.max_memslots(), false).is_ok()); + assert!(vm + .memory_init(&gm, kvm_context.max_memslots(), false) + .is_ok()); // Set the maximum number of memory slots to 1 in KvmContext to check the error // path of memory_init. Create 2 non-overlapping memory slots. 
@@ -1615,7 +1714,9 @@ mod tests { (GuestAddress(0x1001), 0x2000), ]) .unwrap(); - assert!(vm.memory_init(&gm, kvm_context.max_memslots(), false).is_err()); + assert!(vm + .memory_init(&gm, kvm_context.max_memslots(), false) + .is_err()); } #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/src/vmm_config/boot_source.rs b/src/vmm/src/vmm_config/boot_source.rs index 9c70d5b1..16b4b5b4 100644 --- a/src/vmm/src/vmm_config/boot_source.rs +++ b/src/vmm/src/vmm_config/boot_source.rs @@ -18,7 +18,7 @@ use std::fmt::{Display, Formatter, Result}; #[cfg(all(target_os = "linux", not(feature = "tee")))] pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=-1 panic_print=0 nomodule console=hvc0 \ - rootfstype=virtiofs rw quiet no-kvmapf"; + rootfstype=virtiofs rw no-kvmapf"; #[cfg(feature = "amd-sev")] pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=-1 panic_print=0 nomodule console=hvc0 \