diff --git a/Makefile b/Makefile index 3dcb2ad8..f0fec397 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,9 @@ ifeq ($(SEV),1) INIT_SRC += $(SNP_INIT_SRC) BUILD_INIT = 0 endif +ifeq ($(CCA), 1) + FEATURE_FLAGS := --features cca +endif ifeq ($(GPU),1) FEATURE_FLAGS += --features gpu endif diff --git a/src/arch/src/aarch64/fdt.rs b/src/arch/src/aarch64/fdt.rs index 02b45112..4e051506 100644 --- a/src/arch/src/aarch64/fdt.rs +++ b/src/arch/src/aarch64/fdt.rs @@ -285,7 +285,10 @@ fn create_psci_node(fdt: &mut FdtWriter) -> Result<()> { // Two methods available: hvc and smc. // As per documentation, PSCI calls between a guest and hypervisor may use the HVC conduit instead of SMC. // So, since we are using kvm, we need to use hvc. + #[cfg(not(feature = "cca"))] fdt.property_string("method", "hvc")?; + #[cfg(feature = "cca")] + fdt.property_string("method", "smc")?; fdt.end_node(node)?; Ok(()) diff --git a/src/arch/src/aarch64/linux/regs.rs b/src/arch/src/aarch64/linux/regs.rs index 81146b8a..71dffe5b 100644 --- a/src/arch/src/aarch64/linux/regs.rs +++ b/src/arch/src/aarch64/linux/regs.rs @@ -125,8 +125,10 @@ arm64_sys_reg!(MPIDR_EL1, 3, 0, 0, 0, 5); /// * `boot_ip` - Starting instruction pointer. /// * `mem` - Reserved DRAM for current VM. pub fn setup_regs(vcpu: &VcpuFd, cpu_id: u8, boot_ip: u64, mem: &GuestMemoryMmap) -> Result<()> { - // Get the register index of the PSTATE (Processor State) register. + // PSTATE cannot be accesed from the host in CCA + #[cfg(not(feature = "cca"))] #[allow(deref_nullptr)] + // Get the register index of the PSTATE (Processor State) register. vcpu.set_one_reg(arm64_core_reg!(pstate), &PSTATE_FAULT_BITS_64.to_le_bytes()) .map_err(Error::SetCoreRegister)?; diff --git a/src/arch/src/aarch64/mod.rs b/src/arch/src/aarch64/mod.rs index 9450b94d..23116ea5 100644 --- a/src/arch/src/aarch64/mod.rs +++ b/src/arch/src/aarch64/mod.rs @@ -69,7 +69,7 @@ pub fn arch_memory_regions(size: usize) -> (ArchMemoryInfo, Vec<(GuestAddress, u } else { vec![ (GuestAddress(layout::DRAM_MEM_START), dram_size), - (GuestAddress(shm_start_addr), MMIO_SHM_SIZE as usize), + //(GuestAddress(shm_start_addr), MMIO_SHM_SIZE as usize), ] }; diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index ff4b8a4f..6ad0c994 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -5,7 +5,9 @@ authors = ["The Chromium OS Authors"] edition = "2021" [features] +default = ["cca"] tee = [] +cca = [] amd-sev = ["blk", "tee"] net = [] blk = [] diff --git a/src/devices/src/virtio/console/device.rs b/src/devices/src/virtio/console/device.rs index e1193ec6..56535afa 100644 --- a/src/devices/src/virtio/console/device.rs +++ b/src/devices/src/virtio/console/device.rs @@ -30,9 +30,18 @@ use crate::virtio::{PortDescription, VmmExitObserver}; pub(crate) const CONTROL_RXQ_INDEX: usize = 2; pub(crate) const CONTROL_TXQ_INDEX: usize = 3; -pub(crate) const AVAIL_FEATURES: u64 = 1 << uapi::VIRTIO_CONSOLE_F_SIZE as u64 - | 1 << uapi::VIRTIO_CONSOLE_F_MULTIPORT as u64 - | 1 << uapi::VIRTIO_F_VERSION_1 as u64; +// CCA requires VIRTIO_F_ACCESS_PLATFORM to ensure DMA-APIs +// are triggered for virtio in Linux +pub(crate) const AVAIL_FEATURES: u64 = if cfg!(feature = "cca") { + 1 << uapi::VIRTIO_CONSOLE_F_SIZE as u64 + | 1 << uapi::VIRTIO_CONSOLE_F_MULTIPORT as u64 + | 1 << uapi::VIRTIO_F_VERSION_1 as u64 + | 1 << uapi::VIRTIO_F_ACCESS_PLATFORM as u64 +} else { + 1 << uapi::VIRTIO_CONSOLE_F_SIZE as u64 + | 1 << uapi::VIRTIO_CONSOLE_F_MULTIPORT as u64 + | 1 << uapi::VIRTIO_F_VERSION_1 as u64 +}; #[repr(C)] #[derive(Default)] diff --git a/src/devices/src/virtio/console/mod.rs b/src/devices/src/virtio/console/mod.rs index bbaba4dd..c6d0fb9d 100644 --- a/src/devices/src/virtio/console/mod.rs +++ b/src/devices/src/virtio/console/mod.rs @@ -22,6 +22,7 @@ mod defs { pub const VIRTIO_CONSOLE_F_MULTIPORT: u32 = 1; pub const VIRTIO_F_VERSION_1: u32 = 32; pub const VIRTIO_ID_CONSOLE: u32 = 3; + pub const VIRTIO_F_ACCESS_PLATFORM: u32 = 33; } #[allow(dead_code)] diff --git a/src/devices/src/virtio/fs/device.rs b/src/devices/src/virtio/fs/device.rs index 03efab0c..5e752fdc 100644 --- a/src/devices/src/virtio/fs/device.rs +++ b/src/devices/src/virtio/fs/device.rs @@ -5,7 +5,10 @@ use std::sync::{Arc, Mutex}; use std::thread::JoinHandle; use utils::eventfd::{EventFd, EFD_NONBLOCK}; -use virtio_bindings::{virtio_config::VIRTIO_F_VERSION_1, virtio_ring::VIRTIO_RING_F_EVENT_IDX}; +use virtio_bindings::{ + virtio_config::VIRTIO_F_ACCESS_PLATFORM, virtio_config::VIRTIO_F_VERSION_1, + virtio_ring::VIRTIO_RING_F_EVENT_IDX, +}; use vm_memory::{ByteValued, GuestMemoryMmap}; use super::super::{ @@ -63,7 +66,13 @@ impl Fs { .push(EventFd::new(utils::eventfd::EFD_NONBLOCK).map_err(FsError::EventFd)?); } - let avail_features = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX); + let avail_features = if cfg!(feature = "cca") { + (1u64 << VIRTIO_F_VERSION_1) + | (1u64 << VIRTIO_RING_F_EVENT_IDX) + | (1 << VIRTIO_F_ACCESS_PLATFORM as u64) + } else { + (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX) + }; let tag = fs_id.into_bytes(); let mut config = VirtioFsConfig::default(); diff --git a/src/devices/src/virtio/rng/device.rs b/src/devices/src/virtio/rng/device.rs index d87f696d..75ea6cd4 100644 --- a/src/devices/src/virtio/rng/device.rs +++ b/src/devices/src/virtio/rng/device.rs @@ -13,12 +13,17 @@ use super::super::{ use super::{defs, defs::uapi}; use crate::legacy::Gic; use crate::Error as DeviceError; +use virtio_bindings::virtio_config::VIRTIO_F_ACCESS_PLATFORM; // Request queue. pub(crate) const REQ_INDEX: usize = 0; // Supported features. -pub(crate) const AVAIL_FEATURES: u64 = 1 << uapi::VIRTIO_F_VERSION_1 as u64; +pub(crate) const AVAIL_FEATURES: u64 = if cfg!(feature = "cca") { + 1 << uapi::VIRTIO_F_VERSION_1 as u64 | 1 << VIRTIO_F_ACCESS_PLATFORM as u64 +} else { + 1 << uapi::VIRTIO_F_VERSION_1 as u64 +}; #[derive(Copy, Clone, Debug, Default)] #[repr(C, packed)] diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 6cd2b156..0c1e7d75 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -1,6 +1,13 @@ #[macro_use] extern crate log; +use crossbeam_channel::unbounded; +use kvm_bindings::kvm_memory_attributes; +use libc::fallocate; +use libc::madvise; +use libc::FALLOC_FL_KEEP_SIZE; +use libc::FALLOC_FL_PUNCH_HOLE; +use libc::MADV_DONTNEED; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::convert::TryInto; @@ -11,10 +18,13 @@ use std::ffi::CString; #[cfg(target_os = "linux")] use std::os::fd::AsRawFd; use std::os::fd::RawFd; +use std::os::raw::c_void; use std::path::PathBuf; use std::slice; use std::sync::atomic::{AtomicI32, Ordering}; use std::sync::Mutex; +use vm_memory::GuestMemoryRegion; +use vm_memory::{Address, GuestMemory}; #[cfg(target_os = "macos")] use crossbeam_channel::unbounded; @@ -1077,9 +1087,12 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { #[cfg(target_os = "macos")] let (sender, receiver) = unbounded(); + let (io_sender, receiver) = unbounded(); + let _vmm = match vmm::builder::build_microvm( &ctx_cfg.vmr, &mut event_manager, + io_sender, ctx_cfg.shutdown_efd, #[cfg(target_os = "macos")] sender, @@ -1094,6 +1107,61 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { #[cfg(target_os = "macos")] let mapper_vmm = _vmm.clone(); + let vm = _vmm.lock().unwrap().kvm_vm().fd.clone(); + let guest_mem = _vmm.lock().unwrap().guest_memory().clone(); + let guest_memfd = _vmm.lock().unwrap().guest_memfd_vec.clone(); + + std::thread::spawn(move || loop { + match receiver.recv() { + Err(e) => error!("Error in receiver: {:?}", e), + Ok(m) => { + let _ret = vm + .lock() + .unwrap() + .set_memory_attributes(kvm_memory_attributes { + address: m.addr, + size: m.size, + attributes: m.attributes as u64, + flags: 0, + }); + + // from private to shared + if m.attributes == 0 { + for (index, region) in guest_mem.iter().enumerate() { + // this supposes that m.addr + m.size < region.start + region.size + // which may be false + if (region.start_addr().raw_value() + region.size() as u64) > m.addr { + let offset = m.addr - region.start_addr().raw_value(); + unsafe { + let _ret = fallocate( + *guest_memfd.get(index).unwrap(), + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + offset as i64, + m.size as i64, + ); + } + } + } + // from shared to private + } else { + for (_index, region) in guest_mem.iter().enumerate() { + if (region.start_addr().raw_value() + region.size() as u64) > m.addr { + let offset = m.addr - region.start_addr().raw_value(); + let host_startaddr = m.addr + offset; + unsafe { + let _ret = madvise( + host_startaddr as *mut c_void, + m.size.try_into().unwrap(), + MADV_DONTNEED, + ); + } + } + } + } + } + } + }); + #[cfg(target_os = "macos")] std::thread::spawn(move || loop { match receiver.recv() { diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 627d7134..6dcc9267 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -3,13 +3,17 @@ //! Enables pre-boot setup, instantiation and booting of a Firecracker VMM. +use crate::vstate::MemProperties; +use cca::Algo; #[cfg(target_os = "macos")] use crossbeam_channel::{unbounded, Sender}; +use std::cmp::max; use std::fmt::{Display, Formatter}; use std::fs::File; use std::io; #[cfg(target_os = "linux")] use std::os::fd::AsRawFd; +use std::os::fd::RawFd; use std::path::PathBuf; use std::sync::{Arc, Mutex}; @@ -29,9 +33,6 @@ use devices::virtio::{port_io, MmioTransport, PortDescription, Vsock}; #[cfg(target_os = "macos")] use hvf::MemoryMapping; -#[cfg(feature = "tee")] -use kbs_types::Tee; - use crate::device_manager; #[cfg(feature = "tee")] use crate::resources::TeeConfig; @@ -49,13 +50,15 @@ use crate::vmm_config::fs::FsBuilder; use crate::vmm_config::kernel_bundle::{InitrdBundle, QbootBundle}; #[cfg(target_os = "linux")] use crate::vstate::KvmContext; -#[cfg(all(target_os = "linux", feature = "tee"))] +#[cfg(all(target_os = "linux", any(feature = "tee", feature = "cca")))] use crate::vstate::MeasuredRegion; use crate::vstate::{Error as VstateError, Vcpu, VcpuConfig, Vm}; use arch::ArchMemoryInfo; #[cfg(feature = "tee")] use arch::InitrdConfig; #[cfg(feature = "tee")] +use kbs_types::Tee; +#[cfg(feature = "tee")] use kvm_bindings::KVM_MAX_CPUID_ENTRIES; use libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO}; use nix::unistd::isatty; @@ -68,7 +71,10 @@ use vm_memory::mmap::MmapRegion; #[cfg(any(target_arch = "aarch64", feature = "tee"))] use vm_memory::Bytes; use vm_memory::GuestMemory; -use vm_memory::{GuestAddress, GuestMemoryMmap}; +use vm_memory::{Address, GuestAddress, GuestMemoryMmap, GuestMemoryRegion}; + +use crossbeam_channel::Sender; +use kvm_bindings::KVM_ARM_VCPU_REC; #[cfg(feature = "efi")] static EDK2_BINARY: &[u8] = include_bytes!("../../../edk2/KRUN_EFI.silent.fd"); @@ -312,6 +318,7 @@ impl Display for StartMicrovmError { pub fn build_microvm( vm_resources: &super::resources::VmResources, event_manager: &mut EventManager, + io_sender: Sender, _shutdown_efd: Option, #[cfg(target_os = "macos")] _map_sender: Sender, ) -> std::result::Result>, StartMicrovmError> { @@ -361,9 +368,11 @@ pub fn build_microvm( Some(s) => kernel_cmdline.insert_str(s).unwrap(), }; + let mut guest_memfd: Vec = vec![]; + #[cfg(not(feature = "tee"))] #[allow(unused_mut)] - let mut vm = setup_vm(&guest_memory)?; + let mut vm = setup_vm(&guest_memory, &mut guest_memfd)?; #[cfg(feature = "tee")] let (kvm, mut vm) = { @@ -433,19 +442,58 @@ pub fn build_microvm( m }; + #[cfg(feature = "cca")] + let measured_regions = { + let m = vec![ + MeasuredRegion { + guest_addr: kernel_bundle.guest_addr, + // TODO: remove host_addr? + host_addr: guest_memory + .get_host_address(GuestAddress(kernel_bundle.guest_addr)) + .unwrap() as u64, + size: kernel_bundle.size, + populate: true, + }, + MeasuredRegion { + guest_addr: kernel_bundle.guest_addr + kernel_bundle.size as u64, + host_addr: guest_memory + .get_host_address(GuestAddress( + kernel_bundle.guest_addr + kernel_bundle.size as u64, + )) + .unwrap() as u64, + size: vm_resources.vm_config().mem_size_mib.unwrap() << 20 - kernel_bundle.size, + populate: false, + }, + // The region used for the FDT must be populated. However, we only know the addr and the size after + // configure_system() but at that point guest_memory is already shared. For the moment, hardcore the + // fdt addr and size. + MeasuredRegion { + guest_addr: 0x8FE00000, + host_addr: guest_memory + .get_host_address(GuestAddress(0x8FE00000)) + .unwrap() as u64, + size: 0x1000, + populate: true, + }, + ]; + + m + }; + // On x86_64 always create a serial device, // while on aarch64 only create it if 'console=' is specified in the boot args. - let serial_device = if cfg!(feature = "efi") { + // TODO: to comment this + let serial_device = //if cfg!(feature = "efi") { Some(setup_serial_device( event_manager, None, - None, + //None, // Uncomment this to get EFI output when debugging EDK2. - // Some(Box::new(io::stdout())), - )?) - } else { - None - }; + Some(Box::new(io::stdout())), + )?); + //} else { + // None + //}; let exit_evt = EventFd::new(utils::eventfd::EFD_NONBLOCK) .map_err(Error::EventFd) @@ -517,16 +565,19 @@ pub fn build_microvm( &guest_memory, GuestAddress(kernel_bundle.guest_addr), &exit_evt, + io_sender, ) .map_err(StartMicrovmError::Internal)?; setup_interrupt_controller(&mut vm, vcpu_config.vcpu_count)?; + /* + This makes the kernel to block in parsing it, I do not know why attach_legacy_devices( &vm, &mut mmio_device_manager, &mut kernel_cmdline, serial_device, - )?; + )?; */ } #[cfg(all(target_arch = "aarch64", target_os = "macos"))] @@ -559,7 +610,7 @@ pub fn build_microvm( )?; } - #[cfg(not(feature = "tee"))] + #[cfg(all(not(feature = "tee"), not(feature = "cca")))] let _shm_region = Some(VirtioShmRegion { host_addr: guest_memory .get_host_address(GuestAddress(arch_memory_info.shm_start_addr)) @@ -577,6 +628,7 @@ pub fn build_microvm( exit_observers: Vec::new(), vm, mmio_device_manager, + guest_memfd_vec: guest_memfd, #[cfg(target_arch = "x86_64")] pio_device_manager, }; @@ -610,7 +662,7 @@ pub fn build_microvm( if let Some(vsock) = vm_resources.vsock.get() { attach_unixsock_vsock_device(&mut vmm, vsock, event_manager, intc.clone())?; #[cfg(not(feature = "net"))] - vmm.kernel_cmdline.insert_str("tsi_hijack")?; + //vmm.kernel_cmdline.insert_str("tsi_hijack")?; #[cfg(feature = "net")] if vm_resources .net_builder @@ -619,7 +671,7 @@ pub fn build_microvm( .is_empty() { // Only enable TSI if we don't have any network devices. - vmm.kernel_cmdline.insert_str("tsi_hijack")?; + //vmm.kernel_cmdline.insert_str("tsi_hijack")?; } } #[cfg(feature = "net")] @@ -683,6 +735,55 @@ pub fn build_microvm( println!("Starting TEE/microVM."); } + // after this point guest memory and regs are not accesible anymore + #[cfg(feature = "cca")] + { + let _ = vmm + .kvm_vm() + .realm + .configure_measurement(&vmm.kvm_vm().fd.lock().unwrap(), Algo::AlgoSha256); + + vmm.kvm_vm() + .realm + .create_realm_descriptor(&vmm.kvm_vm().fd.lock().unwrap()) + .unwrap(); + + println!("Injecting and measuring memory regions. This may take a while."); + + for region in measured_regions.iter() { + if region.populate { + vmm.kvm_vm() + .realm + .populate( + &vmm.kvm_vm().fd.lock().unwrap(), + region.guest_addr, + region.size.try_into().unwrap(), + ) + .unwrap(); + } else { + vmm.kvm_vm() + .realm + .initiate( + &vmm.kvm_vm().fd.lock().unwrap(), + region.guest_addr, + region.size.try_into().unwrap(), + ) + .unwrap(); + } + } + + let feature = KVM_ARM_VCPU_REC as i32; + + for vcpu in vcpus.iter() { + vcpu.fd.vcpu_finalize(&feature).unwrap(); + } + + vmm.kvm_vm() + .realm + .activate(&vmm.kvm_vm().fd.lock().unwrap()) + .unwrap(); + } + vmm.start_vcpus(vcpus) .map_err(StartMicrovmError::Internal)?; @@ -809,7 +910,7 @@ fn load_cmdline(vmm: &Vmm) -> std::result::Result<(), StartMicrovmError> { .map_err(StartMicrovmError::LoadCommandline) } -#[cfg(all(target_os = "linux", not(feature = "tee")))] +#[cfg(all(target_os = "linux", not(feature = "tee"), not(feature = "cca")))] pub(crate) fn setup_vm( guest_memory: &GuestMemoryMmap, ) -> std::result::Result { @@ -824,6 +925,30 @@ pub(crate) fn setup_vm( .map_err(StartMicrovmError::Internal)?; Ok(vm) } +#[cfg(all(target_os = "linux", feature = "cca"))] +pub(crate) fn setup_vm( + guest_memory: &GuestMemoryMmap, + guest_memfd: &mut Vec, +) -> std::result::Result { + let kvm = KvmContext::new() + .map_err(Error::KvmContext) + .map_err(StartMicrovmError::Internal)?; + + // calculate max_addr for max_ipa + let mut max_addr = 0; + for region in guest_memory.iter() { + max_addr = max(max_addr, region.start_addr().raw_value() + region.len() - 1); + } + + let mut vm = Vm::new(kvm.fd(), max_addr as usize) + .map_err(Error::Vm) + .map_err(StartMicrovmError::Internal)?; + + vm.memory_init(guest_memory, kvm.max_memslots(), guest_memfd, true) + .map_err(Error::Vm) + .map_err(StartMicrovmError::Internal)?; + Ok(vm) +} #[cfg(all(target_os = "linux", feature = "tee"))] pub(crate) fn setup_vm( kvm: &KvmContext, @@ -932,13 +1057,13 @@ fn attach_legacy_devices( ) -> std::result::Result<(), StartMicrovmError> { if let Some(serial) = serial { mmio_device_manager - .register_mmio_serial(vm.fd(), kernel_cmdline, serial) + .register_mmio_serial(&vm.fd.lock().unwrap(), kernel_cmdline, serial) .map_err(Error::RegisterMMIODevice) .map_err(StartMicrovmError::Internal)?; } mmio_device_manager - .register_mmio_rtc(vm.fd()) + .register_mmio_rtc(&vm.fd.lock().unwrap()) .map_err(Error::RegisterMMIODevice) .map_err(StartMicrovmError::Internal)?; @@ -1018,17 +1143,19 @@ fn create_vcpus_aarch64( guest_mem: &GuestMemoryMmap, entry_addr: GuestAddress, exit_evt: &EventFd, + sender_io: Sender, ) -> super::Result> { let mut vcpus = Vec::with_capacity(vcpu_config.vcpu_count as usize); for cpu_index in 0..vcpu_config.vcpu_count { - let mut vcpu = Vcpu::new_aarch64( + let mut vcpu: Vcpu = Vcpu::new_aarch64( cpu_index, - vm.fd(), + &vm.fd.lock().unwrap(), exit_evt.try_clone().map_err(Error::EventFd)?, + sender_io.clone(), ) .map_err(Error::Vcpu)?; - vcpu.configure_aarch64(vm.fd(), guest_mem, entry_addr) + vcpu.configure_aarch64(&vm.fd.lock().unwrap(), guest_mem, entry_addr) .map_err(Error::Vcpu)?; vcpus.push(vcpu); @@ -1092,9 +1219,12 @@ fn attach_mmio_device( let _cmdline = &mut vmm.kernel_cmdline; #[cfg(target_os = "linux")] - let (_mmio_base, _irq) = - vmm.mmio_device_manager - .register_mmio_device(vmm.vm.fd(), device, type_id, id)?; + let (_mmio_base, _irq) = vmm.mmio_device_manager.register_mmio_device( + &vmm.vm.fd.lock().unwrap(), + device, + type_id, + id, + )?; #[cfg(target_os = "macos")] let (_mmio_base, _irq) = vmm .mmio_device_manager diff --git a/src/vmm/src/device_manager/kvm/mmio.rs b/src/vmm/src/device_manager/kvm/mmio.rs index 1fa7fe61..c3aec6e4 100644 --- a/src/vmm/src/device_manager/kvm/mmio.rs +++ b/src/vmm/src/device_manager/kvm/mmio.rs @@ -179,7 +179,10 @@ impl MMIODeviceManager { .map_err(Error::BusError)?; cmdline - .insert("earlycon", &format!("uart,mmio,0x{:08x}", self.mmio_base)) + .insert( + "earlycon", + &format!("pl011,mmio32,0x{:08x}", self.mmio_base), + ) .map_err(Error::Cmdline)?; let ret = self.mmio_base; diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index ea3fdb38..58f88b7e 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -39,6 +39,7 @@ use macos::vstate; use std::fmt::{Display, Formatter}; use std::io; +use std::os::fd::RawFd; use std::os::unix::io::AsRawFd; use std::sync::{Arc, Mutex}; #[cfg(target_os = "linux")] @@ -190,6 +191,8 @@ pub struct Vmm { guest_memory: GuestMemoryMmap, arch_memory_info: ArchMemoryInfo, + pub guest_memfd_vec: Vec, + kernel_cmdline: KernelCmdline, vcpus_handles: Vec, diff --git a/src/vmm/src/linux/vstate.rs b/src/vmm/src/linux/vstate.rs index 439dd203..0840afe3 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -8,13 +8,17 @@ use crossbeam_channel::{unbounded, Receiver, Sender, TryRecvError}; use libc::{c_int, c_void, siginfo_t}; use std::cell::Cell; +use std::cmp::max; use std::fmt::{Display, Formatter}; use std::io; use std::os::fd::RawFd; +use std::sync::Arc; +use std::sync::Mutex; #[cfg(feature = "tee")] use std::os::unix::io::RawFd; +use kvm_ioctls::VcpuExit::Unsupported; use std::result; use std::sync::atomic::{fence, Ordering}; #[cfg(not(test))] @@ -48,8 +52,10 @@ use kvm_bindings::{ KVM_MAX_CPUID_ENTRIES, KVM_PIT_SPEAKER_DUMMY, }; use kvm_bindings::{ - kvm_create_guest_memfd, kvm_userspace_memory_region, kvm_userspace_memory_region2, - KVM_API_VERSION, KVM_MEM_GUEST_MEMFD, + kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region, + kvm_userspace_memory_region2, KVM_API_VERSION, KVM_MEMORY_ATTRIBUTE_PRIVATE, + KVM_MEMORY_EXIT_FLAG_PRIVATE, KVM_MEM_GUEST_MEMFD, KVM_VM_TYPE_ARM_IPA_SIZE_MASK, + KVM_VM_TYPE_ARM_REALM, }; use kvm_ioctls::*; use utils::eventfd::EventFd; @@ -65,6 +71,9 @@ use sev::launch::sev as sev_launch; #[cfg(feature = "amd-sev")] use sev::launch::snp; +#[cfg(feature = "cca")] +use cca::Realm; + /// Signal number (SIGRTMIN) used to kick Vcpus. pub(crate) const VCPU_RTSIG_OFFSET: i32 = 0; @@ -403,12 +412,13 @@ impl Display for Error { pub type Result = result::Result; -#[cfg(feature = "tee")] +#[cfg(any(feature = "tee", feature = "cca"))] #[derive(Debug)] pub struct MeasuredRegion { pub guest_addr: u64, pub host_addr: u64, pub size: usize, + pub populate: bool, } /// Describes a KVM context that gets attached to the microVM. @@ -462,7 +472,7 @@ impl KvmContext { /// A wrapper around creating and using a VM. pub struct Vm { - fd: VmFd, + pub fd: Arc>, // X86 specific fields. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -483,11 +493,14 @@ pub struct Vm { #[cfg(feature = "amd-sev")] pub tee: Tee, + + #[cfg(feature = "cca")] + pub realm: Realm, } impl Vm { /// Constructs a new `Vm` using the given `Kvm` instance. - #[cfg(not(feature = "tee"))] + #[cfg(all(not(feature = "tee"), not(feature = "cca")))] pub fn new(kvm: &Kvm) -> Result { //create fd for interacting with kvm-vm specific functions let vm_fd = kvm.create_vm().map_err(Error::VmFd)?; @@ -511,6 +524,26 @@ impl Vm { }) } + #[cfg(feature = "cca")] + pub fn new(kvm: &Kvm, max_ipa: usize) -> Result { + //create fd for interacting with kvm-vm specific functions + let ipa_bits = max(64u32 - max_ipa.leading_zeros() - 1, 32) + 1; + let vm_fd = kvm + .create_vm_with_type( + (KVM_VM_TYPE_ARM_REALM | (ipa_bits & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)).into(), + ) + .map_err(Error::VmFd)?; + + let realm = Realm::new(); + + Ok(Vm { + fd: Arc::new(Mutex::new(vm_fd)), + #[cfg(target_arch = "aarch64")] + irqchip_handle: None, + realm, + }) + } + #[cfg(feature = "amd-sev")] pub fn new(kvm: &Kvm, tee_config: &TeeConfig) -> Result { //create fd for interacting with kvm-vm specific functions @@ -559,6 +592,7 @@ impl Vm { &mut self, guest_mem: &GuestMemoryMmap, kvm_max_memslots: usize, + guest_memfd: &mut Vec, require_guest_memfd: bool, ) -> Result<()> { if guest_mem.num_regions() > kvm_max_memslots { @@ -578,10 +612,14 @@ impl Vm { let id: RawFd = self .fd + .lock() + .unwrap() .create_guest_memfd(gmem) .map_err(Error::CreateGuestMemfd)?; - let memory_region = kvm_userspace_memory_region2 { + guest_memfd.push(id); + + let memory_region: kvm_userspace_memory_region2 = kvm_userspace_memory_region2 { slot: index as u32, flags: KVM_MEM_GUEST_MEMFD, guest_phys_addr: region.start_addr().raw_value(), @@ -597,9 +635,22 @@ impl Vm { // are not overlapping. unsafe { self.fd + .lock() + .unwrap() .set_user_memory_region2(memory_region) .map_err(Error::SetUserMemoryRegion2)?; }; + + // set private by default when using guestmemfd + // this imitates QEMU behavior + let attr = kvm_memory_attributes { + address: region.start_addr().raw_value(), + size: region.len(), + attributes: KVM_MEMORY_ATTRIBUTE_PRIVATE as u64, + flags: 0, + }; + + self.fd.lock().unwrap().set_memory_attributes(attr).unwrap(); } else { let memory_region = kvm_userspace_memory_region { slot: index as u32, @@ -612,6 +663,8 @@ impl Vm { // are not overlapping. unsafe { self.fd + .lock() + .unwrap() .set_user_memory_region(memory_region) .map_err(Error::SetUserMemoryRegion)?; }; @@ -700,7 +753,8 @@ impl Vm { #[cfg(target_arch = "aarch64")] pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<()> { self.irqchip_handle = Some( - arch::aarch64::gic::create_gic(&self.fd, vcpu_count.into()).map_err(Error::SetupGIC)?, + arch::aarch64::gic::create_gic(&self.fd.lock().unwrap(), vcpu_count.into()) + .map_err(Error::SetupGIC)?, ); Ok(()) } @@ -713,9 +767,9 @@ impl Vm { } /// Gets a reference to the kvm file descriptor owned by this VM. - pub fn fd(&self) -> &VmFd { - &self.fd - } + //pub fn fd(&self) -> &VmFd { + // &self.fd + // } #[allow(unused)] #[cfg(target_arch = "x86_64")] @@ -806,9 +860,14 @@ pub struct VcpuConfig { // Using this for easier explicit type-casting to help IDEs interpret the code. type VcpuCell = Cell>; +pub struct MemProperties { + pub addr: u64, + pub size: u64, + pub attributes: u32, +} /// A wrapper around creating and using a kvm-based VCPU. pub struct Vcpu { - fd: VcpuFd, + pub fd: VcpuFd, id: u8, mmio_bus: Option, #[allow(dead_code)] @@ -825,6 +884,9 @@ pub struct Vcpu { #[cfg(target_arch = "aarch64")] mpidr: u64, + #[cfg(feature = "cca")] + sender_io: Sender, + // The receiving end of events channel owned by the vcpu side. event_receiver: Receiver, // The transmitting end of the events channel which will be given to the handler. @@ -966,7 +1028,12 @@ impl Vcpu { /// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits. /// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime. #[cfg(target_arch = "aarch64")] - pub fn new_aarch64(id: u8, vm_fd: &VmFd, exit_evt: EventFd) -> Result { + pub fn new_aarch64( + id: u8, + vm_fd: &VmFd, + exit_evt: EventFd, + sender_io: Sender, + ) -> Result { let kvm_vcpu = vm_fd.create_vcpu(id as u64).map_err(Error::VcpuFd)?; let (event_sender, event_receiver) = unbounded(); let (response_sender, response_receiver) = unbounded(); @@ -981,6 +1048,7 @@ impl Vcpu { event_sender: Some(event_sender), response_receiver: Some(response_receiver), response_sender, + sender_io, }) } @@ -1267,12 +1335,40 @@ impl Vcpu { info!("Received KVM_EXIT_SHUTDOWN signal"); Ok(VcpuEmulation::Stopped) } + VcpuExit::MemoryFault { flags, gpa, size } => { + // TODO: flags can be private or shared + if flags & !KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 != 0 { + error!("KVM_EXIT_MEMORY_FAULT: Unknown flag {}", flags); + Err(Error::VcpuUnhandledKvmExit) + } else { + // from private to shared + let mut attr = 0; + // from shared to private + if flags & KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 + == KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 + { + attr = KVM_MEMORY_ATTRIBUTE_PRIVATE; + }; + + let _ = self.sender_io.try_send(MemProperties { + addr: gpa, + size, + attributes: attr, + }); + Ok(VcpuEmulation::Handled) + } + } // Documentation specifies that below kvm exits are considered // errors. VcpuExit::FailEntry(reason, vcpu) => { error!("Received KVM_EXIT_FAIL_ENTRY signal: reason={reason}, vcpu={vcpu}"); Err(Error::VcpuUnhandledKvmExit) } + // TODO: to remove this + Unsupported(39) => { + println!("memory fault!"); + Ok(VcpuEmulation::Handled) + } VcpuExit::InternalError => { error!("Received KVM_EXIT_INTERNAL_ERROR signal"); Err(Error::VcpuUnhandledKvmExit) @@ -1280,6 +1376,7 @@ impl Vcpu { r => { // TODO: Are we sure we want to finish running a vcpu upon // receiving a vm exit that is not necessarily an error? + println!("error! {:?}", r); error!("Unexpected exit reason on vcpu run: {:?}", r); Err(Error::VcpuUnhandledKvmExit) } @@ -1605,7 +1702,9 @@ mod tests { // Create valid memory region and test that the initialization is successful. let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap(); - assert!(vm.memory_init(&gm, kvm_context.max_memslots(), false).is_ok()); + assert!(vm + .memory_init(&gm, kvm_context.max_memslots(), false) + .is_ok()); // Set the maximum number of memory slots to 1 in KvmContext to check the error // path of memory_init. Create 2 non-overlapping memory slots. @@ -1615,7 +1714,9 @@ mod tests { (GuestAddress(0x1001), 0x2000), ]) .unwrap(); - assert!(vm.memory_init(&gm, kvm_context.max_memslots(), false).is_err()); + assert!(vm + .memory_init(&gm, kvm_context.max_memslots(), false) + .is_err()); } #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/src/vmm_config/boot_source.rs b/src/vmm/src/vmm_config/boot_source.rs index 9c70d5b1..16b4b5b4 100644 --- a/src/vmm/src/vmm_config/boot_source.rs +++ b/src/vmm/src/vmm_config/boot_source.rs @@ -18,7 +18,7 @@ use std::fmt::{Display, Formatter, Result}; #[cfg(all(target_os = "linux", not(feature = "tee")))] pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=-1 panic_print=0 nomodule console=hvc0 \ - rootfstype=virtiofs rw quiet no-kvmapf"; + rootfstype=virtiofs rw no-kvmapf"; #[cfg(feature = "amd-sev")] pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=-1 panic_print=0 nomodule console=hvc0 \