From 481914f6b1d7812e0965091cce5957af971d8859 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Fri, 2 Jun 2023 19:11:10 +0200 Subject: [PATCH 01/29] Use Guest[Virt|Phys]Addr in uhyve --- src/consts.rs | 14 +++++---- src/linux/gdb/breakpoints.rs | 5 +-- src/linux/gdb/mod.rs | 5 +-- src/linux/uhyve.rs | 17 +++++------ src/linux/vcpu.rs | 48 +++++++++++++++++------------ src/linux/virtio.rs | 9 +++--- src/macos/x86_64/uhyve.rs | 17 +++++------ src/macos/x86_64/vcpu.rs | 51 +++++++++++++++++++------------ src/vm.rs | 46 +++++++++++++--------------- uhyve-interface/src/lib.rs | 7 +++++ uhyve-interface/src/parameters.rs | 16 +++++----- 11 files changed, 128 insertions(+), 107 deletions(-) diff --git a/src/consts.rs b/src/consts.rs index c36aed25..d6da870b 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -1,17 +1,19 @@ +use x86_64::addr::PhysAddr; + pub const PAGE_SIZE: usize = 0x1000; pub const GDT_KERNEL_CODE: u16 = 1; pub const GDT_KERNEL_DATA: u16 = 2; pub const APIC_DEFAULT_BASE: u64 = 0xfee00000; -pub const BOOT_GDT: u64 = 0x1000; +pub const BOOT_GDT: PhysAddr = PhysAddr::new(0x1000); pub const BOOT_GDT_NULL: u64 = 0; pub const BOOT_GDT_CODE: u64 = 1; pub const BOOT_GDT_DATA: u64 = 2; pub const BOOT_GDT_MAX: u64 = 3; -pub const BOOT_PML4: u64 = 0x10000; -pub const BOOT_PGT: u64 = BOOT_PML4; -pub const BOOT_PDPTE: u64 = 0x11000; -pub const BOOT_PDE: u64 = 0x12000; -pub const BOOT_INFO_ADDR: u64 = 0x9000; +pub const BOOT_PML4: PhysAddr = PhysAddr::new(0x10000); +pub const BOOT_PGT: PhysAddr = BOOT_PML4; +pub const BOOT_PDPTE: PhysAddr = PhysAddr::new(0x11000); +pub const BOOT_PDE: PhysAddr = PhysAddr::new(0x12000); +pub const BOOT_INFO_ADDR: PhysAddr = PhysAddr::new(0x9000); pub const EFER_SCE: u64 = 1; /* System Call Extensions */ pub const EFER_LME: u64 = 1 << 8; /* Long mode enable */ pub const EFER_LMA: u64 = 1 << 10; /* Long mode active (read-only) */ diff --git a/src/linux/gdb/breakpoints.rs b/src/linux/gdb/breakpoints.rs index 3644d3b1..9443f326 100644 --- a/src/linux/gdb/breakpoints.rs +++ b/src/linux/gdb/breakpoints.rs @@ -1,6 +1,7 @@ use std::collections::{hash_map::Entry, HashMap}; use gdbstub::target::{self, ext::breakpoints::WatchKind, TargetResult}; +use uhyve_interface::GuestVirtAddr; use super::GdbUhyve; use crate::arch::x86_64::registers; @@ -49,7 +50,7 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { let sw_breakpoint = SwBreakpoint::new(addr, kind); if let Entry::Vacant(entry) = self.sw_breakpoints.entry(sw_breakpoint) { - let instructions = unsafe { self.vcpu.memory(addr, kind) }; + let instructions = unsafe { self.vcpu.memory(GuestVirtAddr::new(addr), kind) }; entry.insert(instructions.into()); instructions.fill(SwBreakpoint::OPCODE); Ok(true) @@ -62,7 +63,7 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { let sw_breakpoint = SwBreakpoint::new(addr, kind); if let Entry::Occupied(entry) = self.sw_breakpoints.entry(sw_breakpoint) { - let instructions = unsafe { self.vcpu.memory(addr, kind) }; + let instructions = unsafe { self.vcpu.memory(GuestVirtAddr::new(addr), kind) }; instructions.copy_from_slice(&entry.remove()); Ok(true) } else { diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index 4560ceb0..450c28a0 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -17,6 +17,7 @@ use kvm_bindings::{ }; use libc::EINVAL; use nix::sys::pthread::pthread_self; +use uhyve_interface::GuestVirtAddr; use x86_64::registers::debug::Dr6Flags; use self::breakpoints::SwBreakpoints; @@ -119,13 +120,13 @@ impl SingleThreadBase for GdbUhyve { } fn read_addrs(&mut self, start_addr: u64, data: &mut [u8]) -> TargetResult { - let src = unsafe { self.vcpu.memory(start_addr, data.len()) }; + let src = unsafe { self.vcpu.memory(GuestVirtAddr::new(start_addr), data.len()) }; data.copy_from_slice(src); Ok(data.len()) } fn write_addrs(&mut self, start_addr: u64, data: &[u8]) -> TargetResult<(), Self> { - let mem = unsafe { self.vcpu.memory(start_addr, data.len()) }; + let mem = unsafe { self.vcpu.memory(GuestVirtAddr::new(start_addr), data.len()) }; mem.copy_from_slice(data); Ok(()) } diff --git a/src/linux/uhyve.rs b/src/linux/uhyve.rs index 1c2582fd..ae874125 100755 --- a/src/linux/uhyve.rs +++ b/src/linux/uhyve.rs @@ -246,10 +246,10 @@ impl Vm for Uhyve { let (mem_addr, _) = self.guest_mem(); unsafe { - let pml4 = &mut *((mem_addr as u64 + BOOT_PML4) as *mut PageTable); - let pdpte = &mut *((mem_addr as u64 + BOOT_PDPTE) as *mut PageTable); - let pde = &mut *((mem_addr as u64 + BOOT_PDE) as *mut PageTable); - let gdt_entry: u64 = mem_addr as u64 + BOOT_GDT; + let pml4 = &mut *((mem_addr as u64 + BOOT_PML4.as_u64()) as *mut PageTable); + let pdpte = &mut *((mem_addr as u64 + BOOT_PDPTE.as_u64()) as *mut PageTable); + let pde = &mut *((mem_addr as u64 + BOOT_PDE.as_u64()) as *mut PageTable); + let gdt_entry: u64 = mem_addr as u64 + BOOT_GDT.as_u64(); // initialize GDT *((gdt_entry) as *mut u64) = create_gdt_entry(0, 0, 0); @@ -267,17 +267,14 @@ impl Vm for Uhyve { libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ pml4[0].set_addr( - PhysAddr::new(BOOT_PDPTE), + BOOT_PDPTE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE, ); pml4[511].set_addr( - PhysAddr::new(BOOT_PML4), - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pdpte[0].set_addr( - PhysAddr::new(BOOT_PDE), + BOOT_PML4, PageTableFlags::PRESENT | PageTableFlags::WRITABLE, ); + pdpte[0].set_addr(BOOT_PDE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE); for i in 0..512 { let addr = PhysAddr::new(i as u64 * Page::::SIZE); diff --git a/src/linux/vcpu.rs b/src/linux/vcpu.rs index fdde66c2..6d4cc57f 100755 --- a/src/linux/vcpu.rs +++ b/src/linux/vcpu.rs @@ -7,7 +7,7 @@ use std::{ use kvm_bindings::*; use kvm_ioctls::{VcpuExit, VcpuFd}; -use uhyve_interface::Hypercall; +use uhyve_interface::{GuestPhysAddr, GuestVirtAddr, Hypercall}; use x86_64::{ registers::control::{Cr0Flags, Cr4Flags}, structures::paging::PageTableFlags, @@ -37,8 +37,8 @@ pub struct UhyveCPU { } impl UhyveCPU { - pub unsafe fn memory(&mut self, start_addr: u64, len: usize) -> &mut [u8] { - let phys = self.virt_to_phys(start_addr.try_into().unwrap()); + pub unsafe fn memory(&mut self, start_addr: GuestVirtAddr, len: usize) -> &mut [u8] { + let phys = self.virt_to_phys(start_addr); let host = self.host_address(phys); slice::from_raw_parts_mut(host as *mut u8, len) } @@ -182,7 +182,7 @@ impl UhyveCPU { | Cr0Flags::PAGING; sregs.cr0 = cr0.bits(); - sregs.cr3 = BOOT_PML4; + sregs.cr3 = BOOT_PML4.as_u64(); let cr4 = Cr4Flags::PHYSICAL_ADDRESS_EXTENSION; sregs.cr4 = cr4.bits(); @@ -213,7 +213,7 @@ impl UhyveCPU { sregs.ss = seg; //sregs.fs = seg; //sregs.gs = seg; - sregs.gdt.base = BOOT_GDT; + sregs.gdt.base = BOOT_GDT.as_u64(); sregs.gdt.limit = ((std::mem::size_of::() * BOOT_GDT_MAX as usize) - 1) as u16; self.vcpu.set_sregs(&sregs)?; @@ -221,7 +221,7 @@ impl UhyveCPU { let mut regs = self.vcpu.get_regs()?; regs.rflags = 2; regs.rip = entry_point; - regs.rdi = BOOT_INFO_ADDR; + regs.rdi = BOOT_INFO_ADDR.as_u64(); regs.rsi = cpu_id.into(); regs.rsp = stack_address; @@ -271,36 +271,43 @@ impl VirtualCPU for UhyveCPU { self.args.as_slice() } - fn host_address(&self, addr: usize) -> usize { - addr + self.vm_start + fn host_address(&self, addr: GuestPhysAddr) -> usize { + addr.as_u64() as usize + self.vm_start } - fn virt_to_phys(&self, addr: usize) -> usize { + fn virt_to_phys(&self, addr: GuestVirtAddr) -> GuestPhysAddr { + // TODO: This fn is curently x86_64 only /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). - pub const PAGE_BITS: usize = 12; + pub const PAGE_BITS: u64 = 12; /// Number of bits of the index in each table (PML4, PDPT, PDT, PGT). pub const PAGE_MAP_BITS: usize = 9; - let executable_disable_mask = !usize::try_from(PageTableFlags::NO_EXECUTE.bits()).unwrap(); - let mut page_table = self.host_address(BOOT_PML4 as usize) as *const usize; + let executable_disable_mask = !u64::try_from(PageTableFlags::NO_EXECUTE.bits()).unwrap(); + let mut page_table = self.host_address(BOOT_PML4) as *const u64; let mut page_bits = 39; - let mut entry: usize = 0; + let mut entry: u64 = 0; for _i in 0..4 { - let index = (addr >> page_bits) & ((1 << PAGE_MAP_BITS) - 1); - entry = unsafe { *page_table.add(index) & executable_disable_mask }; + let index = (addr.as_u64() >> page_bits) & ((1 << PAGE_MAP_BITS) - 1); + entry = unsafe { *page_table.add(index as usize) & executable_disable_mask }; // bit 7 is set if this entry references a 1 GiB (PDPT) or 2 MiB (PDT) page. - if entry & usize::try_from(PageTableFlags::HUGE_PAGE.bits()).unwrap() != 0 { - return (entry & ((!0usize) << page_bits)) | (addr & !((!0usize) << page_bits)); + if entry & u64::try_from(PageTableFlags::HUGE_PAGE.bits()).unwrap() != 0 { + return GuestPhysAddr::new( + (entry & ((!0u64) << page_bits)) | (addr.as_u64() & !((!0_u64) << page_bits)), + ); } else { - page_table = self.host_address(entry & !((1 << PAGE_BITS) - 1)) as *const usize; + page_table = self.host_address(GuestPhysAddr::new( + (entry & !((1 << PAGE_BITS) - 1)) as *const u64 as u64, + )) as *const u64; page_bits -= PAGE_MAP_BITS; } } - (entry & ((!0usize) << PAGE_BITS)) | (addr & !((!0usize) << PAGE_BITS)) + GuestPhysAddr::new( + (entry & ((!0u64) << PAGE_BITS)) | (addr.as_u64() & !((!0u64) << PAGE_BITS)), + ) } fn r#continue(&mut self) -> HypervisorResult { @@ -358,7 +365,8 @@ impl VirtualCPU for UhyveCPU { } }, VcpuExit::IoOut(port, addr) => { - let data_addr: usize = unsafe { (*(addr.as_ptr() as *const u32)) as usize }; + let data_addr = + GuestPhysAddr::new(unsafe { (*(addr.as_ptr() as *const u32)) as u64 }); if let Some(hypercall) = unsafe { self.address_to_hypercall(port, data_addr) } { diff --git a/src/linux/virtio.rs b/src/linux/virtio.rs index 54aa76a1..a400f86a 100644 --- a/src/linux/virtio.rs +++ b/src/linux/virtio.rs @@ -3,6 +3,7 @@ use std::{fmt, mem::size_of, ptr::copy_nonoverlapping, sync::Mutex, vec::Vec}; use log::info; use mac_address::*; use tun_tap::*; +use uhyve_interface::GuestPhysAddr; use virtio_bindings::bindings::virtio_net::*; use crate::{linux::virtqueue::*, vm::VirtualCPU}; @@ -139,7 +140,7 @@ impl VirtioNetPciDevice { } for index in send_indices { let desc = unsafe { tx_queue.get_descriptor(index) }; - let gpa = unsafe { *(desc.addr as *const usize) }; + let gpa = GuestPhysAddr::new(unsafe { *(desc.addr as *const u64) }); let hva = (*cpu).host_address(gpa) as *mut u8; match &self.iface { Some(tap) => unsafe { @@ -269,10 +270,10 @@ impl VirtioNetPciDevice { && status & STATUS_DRIVER_OK == 0 && self.selected_queue_num as usize == self.virt_queues.len() { - let gpa = unsafe { + let gpa = GuestPhysAddr::new(unsafe { #[allow(clippy::cast_ptr_alignment)] - *(dest.as_ptr() as *const usize) - }; + *(dest.as_ptr() as *const u64) + }); let hva = (*vcpu).host_address(gpa) as *mut u8; let queue = unsafe { Virtqueue::new(hva, QUEUE_LIMIT) }; self.virt_queues.push(queue); diff --git a/src/macos/x86_64/uhyve.rs b/src/macos/x86_64/uhyve.rs index 35d3635a..92ad8e5c 100644 --- a/src/macos/x86_64/uhyve.rs +++ b/src/macos/x86_64/uhyve.rs @@ -166,10 +166,10 @@ impl Vm for Uhyve { let (mem_addr, _) = self.guest_mem(); unsafe { - let pml4 = &mut *((mem_addr as u64 + BOOT_PML4) as *mut PageTable); - let pdpte = &mut *((mem_addr as u64 + BOOT_PDPTE) as *mut PageTable); - let pde = &mut *((mem_addr as u64 + BOOT_PDE) as *mut PageTable); - let gdt_entry: u64 = mem_addr as u64 + BOOT_GDT; + let pml4 = &mut *((mem_addr as u64 + BOOT_PML4.as_u64()) as *mut PageTable); + let pdpte = &mut *((mem_addr as u64 + BOOT_PDPTE.as_u64()) as *mut PageTable); + let pde = &mut *((mem_addr as u64 + BOOT_PDE.as_u64()) as *mut PageTable); + let gdt_entry: u64 = mem_addr as u64 + BOOT_GDT.as_u64(); // initialize GDT *((gdt_entry) as *mut u64) = create_gdt_entry(0, 0, 0); @@ -187,17 +187,14 @@ impl Vm for Uhyve { libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ pml4[0].set_addr( - PhysAddr::new(BOOT_PDPTE), + BOOT_PDPTE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE, ); pml4[511].set_addr( - PhysAddr::new(BOOT_PML4), - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pdpte[0].set_addr( - PhysAddr::new(BOOT_PDE), + BOOT_PML4, PageTableFlags::PRESENT | PageTableFlags::WRITABLE, ); + pdpte[0].set_addr(BOOT_PDE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE); for i in 0..512 { let addr = PhysAddr::new(i as u64 * Page::::SIZE); diff --git a/src/macos/x86_64/vcpu.rs b/src/macos/x86_64/vcpu.rs index 764b25e1..2e016b98 100644 --- a/src/macos/x86_64/vcpu.rs +++ b/src/macos/x86_64/vcpu.rs @@ -10,7 +10,7 @@ use std::{ use burst::x86::{disassemble_64, InstructionOperation, OperandType}; use lazy_static::lazy_static; use log::{debug, trace}; -use uhyve_interface::Hypercall; +use uhyve_interface::{GuestPhysAddr, GuestVirtAddr, Hypercall}; use x86_64::{ registers::control::{Cr0Flags, Cr4Flags}, structures::{gdt::SegmentSelector, paging::PageTableFlags}, @@ -202,7 +202,8 @@ impl UhyveCPU { self.vcpu.write_vmcs(VMCS_GUEST_GS_BASE, 0)?; self.vcpu.write_vmcs(VMCS_GUEST_GS_AR, 0x4093)?; - self.vcpu.write_vmcs(VMCS_GUEST_GDTR_BASE, BOOT_GDT)?; + self.vcpu + .write_vmcs(VMCS_GUEST_GDTR_BASE, BOOT_GDT.as_u64())?; self.vcpu.write_vmcs( VMCS_GUEST_GDTR_LIMIT, ((std::mem::size_of::() * BOOT_GDT_MAX as usize) - 1) as u64, @@ -270,7 +271,8 @@ impl UhyveCPU { self.vcpu.write_register(&Register::CR0, cr0.bits())?; self.vcpu.write_register(&Register::CR4, cr4.bits())?; - self.vcpu.write_register(&Register::CR3, BOOT_PML4)?; + self.vcpu + .write_register(&Register::CR3, BOOT_PML4.as_u64())?; self.vcpu.write_register(&Register::DR7, 0)?; self.vcpu.write_vmcs(VMCS_GUEST_SYSENTER_ESP, 0)?; self.vcpu.write_vmcs(VMCS_GUEST_SYSENTER_EIP, 0)?; @@ -507,8 +509,9 @@ impl UhyveCPU { let qualification = self.vcpu.read_vmcs(VMCS_RO_EXIT_QUALIFIC)?; let read = (qualification & (1 << 0)) != 0; let write = (qualification & (1 << 1)) != 0; - let code = - unsafe { std::slice::from_raw_parts(self.host_address(rip as usize) as *const u8, 8) }; + let code = unsafe { + std::slice::from_raw_parts(self.host_address(GuestPhysAddr::new(rip)) as *const u8, 8) + }; if let Ok(instr) = disassemble_64(code, rip as usize, code.len()) { match instr.operation { @@ -616,7 +619,8 @@ impl VirtualCPU for UhyveCPU { self.vcpu.write_register(&Register::RCX, 0)?; self.vcpu.write_register(&Register::RDX, 0)?; self.vcpu.write_register(&Register::RSI, cpu_id.into())?; - self.vcpu.write_register(&Register::RDI, BOOT_INFO_ADDR)?; + self.vcpu + .write_register(&Register::RDI, BOOT_INFO_ADDR.as_u64())?; self.vcpu.write_register(&Register::R8, 0)?; self.vcpu.write_register(&Register::R9, 0)?; self.vcpu.write_register(&Register::R10, 0)?; @@ -639,36 +643,43 @@ impl VirtualCPU for UhyveCPU { self.args.as_slice() } - fn host_address(&self, addr: usize) -> usize { - addr + self.vm_start + fn host_address(&self, addr: GuestPhysAddr) -> usize { + addr.as_u64() as usize + self.vm_start } - fn virt_to_phys(&self, addr: usize) -> usize { + fn virt_to_phys(&self, addr: GuestVirtAddr) -> GuestPhysAddr { + // TODO: This fn is curently x86_64 only /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). - pub const PAGE_BITS: usize = 12; + pub const PAGE_BITS: u64 = 12; /// Number of bits of the index in each table (PML4, PDPT, PDT, PGT). pub const PAGE_MAP_BITS: usize = 9; - let executable_disable_mask = !usize::try_from(PageTableFlags::NO_EXECUTE.bits()).unwrap(); - let mut page_table = self.host_address(BOOT_PML4 as usize) as *const usize; + let executable_disable_mask = !u64::try_from(PageTableFlags::NO_EXECUTE.bits()).unwrap(); + let mut page_table = self.host_address(BOOT_PML4) as *const u64; let mut page_bits = 39; - let mut entry: usize = 0; + let mut entry: u64 = 0; for _i in 0..4 { - let index = (addr >> page_bits) & ((1 << PAGE_MAP_BITS) - 1); - entry = unsafe { *page_table.add(index) & executable_disable_mask }; + let index = (addr.as_u64() >> page_bits) & ((1 << PAGE_MAP_BITS) - 1); + entry = unsafe { *page_table.add(index as usize) & executable_disable_mask }; // bit 7 is set if this entry references a 1 GiB (PDPT) or 2 MiB (PDT) page. - if entry & usize::try_from(PageTableFlags::HUGE_PAGE.bits()).unwrap() != 0 { - return (entry & ((!0usize) << page_bits)) | (addr & !((!0usize) << page_bits)); + if entry & u64::try_from(PageTableFlags::HUGE_PAGE.bits()).unwrap() != 0 { + return GuestPhysAddr::new( + (entry & ((!0u64) << page_bits)) | (addr.as_u64() & !((!0_u64) << page_bits)), + ); } else { - page_table = self.host_address(entry & !((1 << PAGE_BITS) - 1)) as *const usize; + page_table = self.host_address(GuestPhysAddr::new( + (entry & !((1 << PAGE_BITS) - 1)) as *const u64 as u64, + )) as *const u64; page_bits -= PAGE_MAP_BITS; } } - (entry & ((!0usize) << PAGE_BITS)) | (addr & !((!0usize) << PAGE_BITS)) + GuestPhysAddr::new( + (entry & ((!0u64) << PAGE_BITS)) | (addr.as_u64() & !((!0u64) << PAGE_BITS)), + ) } fn r#continue(&mut self) -> HypervisorResult { @@ -746,7 +757,7 @@ impl VirtualCPU for UhyveCPU { let data_addr: u64 = self.vcpu.read_register(&Register::RAX)? & 0xFFFFFFFF; if let Some(hypercall) = - unsafe { self.address_to_hypercall(port, data_addr as usize) } + unsafe { self.address_to_hypercall(port, GuestPhysAddr::new(data_addr)) } { match hypercall { Hypercall::Cmdsize(syssize) => self.cmdsize(syssize), diff --git a/src/vm.rs b/src/vm.rs index df6362bb..2e6106a9 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -9,7 +9,9 @@ use hermit_entry::{ }; use log::{error, warn}; use thiserror::Error; -use uhyve_interface::{parameters::*, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; +use uhyve_interface::{ + parameters::*, GuestPhysAddr, GuestVirtAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC, +}; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::{ @@ -61,10 +63,10 @@ pub trait VirtualCPU { fn print_registers(&self); /// Translates an address from the VM's physical space into the hosts virtual space. - fn host_address(&self, addr: usize) -> usize; + fn host_address(&self, addr: GuestPhysAddr) -> usize; /// Looks up the guests pagetable and translates a guest's virtual address to a guest's physical address. - fn virt_to_phys(&self, addr: usize) -> usize; + fn virt_to_phys(&self, addr: GuestVirtAddr) -> GuestPhysAddr; /// Returns the (host) path of the kernel binary. fn kernel_path(&self) -> &Path; @@ -79,7 +81,7 @@ pub trait VirtualCPU { /// - `data` must be a valid pointer to the data attached to the hypercall. /// - The return value is only valid, as long as the guest is halted. /// - This fn must not be called multiple times on the same data, to avoid creating mutable aliasing. - unsafe fn address_to_hypercall(&self, addr: u16, data: usize) -> Option> { + unsafe fn address_to_hypercall(&self, addr: u16, data: GuestPhysAddr) -> Option> { if let Ok(hypercall_port) = HypercallAddress::try_from(addr) { Some(match hypercall_port { HypercallAddress::FileClose => { @@ -118,7 +120,7 @@ pub trait VirtualCPU { let syscmdval = unsafe { &*(self.host_address(data) as *const CmdvalParams) }; Hypercall::Cmdval(syscmdval) } - HypercallAddress::Uart => Hypercall::SerialWriteByte(data as u8), + HypercallAddress::Uart => Hypercall::SerialWriteByte(data.as_u64() as u8), _ => unimplemented!(), }) } else { @@ -158,13 +160,14 @@ pub trait VirtualCPU { /// Copies the arguments end environment of the application into the VM's memory. fn cmdval(&self, syscmdval: &CmdvalParams) { - let argv = self.host_address(syscmdval.argv.as_u64() as usize); + let argv = self.host_address(syscmdval.argv); // copy kernel path as first argument { let path = self.kernel_path().as_os_str(); - let argvptr = unsafe { self.host_address(*(argv as *mut *mut u8) as usize) }; + let argvptr = + unsafe { self.host_address(GuestPhysAddr::new(*(argv as *mut *mut u8) as u64)) }; let len = path.len(); let slice = unsafe { slice::from_raw_parts_mut(argvptr as *mut u8, len + 1) }; @@ -176,9 +179,9 @@ pub trait VirtualCPU { // Copy the application arguments into the vm memory for (counter, argument) in self.args().iter().enumerate() { let argvptr = unsafe { - self.host_address( - *((argv + (counter + 1) * mem::size_of::()) as *mut *mut u8) as usize, - ) + self.host_address(GuestPhysAddr::new( + *((argv + (counter + 1) * mem::size_of::()) as *mut *mut u8) as u64, + )) }; let len = argument.len(); let slice = unsafe { slice::from_raw_parts_mut(argvptr as *mut u8, len + 1) }; @@ -190,14 +193,14 @@ pub trait VirtualCPU { // Copy the environment variables into the vm memory let mut counter = 0; - let envp = self.host_address(syscmdval.envp.as_u64() as usize); + let envp = self.host_address(syscmdval.envp); for (key, value) in std::env::vars_os() { if counter < MAX_ARGC_ENVC.try_into().unwrap() { let envptr = unsafe { - self.host_address( + self.host_address(GuestPhysAddr::new( *((envp + counter as usize * mem::size_of::()) as *mut *mut u8) - as usize, - ) + as u64, + )) }; let len = key.len() + value.len(); let slice = unsafe { slice::from_raw_parts_mut(envptr as *mut u8, len + 2) }; @@ -216,8 +219,7 @@ pub trait VirtualCPU { /// TODO: UNSAFE AS *%@#. It has to be checked that the VM is allowed to unlink that file! fn unlink(&self, sysunlink: &mut UnlinkParams) { unsafe { - sysunlink.ret = - libc::unlink(self.host_address(sysunlink.name.as_u64() as usize) as *const i8); + sysunlink.ret = libc::unlink(self.host_address(sysunlink.name) as *const i8); } } @@ -230,7 +232,7 @@ pub trait VirtualCPU { fn open(&self, sysopen: &mut OpenParams) { unsafe { sysopen.ret = libc::open( - self.host_address(sysopen.name.as_u64() as usize) as *const i8, + self.host_address(sysopen.name) as *const i8, sysopen.flags, sysopen.mode, ); @@ -247,11 +249,9 @@ pub trait VirtualCPU { /// Handles an read syscall on the host. fn read(&self, sysread: &mut ReadPrams) { unsafe { - let buffer = self.virt_to_phys(sysread.buf.as_u64() as usize); - let bytes_read = libc::read( sysread.fd, - self.host_address(buffer) as *mut libc::c_void, + self.host_address(sysread.buf) as *mut libc::c_void, sysread.len, ); if bytes_read >= 0 { @@ -265,13 +265,11 @@ pub trait VirtualCPU { /// Handles an write syscall on the host. fn write(&self, syswrite: &WriteParams) -> io::Result<()> { let mut bytes_written: usize = 0; - let buffer = self.virt_to_phys(syswrite.buf.as_u64() as usize); - while bytes_written != syswrite.len { unsafe { let step = libc::write( syswrite.fd, - self.host_address(buffer + bytes_written) as *const libc::c_void, + self.host_address(syswrite.buf + bytes_written) as *const libc::c_void, syswrite.len - bytes_written, ); if step >= 0 { @@ -360,7 +358,7 @@ pub trait Vm { boot_time: SystemTime::now().into(), }, }; - let raw_boot_info_ptr = vm_mem.add(BOOT_INFO_ADDR as usize) as *mut RawBootInfo; + let raw_boot_info_ptr = vm_mem.add(BOOT_INFO_ADDR.as_u64() as usize) as *mut RawBootInfo; *raw_boot_info_ptr = RawBootInfo::from(boot_info); self.set_boot_info(raw_boot_info_ptr); self.set_stack_address(start_address.checked_sub(KERNEL_STACK_SIZE).expect( diff --git a/uhyve-interface/src/lib.rs b/uhyve-interface/src/lib.rs index 02c34c1a..d20c14fb 100644 --- a/uhyve-interface/src/lib.rs +++ b/uhyve-interface/src/lib.rs @@ -14,6 +14,13 @@ use num_enum::TryFromPrimitive; pub mod elf; pub mod parameters; + +#[cfg(target_arch = "x86_64")] +pub use ::x86_64::addr::PhysAddr as GuestPhysAddr; +#[cfg(target_arch = "x86_64")] +pub use ::x86_64::addr::VirtAddr as GuestVirtAddr; +#[cfg(not(target_pointer_width = "64"))] +compile_error!("Using uhyve-interface on a non-64-bit system is not (yet?) supported"); use parameters::*; /// The version of the uhyve interface. Note: This is not the same as the semver of the crate but diff --git a/uhyve-interface/src/parameters.rs b/uhyve-interface/src/parameters.rs index 888c53cb..321e21d8 100644 --- a/uhyve-interface/src/parameters.rs +++ b/uhyve-interface/src/parameters.rs @@ -1,8 +1,6 @@ //! Parameters for hypercalls. -use x86_64::PhysAddr; - -use crate::MAX_ARGC_ENVC; +use crate::{GuestPhysAddr, MAX_ARGC_ENVC}; /// Parameters for a [`Cmdsize`](crate::Hypercall::Cmdsize) hypercall which provides the lengths of the items in the argument end environment vector. #[repr(C, packed)] @@ -23,9 +21,9 @@ pub struct CmdsizeParams { #[derive(Debug, Copy, Clone)] pub struct CmdvalParams { /// Pointer to a memory section in the VM memory large enough to store the argument string. - pub argv: PhysAddr, + pub argv: GuestPhysAddr, /// Pointer to a memory section in the VM memory large enough to store the environment values. - pub envp: PhysAddr, + pub envp: GuestPhysAddr, } /// Parameters for a [`Exit`](crate::Hypercall::Exit) hypercall. @@ -41,7 +39,7 @@ pub struct ExitParams { #[derive(Debug, Copy, Clone)] pub struct UnlinkParams { /// Address of the file that should be unlinked. - pub name: PhysAddr, + pub name: GuestPhysAddr, /// On success, `0` is returned. On error, `-1` is returned. pub ret: i32, } @@ -53,7 +51,7 @@ pub struct WriteParams { /// File descriptor of the file. pub fd: i32, /// Buffer to be written into the file. - pub buf: PhysAddr, + pub buf: GuestPhysAddr, /// Number of bytes in the buffer to be written. pub len: usize, } @@ -65,7 +63,7 @@ pub struct ReadPrams { /// File descriptor of the file. pub fd: i32, /// Buffer to read the file into. - pub buf: PhysAddr, + pub buf: GuestPhysAddr, /// Number of bytes to read into the buffer. pub len: usize, /// Number of bytes read on success. `-1` on failure. @@ -87,7 +85,7 @@ pub struct CloseParams { #[derive(Debug, Copy, Clone)] pub struct OpenParams { /// Pathname of the file to be opened. - pub name: PhysAddr, + pub name: GuestPhysAddr, /// Posix file access mode flags. pub flags: i32, /// Access permissions upon opening/creating a file. From 6c76d43af7c8fdad316c193a19c1385eab2647c0 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Mon, 5 Jun 2023 20:32:28 +0200 Subject: [PATCH 02/29] Renamed UhyveCPU to KvmCpu & XhyveCpu in respective module --- src/linux/gdb/mod.rs | 6 ++--- src/linux/mod.rs | 4 +++- src/linux/uhyve.rs | 6 ++--- src/linux/{vcpu.rs => x86_64/kvm_cpu.rs} | 30 ++++++++++++------------ src/linux/x86_64/mod.rs | 1 + src/macos/aarch64/uhyve.rs | 4 ++-- src/macos/aarch64/vcpu.rs | 10 ++++---- src/macos/mod.rs | 2 +- src/macos/x86_64/uhyve.rs | 7 +++--- src/macos/x86_64/vcpu.rs | 15 ++++++------ src/vm.rs | 9 +++++-- 11 files changed, 52 insertions(+), 42 deletions(-) rename src/linux/{vcpu.rs => x86_64/kvm_cpu.rs} (96%) mode change 100755 => 100644 create mode 100644 src/linux/x86_64/mod.rs diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index 450c28a0..490c764d 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -24,20 +24,20 @@ use self::breakpoints::SwBreakpoints; use super::HypervisorError; use crate::{ arch::x86_64::registers::debug::HwBreakpoints, - linux::{vcpu::UhyveCPU, KickSignal}, + linux::{x86_64::kvm_cpu::KvmCpu, KickSignal}, vm::{VcpuStopReason, VirtualCPU}, Uhyve, }; pub struct GdbUhyve { vm: Uhyve, - vcpu: UhyveCPU, + vcpu: KvmCpu, hw_breakpoints: HwBreakpoints, sw_breakpoints: SwBreakpoints, } impl GdbUhyve { - pub fn new(vm: Uhyve, vcpu: UhyveCPU) -> Self { + pub fn new(vm: Uhyve, vcpu: KvmCpu) -> Self { Self { vm, vcpu, diff --git a/src/linux/mod.rs b/src/linux/mod.rs index 6af6e386..2b11a562 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -1,6 +1,8 @@ +#[cfg(target_arch = "x86_64")] +pub mod x86_64; + pub mod gdb; pub mod uhyve; -pub mod vcpu; pub mod virtio; pub mod virtqueue; diff --git a/src/linux/uhyve.rs b/src/linux/uhyve.rs index ae874125..d331264f 100755 --- a/src/linux/uhyve.rs +++ b/src/linux/uhyve.rs @@ -24,7 +24,7 @@ use x86_64::{ use crate::{ consts::*, - linux::{vcpu::*, virtio::*, KVM}, + linux::{virtio::*, x86_64::kvm_cpu::KvmCpu, KVM}, params::Params, vm::{HypervisorResult, Vm}, x86_64::create_gdt_entry, @@ -224,8 +224,8 @@ impl Vm for Uhyve { self.path.as_path() } - fn create_cpu(&self, id: u32) -> HypervisorResult { - Ok(UhyveCPU::new( + fn create_cpu(&self, id: u32) -> HypervisorResult { + Ok(KvmCpu::new( id, self.path.clone(), self.args.clone(), diff --git a/src/linux/vcpu.rs b/src/linux/x86_64/kvm_cpu.rs old mode 100755 new mode 100644 similarity index 96% rename from src/linux/vcpu.rs rename to src/linux/x86_64/kvm_cpu.rs index 6d4cc57f..c52ce265 --- a/src/linux/vcpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -26,7 +26,7 @@ const MSR_IA32_MISC_ENABLE: u32 = 0x000001a0; const PCI_CONFIG_DATA_PORT: u16 = 0xCFC; const PCI_CONFIG_ADDRESS_PORT: u16 = 0xCF8; -pub struct UhyveCPU { +pub struct KvmCpu { id: u32, vcpu: VcpuFd, vm_start: usize, @@ -36,7 +36,7 @@ pub struct UhyveCPU { pci_addr: Option, } -impl UhyveCPU { +impl KvmCpu { pub unsafe fn memory(&mut self, start_addr: GuestVirtAddr, len: usize) -> &mut [u8] { let phys = self.virt_to_phys(start_addr); let host = self.host_address(phys); @@ -50,8 +50,8 @@ impl UhyveCPU { vcpu: VcpuFd, vm_start: usize, virtio_device: Arc>, - ) -> UhyveCPU { - UhyveCPU { + ) -> KvmCpu { + KvmCpu { id, vcpu, vm_start, @@ -247,7 +247,7 @@ impl UhyveCPU { } } -impl VirtualCPU for UhyveCPU { +impl VirtualCPU for KvmCpu { fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { self.setup_long_mode(entry_point, stack_address, cpu_id)?; self.setup_cpuid()?; @@ -469,16 +469,16 @@ impl VirtualCPU for UhyveCPU { println!("Segment registers:"); println!("------------------"); println!("register selector base limit type p dpl db s l g avl"); - UhyveCPU::show_segment("cs ", &sregs.cs); - UhyveCPU::show_segment("ss ", &sregs.ss); - UhyveCPU::show_segment("ds ", &sregs.ds); - UhyveCPU::show_segment("es ", &sregs.es); - UhyveCPU::show_segment("fs ", &sregs.fs); - UhyveCPU::show_segment("gs ", &sregs.gs); - UhyveCPU::show_segment("tr ", &sregs.tr); - UhyveCPU::show_segment("ldt", &sregs.ldt); - UhyveCPU::show_dtable("gdt", &sregs.gdt); - UhyveCPU::show_dtable("idt", &sregs.idt); + KvmCpu::show_segment("cs ", &sregs.cs); + KvmCpu::show_segment("ss ", &sregs.ss); + KvmCpu::show_segment("ds ", &sregs.ds); + KvmCpu::show_segment("es ", &sregs.es); + KvmCpu::show_segment("fs ", &sregs.fs); + KvmCpu::show_segment("gs ", &sregs.gs); + KvmCpu::show_segment("tr ", &sregs.tr); + KvmCpu::show_segment("ldt", &sregs.ldt); + KvmCpu::show_dtable("gdt", &sregs.gdt); + KvmCpu::show_dtable("idt", &sregs.idt); println!(); println!("\nAPIC:"); diff --git a/src/linux/x86_64/mod.rs b/src/linux/x86_64/mod.rs new file mode 100644 index 00000000..0452b284 --- /dev/null +++ b/src/linux/x86_64/mod.rs @@ -0,0 +1 @@ +pub mod kvm_cpu; diff --git a/src/macos/aarch64/uhyve.rs b/src/macos/aarch64/uhyve.rs index 317e37f4..45d5b723 100644 --- a/src/macos/aarch64/uhyve.rs +++ b/src/macos/aarch64/uhyve.rs @@ -147,8 +147,8 @@ impl Vm for Uhyve { self.path.as_path() } - fn create_cpu(&self, id: u32) -> HypervisorResult { - Ok(UhyveCPU::new( + fn create_cpu(&self, id: u32) -> HypervisorResult { + Ok(XhyveCpu::new( id, self.path.clone(), self.args.clone(), diff --git a/src/macos/aarch64/vcpu.rs b/src/macos/aarch64/vcpu.rs index 43a96e53..59b48eb3 100644 --- a/src/macos/aarch64/vcpu.rs +++ b/src/macos/aarch64/vcpu.rs @@ -19,7 +19,7 @@ use crate::{ vm::{HypervisorResult, VcpuStopReason, VirtualCPU}, }; -pub struct UhyveCPU { +pub struct XhyveCpu { id: u32, kernel_path: PathBuf, args: Vec, @@ -27,8 +27,8 @@ pub struct UhyveCPU { vm_start: usize, } -impl UhyveCPU { - pub fn new(id: u32, kernel_path: PathBuf, args: Vec, vm_start: usize) -> UhyveCPU { +impl XhyveCpu { + pub fn new(id: u32, kernel_path: PathBuf, args: Vec, vm_start: usize) -> XhyveCpu { Self { id, kernel_path, @@ -39,7 +39,7 @@ impl UhyveCPU { } } -impl VirtualCPU for UhyveCPU { +impl VirtualCPU for XhyveCpu { fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { debug!("Initialize VirtualCPU"); @@ -300,7 +300,7 @@ impl VirtualCPU for UhyveCPU { } } -impl Drop for UhyveCPU { +impl Drop for XhyveCpu { fn drop(&mut self) { self.vcpu.destroy().unwrap(); } diff --git a/src/macos/mod.rs b/src/macos/mod.rs index 51cc3254..84db7478 100644 --- a/src/macos/mod.rs +++ b/src/macos/mod.rs @@ -13,7 +13,7 @@ use core_affinity::CoreId; #[cfg(target_arch = "x86_64")] pub use crate::macos::x86_64::{uhyve, vcpu}; -use crate::vm::{VirtualCPU, Vm}; +use crate::{vcpu::VirtualCPU, vm::Vm}; pub type HypervisorError = xhypervisor::Error; pub type DebugExitInfo = (); diff --git a/src/macos/x86_64/uhyve.rs b/src/macos/x86_64/uhyve.rs index 92ad8e5c..b88c2693 100644 --- a/src/macos/x86_64/uhyve.rs +++ b/src/macos/x86_64/uhyve.rs @@ -19,8 +19,9 @@ use crate::{ consts::*, macos::x86_64::{ioapic::IoApic, vcpu::*}, params::Params, - vm::{HypervisorResult, Vm}, + vm::Vm, x86_64::create_gdt_entry, + HypervisorResult, }; pub struct Uhyve { @@ -145,8 +146,8 @@ impl Vm for Uhyve { self.path.as_path() } - fn create_cpu(&self, id: u32) -> HypervisorResult { - Ok(UhyveCPU::new( + fn create_cpu(&self, id: u32) -> HypervisorResult { + Ok(XhyveCpu::new( id, self.path.clone(), self.args.clone(), diff --git a/src/macos/x86_64/vcpu.rs b/src/macos/x86_64/vcpu.rs index 2e016b98..fe781a43 100644 --- a/src/macos/x86_64/vcpu.rs +++ b/src/macos/x86_64/vcpu.rs @@ -34,7 +34,8 @@ use xhypervisor::{ use crate::{ consts::*, macos::x86_64::ioapic::IoApic, - vm::{HypervisorResult, VcpuStopReason, VirtualCPU}, + vcpu::{VcpuStopReason, VirtualCPU}, + HypervisorResult, }; /// Extracted from `x86::msr`. @@ -151,7 +152,7 @@ lazy_static! { }; } -pub struct UhyveCPU { +pub struct XhyveCpu { id: u32, kernel_path: PathBuf, args: Vec, @@ -161,15 +162,15 @@ pub struct UhyveCPU { ioapic: Arc>, } -impl UhyveCPU { +impl XhyveCpu { pub fn new( id: u32, kernel_path: PathBuf, args: Vec, vm_start: usize, ioapic: Arc>, - ) -> UhyveCPU { - UhyveCPU { + ) -> XhyveCpu { + XhyveCpu { id, kernel_path, args, @@ -598,7 +599,7 @@ impl UhyveCPU { } } -impl VirtualCPU for UhyveCPU { +impl VirtualCPU for XhyveCpu { fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { self.setup_capabilities()?; self.setup_msr()?; @@ -963,7 +964,7 @@ impl VirtualCPU for UhyveCPU { } } -impl Drop for UhyveCPU { +impl Drop for XhyveCpu { fn drop(&mut self) { self.vcpu.destroy().unwrap(); } diff --git a/src/vm.rs b/src/vm.rs index 2e6106a9..2daf617f 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -20,10 +20,15 @@ use crate::arch::x86_64::{ use crate::{ arch, consts::*, - os::{vcpu::UhyveCPU, DebugExitInfo, HypervisorError}, + os::{DebugExitInfo, HypervisorError}, }; +#[cfg(all(target_arch = "x86_64", target_os = "linux"))] +use crate::linux::x86_64::kvm_cpu::KvmCpu; + pub type HypervisorResult = Result; +#[cfg(all(target_arch = "x86_64", target_os = "macos"))] +use crate::macos::x86_64::vcpu::XhyveCpu; #[derive(Error, Debug)] pub enum LoadKernelError { @@ -312,7 +317,7 @@ pub trait Vm { fn set_stack_address(&mut self, stack_addresss: u64); fn stack_address(&self) -> u64; fn kernel_path(&self) -> &Path; - fn create_cpu(&self, id: u32) -> HypervisorResult; + fn create_cpu(&self, id: u32) -> HypervisorResult; fn set_boot_info(&mut self, header: *const RawBootInfo); fn verbose(&self) -> bool; fn init_guest_mem(&self); From bedf10d2fce963e25c31280d615497f1a4c7ca37 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Mon, 5 Jun 2023 23:52:37 +0200 Subject: [PATCH 03/29] Put the virtual CPU trait in a separate module --- src/lib.rs | 4 +- src/linux/gdb/mod.rs | 2 +- src/linux/mod.rs | 3 +- src/linux/uhyve.rs | 3 +- src/linux/virtio.rs | 2 +- src/linux/x86_64/kvm_cpu.rs | 3 +- src/vcpu.rs | 270 +++++++++++++++++++++++++++++++++++ src/vm.rs | 275 +----------------------------------- 8 files changed, 283 insertions(+), 279 deletions(-) create mode 100644 src/vcpu.rs diff --git a/src/lib.rs b/src/lib.rs index 32efc03d..23e4e911 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,9 @@ pub use macos as os; pub mod params; #[cfg(target_os = "linux")] pub mod shared_queue; +mod vcpu; pub mod vm; pub use arch::*; -pub use os::uhyve::Uhyve; +pub use os::{uhyve::Uhyve, HypervisorError}; +pub type HypervisorResult = Result; diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index 490c764d..c98132fe 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -25,7 +25,7 @@ use super::HypervisorError; use crate::{ arch::x86_64::registers::debug::HwBreakpoints, linux::{x86_64::kvm_cpu::KvmCpu, KickSignal}, - vm::{VcpuStopReason, VirtualCPU}, + vcpu::{VcpuStopReason, VirtualCPU}, Uhyve, }; diff --git a/src/linux/mod.rs b/src/linux/mod.rs index 2b11a562..a5ed419c 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -29,7 +29,8 @@ use nix::sys::{ use crate::{ linux::gdb::{GdbUhyve, UhyveGdbEventLoop}, - vm::{VirtualCPU, Vm}, + vcpu::VirtualCPU, + vm::Vm, Uhyve, }; diff --git a/src/linux/uhyve.rs b/src/linux/uhyve.rs index d331264f..c77b7c7b 100755 --- a/src/linux/uhyve.rs +++ b/src/linux/uhyve.rs @@ -26,8 +26,9 @@ use crate::{ consts::*, linux::{virtio::*, x86_64::kvm_cpu::KvmCpu, KVM}, params::Params, - vm::{HypervisorResult, Vm}, + vm::Vm, x86_64::create_gdt_entry, + HypervisorResult, }; const KVM_32BIT_MAX_MEM_SIZE: usize = 1 << 32; diff --git a/src/linux/virtio.rs b/src/linux/virtio.rs index a400f86a..2d984197 100644 --- a/src/linux/virtio.rs +++ b/src/linux/virtio.rs @@ -6,7 +6,7 @@ use tun_tap::*; use uhyve_interface::GuestPhysAddr; use virtio_bindings::bindings::virtio_net::*; -use crate::{linux::virtqueue::*, vm::VirtualCPU}; +use crate::{linux::virtqueue::*, vcpu::VirtualCPU}; const STATUS_ACKNOWLEDGE: u8 = 0b00000001; const STATUS_DRIVER: u8 = 0b00000010; diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index c52ce265..f2be4a58 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -16,7 +16,8 @@ use x86_64::{ use crate::{ consts::*, linux::{virtio::*, KVM}, - vm::{HypervisorResult, VcpuStopReason, VirtualCPU}, + vcpu::{VcpuStopReason, VirtualCPU}, + HypervisorResult, }; const CPUID_EXT_HYPERVISOR: u32 = 1 << 31; diff --git a/src/vcpu.rs b/src/vcpu.rs new file mode 100644 index 00000000..edaf2bc0 --- /dev/null +++ b/src/vcpu.rs @@ -0,0 +1,270 @@ +use std::{ffi::OsString, io, io::Write, mem, os::unix::ffi::OsStrExt, path::Path, slice}; + +use uhyve_interface::{ + parameters::*, GuestPhysAddr, GuestVirtAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC, +}; + +/// The trait and fns that a virtual cpu requires +use crate::{os::DebugExitInfo, HypervisorResult}; + +/// Reasons for vCPU exits. +pub enum VcpuStopReason { + /// The vCPU stopped for debugging. + Debug(DebugExitInfo), + + /// The vCPU exited with the specified exit code. + Exit(i32), + + /// The vCPU got kicked. + Kick, +} + +/// Functionality a virtual CPU backend must provide to be used by uhyve +pub trait VirtualCPU { + /// Initialize the cpu to start running the code ad entry_point. + fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()>; + + /// Continues execution. + fn r#continue(&mut self) -> HypervisorResult; + + /// Start the execution of the CPU. The function will run until it crashes (`Err`) or terminate with an exit code (`Ok`). + fn run(&mut self) -> HypervisorResult>; + + /// Prints the VCPU's registers to stdout. + fn print_registers(&self); + + /// Translates an address from the VM's physical space into the hosts virtual space. + fn host_address(&self, addr: GuestPhysAddr) -> usize; + + /// Looks up the guests pagetable and translates a guest's virtual address to a guest's physical address. + fn virt_to_phys(&self, addr: GuestVirtAddr) -> GuestPhysAddr; + + /// Returns the (host) path of the kernel binary. + fn kernel_path(&self) -> &Path; + + // TODO remove + fn args(&self) -> &[OsString]; + + /// `addr` is the address of the hypercall parameter in the guest's memory space. `data` is the + /// parameter that was send to that address by the guest. + /// + /// # Safety + /// + /// - `data` must be a valid pointer to the data attached to the hypercall. + /// - The return value is only valid, as long as the guest is halted. + /// - This fn must not be called multiple times on the same data, to avoid creating mutable aliasing. + unsafe fn address_to_hypercall(&self, addr: u16, data: GuestPhysAddr) -> Option> { + if let Ok(hypercall_port) = HypercallAddress::try_from(addr) { + Some(match hypercall_port { + HypercallAddress::FileClose => { + let sysclose = unsafe { &mut *(self.host_address(data) as *mut CloseParams) }; + Hypercall::FileClose(sysclose) + } + HypercallAddress::FileLseek => { + let syslseek = unsafe { &mut *(self.host_address(data) as *mut LseekParams) }; + Hypercall::FileLseek(syslseek) + } + HypercallAddress::FileOpen => { + let sysopen = unsafe { &mut *(self.host_address(data) as *mut OpenParams) }; + Hypercall::FileOpen(sysopen) + } + HypercallAddress::FileRead => { + let sysread = unsafe { &mut *(self.host_address(data) as *mut ReadPrams) }; + Hypercall::FileRead(sysread) + } + HypercallAddress::FileWrite => { + let syswrite = unsafe { &*(self.host_address(data) as *const WriteParams) }; + Hypercall::FileWrite(syswrite) + } + HypercallAddress::FileUnlink => { + let sysunlink = unsafe { &mut *(self.host_address(data) as *mut UnlinkParams) }; + Hypercall::FileUnlink(sysunlink) + } + HypercallAddress::Exit => { + let sysexit = unsafe { &*(self.host_address(data) as *const ExitParams) }; + Hypercall::Exit(sysexit) + } + HypercallAddress::Cmdsize => { + let syssize = unsafe { &mut *(self.host_address(data) as *mut CmdsizeParams) }; + Hypercall::Cmdsize(syssize) + } + HypercallAddress::Cmdval => { + let syscmdval = unsafe { &*(self.host_address(data) as *const CmdvalParams) }; + Hypercall::Cmdval(syscmdval) + } + HypercallAddress::Uart => Hypercall::SerialWriteByte(data.as_u64() as u8), + _ => unimplemented!(), + }) + } else { + None + } + } + + fn cmdsize(&self, syssize: &mut CmdsizeParams) { + syssize.argc = 0; + syssize.envc = 0; + + let path = self.kernel_path(); + syssize.argsz[0] = path.as_os_str().len() as i32 + 1; + + let mut counter = 0; + for argument in self.args() { + syssize.argsz[(counter + 1) as usize] = argument.len() as i32 + 1; + + counter += 1; + } + + syssize.argc = counter + 1; + + let mut counter = 0; + for (key, value) in std::env::vars_os() { + if counter < MAX_ARGC_ENVC.try_into().unwrap() { + syssize.envsz[counter as usize] = (key.len() + value.len()) as i32 + 2; + counter += 1; + } + } + syssize.envc = counter; + + if counter >= MAX_ARGC_ENVC.try_into().unwrap() { + warn!("Environment is too large!"); + } + } + + /// Copies the arguments end environment of the application into the VM's memory. + fn cmdval(&self, syscmdval: &CmdvalParams) { + let argv = self.host_address(syscmdval.argv); + + // copy kernel path as first argument + { + let path = self.kernel_path().as_os_str(); + + let argvptr = + unsafe { self.host_address(GuestPhysAddr::new(*(argv as *mut *mut u8) as u64)) }; + let len = path.len(); + let slice = unsafe { slice::from_raw_parts_mut(argvptr as *mut u8, len + 1) }; + + // Create string for environment variable + slice[0..len].copy_from_slice(path.as_bytes()); + slice[len] = 0; + } + + // Copy the application arguments into the vm memory + for (counter, argument) in self.args().iter().enumerate() { + let argvptr = unsafe { + self.host_address(GuestPhysAddr::new( + *((argv + (counter + 1) * mem::size_of::()) as *mut *mut u8) as u64, + )) + }; + let len = argument.len(); + let slice = unsafe { slice::from_raw_parts_mut(argvptr as *mut u8, len + 1) }; + + // Create string for environment variable + slice[0..len].copy_from_slice(argument.as_bytes()); + slice[len] = 0; + } + + // Copy the environment variables into the vm memory + let mut counter = 0; + let envp = self.host_address(syscmdval.envp); + for (key, value) in std::env::vars_os() { + if counter < MAX_ARGC_ENVC.try_into().unwrap() { + let envptr = unsafe { + self.host_address(GuestPhysAddr::new( + *((envp + counter as usize * mem::size_of::()) as *mut *mut u8) + as u64, + )) + }; + let len = key.len() + value.len(); + let slice = unsafe { slice::from_raw_parts_mut(envptr as *mut u8, len + 2) }; + + // Create string for environment variable + slice[0..key.len()].copy_from_slice(key.as_bytes()); + slice[key.len()..(key.len() + 1)].copy_from_slice("=".as_bytes()); + slice[(key.len() + 1)..(len + 1)].copy_from_slice(value.as_bytes()); + slice[len + 1] = 0; + counter += 1; + } + } + } + + /// unlink deletes a name from the filesystem. This is used to handle `unlink` syscalls from the guest. + /// TODO: UNSAFE AS *%@#. It has to be checked that the VM is allowed to unlink that file! + fn unlink(&self, sysunlink: &mut UnlinkParams) { + unsafe { + sysunlink.ret = libc::unlink(self.host_address(sysunlink.name) as *const i8); + } + } + + /// Reads the exit code from an VM and returns it + fn exit(&self, sysexit: &ExitParams) -> i32 { + sysexit.arg + } + + /// Handles an open syscall by opening a file on the host. + fn open(&self, sysopen: &mut OpenParams) { + unsafe { + sysopen.ret = libc::open( + self.host_address(sysopen.name) as *const i8, + sysopen.flags, + sysopen.mode, + ); + } + } + + /// Handles an close syscall by closing the file on the host. + fn close(&self, sysclose: &mut CloseParams) { + unsafe { + sysclose.ret = libc::close(sysclose.fd); + } + } + + /// Handles an read syscall on the host. + fn read(&self, sysread: &mut ReadPrams) { + unsafe { + let bytes_read = libc::read( + sysread.fd, + self.host_address(sysread.buf) as *mut libc::c_void, + sysread.len, + ); + if bytes_read >= 0 { + sysread.ret = bytes_read; + } else { + sysread.ret = -1; + } + } + } + + /// Handles an write syscall on the host. + fn write(&self, syswrite: &WriteParams) -> io::Result<()> { + let mut bytes_written: usize = 0; + while bytes_written != syswrite.len { + unsafe { + let step = libc::write( + syswrite.fd, + self.host_address(syswrite.buf + bytes_written) as *const libc::c_void, + syswrite.len - bytes_written, + ); + if step >= 0 { + bytes_written += step as usize; + } else { + return Err(io::Error::last_os_error()); + } + } + } + + Ok(()) + } + + /// Handles an write syscall on the host. + fn lseek(&self, syslseek: &mut LseekParams) { + unsafe { + syslseek.offset = + libc::lseek(syslseek.fd, syslseek.offset as i64, syslseek.whence) as isize; + } + } + + /// Handles an UART syscall by writing to stdout. + fn uart(&self, buf: &[u8]) -> io::Result<()> { + io::stdout().write_all(buf) + } +} diff --git a/src/vm.rs b/src/vm.rs index 2daf617f..eb53b75e 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -1,7 +1,4 @@ -use std::{ - ffi::OsString, fs, io, io::Write, mem, mem::MaybeUninit, num::NonZeroU32, - os::unix::ffi::OsStrExt, path::Path, slice, time::SystemTime, -}; +use std::{fs, io, mem::MaybeUninit, num::NonZeroU32, path::Path, slice, time::SystemTime}; use hermit_entry::{ boot_info::{BootInfo, HardwareInfo, PlatformInfo, RawBootInfo, SerialPortBase}, @@ -9,22 +6,14 @@ use hermit_entry::{ }; use log::{error, warn}; use thiserror::Error; -use uhyve_interface::{ - parameters::*, GuestPhysAddr, GuestVirtAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC, -}; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::{ detect_freq_from_cpuid, detect_freq_from_cpuid_hypervisor_info, get_cpu_frequency_from_os, }; -use crate::{ - arch, - consts::*, - os::{DebugExitInfo, HypervisorError}, -}; - #[cfg(all(target_arch = "x86_64", target_os = "linux"))] use crate::linux::x86_64::kvm_cpu::KvmCpu; +use crate::{arch, consts::*, os::HypervisorError}; pub type HypervisorResult = Result; #[cfg(all(target_arch = "x86_64", target_os = "macos"))] @@ -42,266 +31,6 @@ pub enum LoadKernelError { pub type LoadKernelResult = Result; -/// Reasons for vCPU exits. -pub enum VcpuStopReason { - /// The vCPU stopped for debugging. - Debug(DebugExitInfo), - - /// The vCPU exited with the specified exit code. - Exit(i32), - - /// The vCPU got kicked. - Kick, -} - -pub trait VirtualCPU { - /// Initialize the cpu to start running the code ad entry_point. - fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()>; - - /// Continues execution. - fn r#continue(&mut self) -> HypervisorResult; - - /// Start the execution of the CPU. The function will run until it crashes (`Err`) or terminate with an exit code (`Ok`). - fn run(&mut self) -> HypervisorResult>; - - /// Prints the VCPU's registers to stdout. - fn print_registers(&self); - - /// Translates an address from the VM's physical space into the hosts virtual space. - fn host_address(&self, addr: GuestPhysAddr) -> usize; - - /// Looks up the guests pagetable and translates a guest's virtual address to a guest's physical address. - fn virt_to_phys(&self, addr: GuestVirtAddr) -> GuestPhysAddr; - - /// Returns the (host) path of the kernel binary. - fn kernel_path(&self) -> &Path; - - fn args(&self) -> &[OsString]; - - /// `addr` is the address of the hypercall parameter in the guest's memory space. `data` is the - /// parameter that was send to that address by the guest. - /// - /// # Safety - /// - /// - `data` must be a valid pointer to the data attached to the hypercall. - /// - The return value is only valid, as long as the guest is halted. - /// - This fn must not be called multiple times on the same data, to avoid creating mutable aliasing. - unsafe fn address_to_hypercall(&self, addr: u16, data: GuestPhysAddr) -> Option> { - if let Ok(hypercall_port) = HypercallAddress::try_from(addr) { - Some(match hypercall_port { - HypercallAddress::FileClose => { - let sysclose = unsafe { &mut *(self.host_address(data) as *mut CloseParams) }; - Hypercall::FileClose(sysclose) - } - HypercallAddress::FileLseek => { - let syslseek = unsafe { &mut *(self.host_address(data) as *mut LseekParams) }; - Hypercall::FileLseek(syslseek) - } - HypercallAddress::FileOpen => { - let sysopen = unsafe { &mut *(self.host_address(data) as *mut OpenParams) }; - Hypercall::FileOpen(sysopen) - } - HypercallAddress::FileRead => { - let sysread = unsafe { &mut *(self.host_address(data) as *mut ReadPrams) }; - Hypercall::FileRead(sysread) - } - HypercallAddress::FileWrite => { - let syswrite = unsafe { &*(self.host_address(data) as *const WriteParams) }; - Hypercall::FileWrite(syswrite) - } - HypercallAddress::FileUnlink => { - let sysunlink = unsafe { &mut *(self.host_address(data) as *mut UnlinkParams) }; - Hypercall::FileUnlink(sysunlink) - } - HypercallAddress::Exit => { - let sysexit = unsafe { &*(self.host_address(data) as *const ExitParams) }; - Hypercall::Exit(sysexit) - } - HypercallAddress::Cmdsize => { - let syssize = unsafe { &mut *(self.host_address(data) as *mut CmdsizeParams) }; - Hypercall::Cmdsize(syssize) - } - HypercallAddress::Cmdval => { - let syscmdval = unsafe { &*(self.host_address(data) as *const CmdvalParams) }; - Hypercall::Cmdval(syscmdval) - } - HypercallAddress::Uart => Hypercall::SerialWriteByte(data.as_u64() as u8), - _ => unimplemented!(), - }) - } else { - None - } - } - - fn cmdsize(&self, syssize: &mut CmdsizeParams) { - syssize.argc = 0; - syssize.envc = 0; - - let path = self.kernel_path(); - syssize.argsz[0] = path.as_os_str().len() as i32 + 1; - - let mut counter = 0; - for argument in self.args() { - syssize.argsz[(counter + 1) as usize] = argument.len() as i32 + 1; - - counter += 1; - } - - syssize.argc = counter + 1; - - let mut counter = 0; - for (key, value) in std::env::vars_os() { - if counter < MAX_ARGC_ENVC.try_into().unwrap() { - syssize.envsz[counter as usize] = (key.len() + value.len()) as i32 + 2; - counter += 1; - } - } - syssize.envc = counter; - - if counter >= MAX_ARGC_ENVC.try_into().unwrap() { - warn!("Environment is too large!"); - } - } - - /// Copies the arguments end environment of the application into the VM's memory. - fn cmdval(&self, syscmdval: &CmdvalParams) { - let argv = self.host_address(syscmdval.argv); - - // copy kernel path as first argument - { - let path = self.kernel_path().as_os_str(); - - let argvptr = - unsafe { self.host_address(GuestPhysAddr::new(*(argv as *mut *mut u8) as u64)) }; - let len = path.len(); - let slice = unsafe { slice::from_raw_parts_mut(argvptr as *mut u8, len + 1) }; - - // Create string for environment variable - slice[0..len].copy_from_slice(path.as_bytes()); - slice[len] = 0; - } - - // Copy the application arguments into the vm memory - for (counter, argument) in self.args().iter().enumerate() { - let argvptr = unsafe { - self.host_address(GuestPhysAddr::new( - *((argv + (counter + 1) * mem::size_of::()) as *mut *mut u8) as u64, - )) - }; - let len = argument.len(); - let slice = unsafe { slice::from_raw_parts_mut(argvptr as *mut u8, len + 1) }; - - // Create string for environment variable - slice[0..len].copy_from_slice(argument.as_bytes()); - slice[len] = 0; - } - - // Copy the environment variables into the vm memory - let mut counter = 0; - let envp = self.host_address(syscmdval.envp); - for (key, value) in std::env::vars_os() { - if counter < MAX_ARGC_ENVC.try_into().unwrap() { - let envptr = unsafe { - self.host_address(GuestPhysAddr::new( - *((envp + counter as usize * mem::size_of::()) as *mut *mut u8) - as u64, - )) - }; - let len = key.len() + value.len(); - let slice = unsafe { slice::from_raw_parts_mut(envptr as *mut u8, len + 2) }; - - // Create string for environment variable - slice[0..key.len()].copy_from_slice(key.as_bytes()); - slice[key.len()..(key.len() + 1)].copy_from_slice("=".as_bytes()); - slice[(key.len() + 1)..(len + 1)].copy_from_slice(value.as_bytes()); - slice[len + 1] = 0; - counter += 1; - } - } - } - - /// unlink deletes a name from the filesystem. This is used to handle `unlink` syscalls from the guest. - /// TODO: UNSAFE AS *%@#. It has to be checked that the VM is allowed to unlink that file! - fn unlink(&self, sysunlink: &mut UnlinkParams) { - unsafe { - sysunlink.ret = libc::unlink(self.host_address(sysunlink.name) as *const i8); - } - } - - /// Reads the exit code from an VM and returns it - fn exit(&self, sysexit: &ExitParams) -> i32 { - sysexit.arg - } - - /// Handles an open syscall by opening a file on the host. - fn open(&self, sysopen: &mut OpenParams) { - unsafe { - sysopen.ret = libc::open( - self.host_address(sysopen.name) as *const i8, - sysopen.flags, - sysopen.mode, - ); - } - } - - /// Handles an close syscall by closing the file on the host. - fn close(&self, sysclose: &mut CloseParams) { - unsafe { - sysclose.ret = libc::close(sysclose.fd); - } - } - - /// Handles an read syscall on the host. - fn read(&self, sysread: &mut ReadPrams) { - unsafe { - let bytes_read = libc::read( - sysread.fd, - self.host_address(sysread.buf) as *mut libc::c_void, - sysread.len, - ); - if bytes_read >= 0 { - sysread.ret = bytes_read; - } else { - sysread.ret = -1; - } - } - } - - /// Handles an write syscall on the host. - fn write(&self, syswrite: &WriteParams) -> io::Result<()> { - let mut bytes_written: usize = 0; - while bytes_written != syswrite.len { - unsafe { - let step = libc::write( - syswrite.fd, - self.host_address(syswrite.buf + bytes_written) as *const libc::c_void, - syswrite.len - bytes_written, - ); - if step >= 0 { - bytes_written += step as usize; - } else { - return Err(io::Error::last_os_error()); - } - } - } - - Ok(()) - } - - /// Handles an write syscall on the host. - fn lseek(&self, syslseek: &mut LseekParams) { - unsafe { - syslseek.offset = - libc::lseek(syslseek.fd, syslseek.offset as i64, syslseek.whence) as isize; - } - } - - /// Handles an UART syscall by writing to stdout. - fn uart(&self, buf: &[u8]) -> io::Result<()> { - io::stdout().write_all(buf) - } -} - pub trait Vm { /// Returns the number of cores for the vm. fn num_cpus(&self) -> u32; From 745cac2c0e7b49ee4d90dceda4a68c88610be3b7 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Wed, 7 Jun 2023 11:00:14 +0200 Subject: [PATCH 04/29] Created Vm_Guest_Memory trait and changed MmapMemory accordingly --- src/linux/mem.rs | 130 +++++++++++++++++++++++++++++++++++++ src/linux/mod.rs | 5 +- src/linux/uhyve.rs | 128 ++---------------------------------- src/macos/aarch64/uhyve.rs | 2 +- src/macos/x86_64/uhyve.rs | 2 +- src/vm.rs | 13 +++- 6 files changed, 154 insertions(+), 126 deletions(-) create mode 100644 src/linux/mem.rs diff --git a/src/linux/mem.rs b/src/linux/mem.rs new file mode 100644 index 00000000..5babf627 --- /dev/null +++ b/src/linux/mem.rs @@ -0,0 +1,130 @@ +use std::{mem, os::raw::c_void, ptr::NonNull}; + +use log::debug; +use nix::sys::mman::*; +use x86_64::{ + structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, + PhysAddr, +}; + +use crate::{consts::*, vm::VmGuestMemory, x86_64::create_gdt_entry}; + +/// A general purpose VM memory section that can exploit some Linux Kernel features. +#[derive(Debug)] +pub struct MmapMemory { + // TODO: make private + pub flags: u32, + pub memory_size: usize, + pub guest_address: usize, + pub host_address: usize, +} + +impl MmapMemory { + pub fn new( + flags: u32, + memory_size: usize, + guest_address: u64, + huge_pages: bool, + mergeable: bool, + ) -> MmapMemory { + let host_address = unsafe { + mmap_anonymous( + None, + memory_size.try_into().unwrap(), + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_PRIVATE | MapFlags::MAP_NORESERVE, + ) + .expect("mmap failed") + }; + + if mergeable { + debug!("Enable kernel feature to merge same pages"); + unsafe { + madvise(host_address, memory_size, MmapAdvise::MADV_MERGEABLE).unwrap(); + } + } + + if huge_pages { + debug!("Uhyve uses huge pages"); + unsafe { + madvise(host_address, memory_size, MmapAdvise::MADV_HUGEPAGE).unwrap(); + } + } + + MmapMemory { + flags, + memory_size, + guest_address: guest_address as usize, + host_address: host_address.as_ptr() as usize, + } + } + + #[allow(dead_code)] + fn as_slice_mut(&mut self) -> &mut [u8] { + unsafe { std::slice::from_raw_parts_mut(self.host_address as *mut u8, self.memory_size) } + } +} +impl VmGuestMemory for MmapMemory { + fn guest_mem(&self) -> (*mut u8, usize) { + (self.host_address as *mut u8, self.memory_size) + } + + /// Initialize the page tables for the guest + fn init_guest_mem(&mut self) { + // TODO: Move to x86_64 + debug!("Initialize guest memory"); + + let (mem_addr, _) = self.guest_mem(); + + unsafe { + let pml4 = &mut *((mem_addr as u64 + BOOT_PML4.as_u64()) as *mut PageTable); + let pdpte = &mut *((mem_addr as u64 + BOOT_PDPTE.as_u64()) as *mut PageTable); + let pde = &mut *((mem_addr as u64 + BOOT_PDE.as_u64()) as *mut PageTable); + let gdt_entry: u64 = mem_addr as u64 + BOOT_GDT.as_u64(); + + // initialize GDT + *((gdt_entry) as *mut u64) = create_gdt_entry(0, 0, 0); + *((gdt_entry + mem::size_of::<*mut u64>() as u64) as *mut u64) = + create_gdt_entry(0xA09B, 0, 0xFFFFF); /* code */ + *((gdt_entry + 2 * mem::size_of::<*mut u64>() as u64) as *mut u64) = + create_gdt_entry(0xC093, 0, 0xFFFFF); /* data */ + + /* For simplicity we currently use 2MB pages and only a single + PML4/PDPTE/PDE. */ + + // per default is the memory zeroed, which we allocate by the system call mmap + /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); + libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); + libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ + + pml4[0].set_addr( + BOOT_PDPTE, + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + pml4[511].set_addr( + BOOT_PML4, + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + pdpte[0].set_addr(BOOT_PDE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE); + + for i in 0..512 { + let addr = PhysAddr::new(i as u64 * Page::::SIZE); + pde[i].set_addr( + addr, + PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, + ); + } + } + } +} + +impl Drop for MmapMemory { + fn drop(&mut self) { + if self.memory_size > 0 { + let host_addr = NonNull::new(self.host_address as *mut c_void).unwrap(); + unsafe { + munmap(host_addr, self.memory_size).unwrap(); + } + } + } +} diff --git a/src/linux/mod.rs b/src/linux/mod.rs index a5ed419c..f3b21376 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -2,6 +2,7 @@ pub mod x86_64; pub mod gdb; +pub mod mem; pub mod uhyve; pub mod virtio; pub mod virtqueue; @@ -10,7 +11,7 @@ pub type HypervisorError = kvm_ioctls::Error; pub type DebugExitInfo = kvm_bindings::kvm_debug_exit_arch; use std::{ - io, mem, + io, net::{TcpListener, TcpStream}, os::unix::prelude::JoinHandleExt, sync::{Arc, Barrier}, @@ -51,7 +52,7 @@ impl KickSignal { assert!(kick_signal <= SIGRTMAX()); // TODO: Remove the transmute once realtime signals are properly supported by nix // https://github.com/nix-rust/nix/issues/495 - unsafe { mem::transmute(kick_signal) } + unsafe { std::mem::transmute(kick_signal) } } fn register_handler() -> nix::Result<()> { diff --git a/src/linux/uhyve.rs b/src/linux/uhyve.rs index c77b7c7b..8a9e3c04 100755 --- a/src/linux/uhyve.rs +++ b/src/linux/uhyve.rs @@ -4,10 +4,9 @@ use std::{ cmp, ffi::OsString, - fmt, mem, - os::raw::c_void, + fmt, path::{Path, PathBuf}, - ptr::{self, NonNull}, + ptr, sync::{Arc, Mutex}, }; @@ -15,19 +14,13 @@ use hermit_entry::boot_info::RawBootInfo; use kvm_bindings::*; use kvm_ioctls::VmFd; use log::debug; -use nix::sys::mman::*; use vmm_sys_util::eventfd::EventFd; -use x86_64::{ - structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, - PhysAddr, -}; use crate::{ consts::*, - linux::{virtio::*, x86_64::kvm_cpu::KvmCpu, KVM}, + linux::{mem::MmapMemory, virtio::*, x86_64::kvm_cpu::KvmCpu, KVM}, params::Params, - vm::Vm, - x86_64::create_gdt_entry, + vm::{Vm, VmGuestMemory}, HypervisorResult, }; @@ -163,7 +156,7 @@ impl Uhyve { "gdbstub is only supported with one CPU" ); - let hyve = Uhyve { + let mut hyve = Uhyve { vm, offset: 0, entry_point: 0, @@ -241,50 +234,8 @@ impl Vm for Uhyve { } /// Initialize the page tables for the guest - fn init_guest_mem(&self) { - debug!("Initialize guest memory"); - - let (mem_addr, _) = self.guest_mem(); - - unsafe { - let pml4 = &mut *((mem_addr as u64 + BOOT_PML4.as_u64()) as *mut PageTable); - let pdpte = &mut *((mem_addr as u64 + BOOT_PDPTE.as_u64()) as *mut PageTable); - let pde = &mut *((mem_addr as u64 + BOOT_PDE.as_u64()) as *mut PageTable); - let gdt_entry: u64 = mem_addr as u64 + BOOT_GDT.as_u64(); - - // initialize GDT - *((gdt_entry) as *mut u64) = create_gdt_entry(0, 0, 0); - *((gdt_entry + mem::size_of::<*mut u64>() as u64) as *mut u64) = - create_gdt_entry(0xA09B, 0, 0xFFFFF); /* code */ - *((gdt_entry + 2 * mem::size_of::<*mut u64>() as u64) as *mut u64) = - create_gdt_entry(0xC093, 0, 0xFFFFF); /* data */ - - /* For simplicity we currently use 2MB pages and only a single - PML4/PDPTE/PDE. */ - - // per default is the memory zeroed, which we allocate by the system call mmap - /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ - - pml4[0].set_addr( - BOOT_PDPTE, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pml4[511].set_addr( - BOOT_PML4, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pdpte[0].set_addr(BOOT_PDE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE); - - for i in 0..512 { - let addr = PhysAddr::new(i as u64 * Page::::SIZE); - pde[i].set_addr( - addr, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, - ); - } - } + fn init_guest_mem(&mut self) { + self.mem.init_guest_mem(); } } @@ -293,68 +244,3 @@ impl Vm for Uhyve { #[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for Uhyve {} unsafe impl Sync for Uhyve {} - -#[derive(Debug)] -struct MmapMemory { - flags: u32, - memory_size: usize, - guest_address: usize, - host_address: usize, -} - -impl MmapMemory { - pub fn new( - flags: u32, - memory_size: usize, - guest_address: u64, - huge_pages: bool, - mergeable: bool, - ) -> MmapMemory { - let host_address = unsafe { - mmap_anonymous( - None, - memory_size.try_into().unwrap(), - ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, - MapFlags::MAP_PRIVATE | MapFlags::MAP_NORESERVE, - ) - .expect("mmap failed") - }; - - if mergeable { - debug!("Enable kernel feature to merge same pages"); - unsafe { - madvise(host_address, memory_size, MmapAdvise::MADV_MERGEABLE).unwrap(); - } - } - - if huge_pages { - debug!("Uhyve uses huge pages"); - unsafe { - madvise(host_address, memory_size, MmapAdvise::MADV_HUGEPAGE).unwrap(); - } - } - - MmapMemory { - flags, - memory_size, - guest_address: guest_address as usize, - host_address: host_address.as_ptr() as usize, - } - } - - #[allow(dead_code)] - fn as_slice_mut(&mut self) -> &mut [u8] { - unsafe { std::slice::from_raw_parts_mut(self.host_address as *mut u8, self.memory_size) } - } -} - -impl Drop for MmapMemory { - fn drop(&mut self) { - if self.memory_size > 0 { - let host_addr = NonNull::new(self.host_address as *mut c_void).unwrap(); - unsafe { - munmap(host_addr, self.memory_size).unwrap(); - } - } - } -} diff --git a/src/macos/aarch64/uhyve.rs b/src/macos/aarch64/uhyve.rs index 45d5b723..554a5252 100644 --- a/src/macos/aarch64/uhyve.rs +++ b/src/macos/aarch64/uhyve.rs @@ -160,7 +160,7 @@ impl Vm for Uhyve { self.boot_info = header; } - fn init_guest_mem(&self) { + fn init_guest_mem(&mut self) { debug!("Initialize guest memory"); let (mem_addr, _) = self.guest_mem(); diff --git a/src/macos/x86_64/uhyve.rs b/src/macos/x86_64/uhyve.rs index b88c2693..0c5c03b0 100644 --- a/src/macos/x86_64/uhyve.rs +++ b/src/macos/x86_64/uhyve.rs @@ -161,7 +161,7 @@ impl Vm for Uhyve { } /// Initialize the page tables for the guest - fn init_guest_mem(&self) { + fn init_guest_mem(&mut self) { debug!("Initialize guest memory"); let (mem_addr, _) = self.guest_mem(); diff --git a/src/vm.rs b/src/vm.rs index eb53b75e..26cb4503 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -49,7 +49,7 @@ pub trait Vm { fn create_cpu(&self, id: u32) -> HypervisorResult; fn set_boot_info(&mut self, header: *const RawBootInfo); fn verbose(&self) -> bool; - fn init_guest_mem(&self); + fn init_guest_mem(&mut self); unsafe fn load_kernel(&mut self) -> LoadKernelResult<()> { let elf = fs::read(self.kernel_path())?; @@ -125,3 +125,14 @@ fn detect_cpu_freq() -> u32 { } mhz } + +/// A section of memory that is reserved for the VM guest. +pub trait VmGuestMemory { + /// returns a pointer to the address of the guest memory and the size of the memory in bytes. + // TODO: replace with slice + // TODO: rename to memory + fn guest_mem(&self) -> (*mut u8, usize); + + /// Initialize the memory + fn init_guest_mem(&mut self); +} From c15979e21492de9af8fd20998efac78c7d2ce0ec Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Wed, 7 Jun 2023 17:45:33 +0200 Subject: [PATCH 05/29] moved the remaining KVM functionality from linux/uhyve to linux/x86_64/kvm_cpu --- src/linux/uhyve.rs | 102 +++-------------------------------- src/linux/x86_64/kvm_cpu.rs | 105 +++++++++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 100 deletions(-) diff --git a/src/linux/uhyve.rs b/src/linux/uhyve.rs index 8a9e3c04..764a86dd 100755 --- a/src/linux/uhyve.rs +++ b/src/linux/uhyve.rs @@ -2,7 +2,6 @@ //! create a Virtual Machine and load the kernel. use std::{ - cmp, ffi::OsString, fmt, path::{Path, PathBuf}, @@ -11,25 +10,19 @@ use std::{ }; use hermit_entry::boot_info::RawBootInfo; -use kvm_bindings::*; -use kvm_ioctls::VmFd; -use log::debug; -use vmm_sys_util::eventfd::EventFd; use crate::{ - consts::*, - linux::{mem::MmapMemory, virtio::*, x86_64::kvm_cpu::KvmCpu, KVM}, + linux::{ + mem::MmapMemory, + virtio::*, + x86_64::kvm_cpu::{initialize_kvm, KvmCpu}, + }, params::Params, vm::{Vm, VmGuestMemory}, HypervisorResult, }; -const KVM_32BIT_MAX_MEM_SIZE: usize = 1 << 32; -const KVM_32BIT_GAP_SIZE: usize = 768 << 20; -const KVM_32BIT_GAP_START: usize = KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE; - pub struct Uhyve { - vm: VmFd, offset: u64, entry_point: u64, stack_address: u64, @@ -62,92 +55,15 @@ impl Uhyve { pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult { let memory_size = params.memory_size.get(); - let vm = KVM.create_vm()?; - let mem = MmapMemory::new(0, memory_size, 0, params.thp, params.ksm); - let sz = cmp::min(memory_size, KVM_32BIT_GAP_START); - // create virtio interface // TODO: Remove allow once fixed: // https://github.com/rust-lang/rust-clippy/issues/11382 #[allow(clippy::arc_with_non_send_sync)] let virtio_device = Arc::new(Mutex::new(VirtioNetPciDevice::new())); - let kvm_mem = kvm_userspace_memory_region { - slot: 0, - flags: mem.flags, - memory_size: sz as u64, - guest_phys_addr: mem.guest_address as u64, - userspace_addr: mem.host_address as u64, - }; - - unsafe { vm.set_user_memory_region(kvm_mem) }?; - - if memory_size > KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE { - let kvm_mem = kvm_userspace_memory_region { - slot: 1, - flags: mem.flags, - memory_size: (memory_size - KVM_32BIT_GAP_START - KVM_32BIT_GAP_SIZE) as u64, - guest_phys_addr: (mem.guest_address + KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) - as u64, - userspace_addr: (mem.host_address + KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) - as u64, - }; - - unsafe { vm.set_user_memory_region(kvm_mem) }?; - } - - debug!("Initialize interrupt controller"); - - // create basic interrupt controller - vm.create_irq_chip()?; - - if params.pit { - vm.create_pit2(kvm_pit_config::default()).unwrap(); - } - - // enable x2APIC support - let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { - cap: KVM_CAP_X2APIC_API, - flags: 0, - ..Default::default() - }; - cap.args[0] = - (KVM_X2APIC_API_USE_32BIT_IDS | KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK).into(); - vm.enable_cap(&cap) - .expect("Unable to enable x2apic support"); - - // currently, we support only system, which provides the - // cpu feature TSC_DEADLINE - let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { - cap: KVM_CAP_TSC_DEADLINE_TIMER, - ..Default::default() - }; - cap.args[0] = 0; - vm.enable_cap(&cap) - .expect_err("Processor feature `tsc deadline` isn't supported!"); - - let cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { - cap: KVM_CAP_IRQFD, - ..Default::default() - }; - vm.enable_cap(&cap) - .expect_err("The support of KVM_CAP_IRQFD is currently required"); - - let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { - cap: KVM_CAP_X86_DISABLE_EXITS, - flags: 0, - ..Default::default() - }; - cap.args[0] = - (KVM_X86_DISABLE_EXITS_PAUSE | KVM_X86_DISABLE_EXITS_MWAIT | KVM_X86_DISABLE_EXITS_HLT) - .into(); - vm.enable_cap(&cap) - .expect("Unable to disable exists due pause instructions"); - - let evtfd = EventFd::new(0).unwrap(); - vm.register_irqfd(&evtfd, UHYVE_IRQ_NET)?; + initialize_kvm(&mem, params.pit)?; let cpu_count = params.cpu_count.get(); @@ -157,7 +73,6 @@ impl Uhyve { ); let mut hyve = Uhyve { - vm, offset: 0, entry_point: 0, stack_address: 0, @@ -219,14 +134,13 @@ impl Vm for Uhyve { } fn create_cpu(&self, id: u32) -> HypervisorResult { - Ok(KvmCpu::new( + KvmCpu::new( id, self.path.clone(), self.args.clone(), - self.vm.create_vcpu(id.into())?, self.mem.host_address, self.virtio_device.clone(), - )) + ) } fn set_boot_info(&mut self, header: *const RawBootInfo) { diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index f2be4a58..d7c29765 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -6,8 +6,9 @@ use std::{ }; use kvm_bindings::*; -use kvm_ioctls::{VcpuExit, VcpuFd}; +use kvm_ioctls::{VcpuExit, VcpuFd, VmFd}; use uhyve_interface::{GuestPhysAddr, GuestVirtAddr, Hypercall}; +use vmm_sys_util::eventfd::EventFd; use x86_64::{ registers::control::{Cr0Flags, Cr4Flags}, structures::paging::PageTableFlags, @@ -15,7 +16,7 @@ use x86_64::{ use crate::{ consts::*, - linux::{virtio::*, KVM}, + linux::{mem::MmapMemory, virtio::*, KVM}, vcpu::{VcpuStopReason, VirtualCPU}, HypervisorResult, }; @@ -27,6 +28,93 @@ const MSR_IA32_MISC_ENABLE: u32 = 0x000001a0; const PCI_CONFIG_DATA_PORT: u16 = 0xCFC; const PCI_CONFIG_ADDRESS_PORT: u16 = 0xCF8; +const KVM_32BIT_MAX_MEM_SIZE: usize = 1 << 32; +const KVM_32BIT_GAP_SIZE: usize = 768 << 20; +const KVM_32BIT_GAP_START: usize = KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE; + +static KVM_ACCESS: Mutex> = Mutex::new(None); + +pub fn initialize_kvm(mem: &MmapMemory, use_pit: bool) -> HypervisorResult<()> { + let sz = std::cmp::min(mem.memory_size, KVM_32BIT_GAP_START); + + let kvm_mem = kvm_userspace_memory_region { + slot: 0, + flags: mem.flags, + memory_size: sz as u64, + guest_phys_addr: mem.guest_address as u64, + userspace_addr: mem.host_address as u64, + }; + + // TODO: make vm a global struct in linux blah + let vm = KVM.create_vm()?; + unsafe { vm.set_user_memory_region(kvm_mem) }?; + + if mem.memory_size > KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE { + let kvm_mem = kvm_userspace_memory_region { + slot: 1, + flags: mem.flags, + memory_size: (mem.memory_size - KVM_32BIT_GAP_START - KVM_32BIT_GAP_SIZE) as u64, + guest_phys_addr: (mem.guest_address + KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) as u64, + userspace_addr: (mem.host_address + KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) as u64, + }; + + unsafe { vm.set_user_memory_region(kvm_mem) }?; + } + + debug!("Initialize interrupt controller"); + + // create basic interrupt controller + vm.create_irq_chip()?; + + if use_pit { + vm.create_pit2(kvm_pit_config::default()).unwrap(); + } + + // enable x2APIC support + let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { + cap: KVM_CAP_X2APIC_API, + flags: 0, + ..Default::default() + }; + cap.args[0] = (KVM_X2APIC_API_USE_32BIT_IDS | KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK).into(); + vm.enable_cap(&cap) + .expect("Unable to enable x2apic support"); + + // currently, we support only system, which provides the + // cpu feature TSC_DEADLINE + let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { + cap: KVM_CAP_TSC_DEADLINE_TIMER, + ..Default::default() + }; + cap.args[0] = 0; + vm.enable_cap(&cap) + .expect_err("Processor feature `tsc deadline` isn't supported!"); + + let cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { + cap: KVM_CAP_IRQFD, + ..Default::default() + }; + vm.enable_cap(&cap) + .expect_err("The support of KVM_CAP_IRQFD is currently required"); + + let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { + cap: KVM_CAP_X86_DISABLE_EXITS, + flags: 0, + ..Default::default() + }; + cap.args[0] = + (KVM_X86_DISABLE_EXITS_PAUSE | KVM_X86_DISABLE_EXITS_MWAIT | KVM_X86_DISABLE_EXITS_HLT) + .into(); + vm.enable_cap(&cap) + .expect("Unable to disable exists due pause instructions"); + + let evtfd = EventFd::new(0).unwrap(); + vm.register_irqfd(&evtfd, UHYVE_IRQ_NET)?; + + *KVM_ACCESS.lock().unwrap() = Some(vm); + Ok(()) +} + pub struct KvmCpu { id: u32, vcpu: VcpuFd, @@ -48,11 +136,16 @@ impl KvmCpu { id: u32, kernel_path: PathBuf, args: Vec, - vcpu: VcpuFd, vm_start: usize, virtio_device: Arc>, - ) -> KvmCpu { - KvmCpu { + ) -> HypervisorResult { + let vcpu = KVM_ACCESS + .lock() + .unwrap() + .as_mut() + .expect("KVM is not initialized yet") + .create_vcpu(id.try_into().unwrap())?; + Ok(KvmCpu { id, vcpu, vm_start, @@ -60,7 +153,7 @@ impl KvmCpu { args, virtio_device, pci_addr: None, - } + }) } fn setup_cpuid(&self) -> Result<(), kvm_ioctls::Error> { From 993126a17cc08d761b26c4768a766cca002c66b0 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Thu, 8 Jun 2023 10:55:31 +0200 Subject: [PATCH 06/29] Made mmap generic to unix and use it on macos as well --- Cargo.toml | 2 +- src/lib.rs | 1 + src/linux/mod.rs | 1 - src/linux/uhyve.rs | 2 +- src/linux/x86_64/kvm_cpu.rs | 3 +- src/macos/mod.rs | 3 +- src/macos/x86_64/uhyve.rs | 57 ++++++++----------------------------- src/macos/xhyve.rs | 12 ++++++++ src/{linux => }/mem.rs | 29 +++++++++++++------ 9 files changed, 52 insertions(+), 58 deletions(-) create mode 100644 src/macos/xhyve.rs rename src/{linux => }/mem.rs (84%) diff --git a/Cargo.toml b/Cargo.toml index d30aa9cb..849f82d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ instrument = ["rftrace", "rftrace-frontend"] [dependencies] byte-unit = { version = "5", features = ["byte"] } clap = { version = "4.5", features = ["derive", "env"] } +nix = { version = "0.28", features = ["mman", "pthread", "signal"] } core_affinity = "0.8" either = "1.10" env_logger = "0.11" @@ -62,7 +63,6 @@ rftrace-frontend = { version = "0.1", optional = true } kvm-bindings = "0.7" kvm-ioctls = "0.16" mac_address = "1.1" -nix = { version = "0.28", features = ["mman", "pthread", "signal"] } tun-tap = { version = "0.1", default-features = false } virtio-bindings = { version = "0.2", features = ["virtio-v4_14_0"] } vmm-sys-util = "0.12" diff --git a/src/lib.rs b/src/lib.rs index 23e4e911..be81411f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,6 +19,7 @@ pub use linux as os; pub mod macos; #[cfg(target_os = "macos")] pub use macos as os; +pub mod mem; pub mod params; #[cfg(target_os = "linux")] pub mod shared_queue; diff --git a/src/linux/mod.rs b/src/linux/mod.rs index f3b21376..7bf8e520 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -2,7 +2,6 @@ pub mod x86_64; pub mod gdb; -pub mod mem; pub mod uhyve; pub mod virtio; pub mod virtqueue; diff --git a/src/linux/uhyve.rs b/src/linux/uhyve.rs index 764a86dd..041b7853 100755 --- a/src/linux/uhyve.rs +++ b/src/linux/uhyve.rs @@ -13,10 +13,10 @@ use hermit_entry::boot_info::RawBootInfo; use crate::{ linux::{ - mem::MmapMemory, virtio::*, x86_64::kvm_cpu::{initialize_kvm, KvmCpu}, }, + mem::MmapMemory, params::Params, vm::{Vm, VmGuestMemory}, HypervisorResult, diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index d7c29765..6713751c 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -16,7 +16,8 @@ use x86_64::{ use crate::{ consts::*, - linux::{mem::MmapMemory, virtio::*, KVM}, + linux::{virtio::*, KVM}, + mem::MmapMemory, vcpu::{VcpuStopReason, VirtualCPU}, HypervisorResult, }; diff --git a/src/macos/mod.rs b/src/macos/mod.rs index 84db7478..001c2208 100644 --- a/src/macos/mod.rs +++ b/src/macos/mod.rs @@ -4,6 +4,7 @@ pub mod aarch64; pub use crate::macos::aarch64::{uhyve, vcpu}; #[cfg(target_arch = "x86_64")] pub mod x86_64; +pub mod xhyve; use std::{ sync::{mpsc, Arc}, thread, @@ -13,7 +14,7 @@ use core_affinity::CoreId; #[cfg(target_arch = "x86_64")] pub use crate::macos::x86_64::{uhyve, vcpu}; -use crate::{vcpu::VirtualCPU, vm::Vm}; +use crate::vm::Vm; pub type HypervisorError = xhypervisor::Error; pub type DebugExitInfo = (); diff --git a/src/macos/x86_64/uhyve.rs b/src/macos/x86_64/uhyve.rs index 0c5c03b0..d5ec3c5c 100644 --- a/src/macos/x86_64/uhyve.rs +++ b/src/macos/x86_64/uhyve.rs @@ -13,11 +13,14 @@ use x86_64::{ structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, PhysAddr, }; -use xhypervisor::{create_vm, map_mem, unmap_mem, MemPerm}; use crate::{ consts::*, - macos::x86_64::{ioapic::IoApic, vcpu::*}, + macos::{ + x86_64::{ioapic::IoApic, vcpu::*}, + xhyve::initialize_xhyve, + }, + mem::MmapMemory, params::Params, vm::Vm, x86_64::create_gdt_entry, @@ -28,8 +31,7 @@ pub struct Uhyve { offset: u64, entry_point: u64, stack_address: u64, - mem_size: usize, - guest_mem: *mut c_void, + mem: MmapMemory, num_cpus: u32, path: PathBuf, args: Vec, @@ -43,8 +45,7 @@ impl std::fmt::Debug for Uhyve { f.debug_struct("Uhyve") .field("entry_point", &self.entry_point) .field("stack_address", &self.stack_address) - .field("mem_size", &self.mem_size) - .field("guest_mem", &self.guest_mem) + .field("mem", &self.mem) .field("num_cpus", &self.num_cpus) .field("path", &self.path) .field("boot_info", &self.boot_info) @@ -58,39 +59,15 @@ impl Uhyve { pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult { let memory_size = params.memory_size.get(); - let mem = unsafe { - libc::mmap( - std::ptr::null_mut(), - memory_size, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_PRIVATE | libc::MAP_ANON | libc::MAP_NORESERVE, - -1, - 0, - ) - }; - - assert_ne!(libc::MAP_FAILED, mem, "mmap failed"); - - debug!("Allocate memory for the guest at 0x{:x}", mem as usize); + let mem = MmapMemory::new(0, memory_size, 0, false, false); - debug!("Create VM..."); - create_vm()?; - - debug!("Map guest memory..."); - unsafe { - map_mem( - std::slice::from_raw_parts(mem as *mut u8, memory_size), - 0, - MemPerm::ExecAndWrite, - )?; - } + initialize_xhyve(&mut mem)?; let hyve = Uhyve { offset: 0, entry_point: 0, stack_address: 0, - mem_size: memory_size, - guest_mem: mem, + mem, num_cpus: params.cpu_count.get(), path: kernel_path, args: params.kernel_args, @@ -139,7 +116,7 @@ impl Vm for Uhyve { } fn guest_mem(&self) -> (*mut u8, usize) { - (self.guest_mem as *mut u8, self.mem_size) + (self.mem.host_address as *mut u8, self.mem.memory_size) } fn kernel_path(&self) -> &Path { @@ -151,7 +128,7 @@ impl Vm for Uhyve { id, self.path.clone(), self.args.clone(), - self.guest_mem as usize, + self.guest_mem().0 as usize, self.ioapic.clone(), )) } @@ -208,15 +185,5 @@ impl Vm for Uhyve { } } -impl Drop for Uhyve { - fn drop(&mut self) { - unmap_mem(0, self.mem_size).unwrap(); - - unsafe { - libc::munmap(self.guest_mem, self.mem_size); - } - } -} - unsafe impl Send for Uhyve {} unsafe impl Sync for Uhyve {} diff --git a/src/macos/xhyve.rs b/src/macos/xhyve.rs new file mode 100644 index 00000000..bce10d1f --- /dev/null +++ b/src/macos/xhyve.rs @@ -0,0 +1,12 @@ +use xhypervisor::{create_vm, map_mem, MemPerm}; + +use crate::{mem::MmapMemory, HypervisorResult}; + +pub fn initialize_xhyve(mem: &mut MmapMemory) -> HypervisorResult<()> { + debug!("Create VM..."); + create_vm()?; + + debug!("Map guest memory..."); + map_mem(mem.as_slice_mut(), 0, MemPerm::ExecAndWrite)?; + Ok(()) +} diff --git a/src/linux/mem.rs b/src/mem.rs similarity index 84% rename from src/linux/mem.rs rename to src/mem.rs index 5babf627..162bae77 100644 --- a/src/linux/mem.rs +++ b/src/mem.rs @@ -38,16 +38,30 @@ impl MmapMemory { }; if mergeable { - debug!("Enable kernel feature to merge same pages"); - unsafe { - madvise(host_address, memory_size, MmapAdvise::MADV_MERGEABLE).unwrap(); + #[cfg(target_os = "linux")] + { + debug!("Enable kernel feature to merge same pages"); + unsafe { + madvise(host_address, memory_size, MmapAdvise::MADV_MERGEABLE).unwrap(); + } + } + #[cfg(not(target_os = "linux"))] + { + error!("OS does not support same page merging"); } } if huge_pages { - debug!("Uhyve uses huge pages"); - unsafe { - madvise(host_address, memory_size, MmapAdvise::MADV_HUGEPAGE).unwrap(); + #[cfg(target_os = "linux")] + { + debug!("Uhyve uses huge pages"); + unsafe { + madvise(host_address, memory_size, MmapAdvise::MADV_HUGEPAGE).unwrap(); + } + } + #[cfg(not(target_os = "linux"))] + { + error!("OS does not support huge pages"); } } @@ -59,8 +73,7 @@ impl MmapMemory { } } - #[allow(dead_code)] - fn as_slice_mut(&mut self) -> &mut [u8] { + pub fn as_slice_mut(& self) -> &mut [u8] { unsafe { std::slice::from_raw_parts_mut(self.host_address as *mut u8, self.memory_size) } } } From 0e7955df0056423cc6d6413faa22a1691c5eeef5 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Thu, 8 Jun 2023 23:01:13 +0200 Subject: [PATCH 07/29] Moved the ioapic for macos to the vcpu section --- src/macos/x86_64/uhyve.rs | 10 +--------- src/macos/x86_64/vcpu.rs | 12 ++++-------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/src/macos/x86_64/uhyve.rs b/src/macos/x86_64/uhyve.rs index d5ec3c5c..ca9a3275 100644 --- a/src/macos/x86_64/uhyve.rs +++ b/src/macos/x86_64/uhyve.rs @@ -3,7 +3,6 @@ use std::{ mem, path::{Path, PathBuf}, ptr, - sync::{Arc, Mutex}, }; use hermit_entry::boot_info::RawBootInfo; @@ -16,10 +15,7 @@ use x86_64::{ use crate::{ consts::*, - macos::{ - x86_64::{ioapic::IoApic, vcpu::*}, - xhyve::initialize_xhyve, - }, + macos::{x86_64::vcpu::*, xhyve::initialize_xhyve}, mem::MmapMemory, params::Params, vm::Vm, @@ -36,7 +32,6 @@ pub struct Uhyve { path: PathBuf, args: Vec, boot_info: *const RawBootInfo, - ioapic: Arc>, verbose: bool, } @@ -49,7 +44,6 @@ impl std::fmt::Debug for Uhyve { .field("num_cpus", &self.num_cpus) .field("path", &self.path) .field("boot_info", &self.boot_info) - .field("ioapic", &self.ioapic) .field("verbose", &self.verbose) .finish() } @@ -72,7 +66,6 @@ impl Uhyve { path: kernel_path, args: params.kernel_args, boot_info: ptr::null(), - ioapic: Arc::new(Mutex::new(IoApic::new())), verbose: params.verbose, }; @@ -129,7 +122,6 @@ impl Vm for Uhyve { self.path.clone(), self.args.clone(), self.guest_mem().0 as usize, - self.ioapic.clone(), )) } diff --git a/src/macos/x86_64/vcpu.rs b/src/macos/x86_64/vcpu.rs index fe781a43..eaf0dd58 100644 --- a/src/macos/x86_64/vcpu.rs +++ b/src/macos/x86_64/vcpu.rs @@ -38,6 +38,8 @@ use crate::{ HypervisorResult, }; +static IOAPIC: Arc> = Arc::new(Mutex::new(IoApic::new())); + /// Extracted from `x86::msr`. mod msr { /// See Section 17.13, Time-Stamp Counter. @@ -163,13 +165,7 @@ pub struct XhyveCpu { } impl XhyveCpu { - pub fn new( - id: u32, - kernel_path: PathBuf, - args: Vec, - vm_start: usize, - ioapic: Arc>, - ) -> XhyveCpu { + pub fn new(id: u32, kernel_path: PathBuf, args: Vec, vm_start: usize) -> XhyveCpu { XhyveCpu { id, kernel_path, @@ -177,7 +173,7 @@ impl XhyveCpu { vcpu: xhypervisor::VirtualCpu::new().unwrap(), vm_start, apic_base: APIC_DEFAULT_BASE, - ioapic, + ioapic: IOAPIC.clone(), } } From 386339d8e0b541270892b971364eb44737c06d43 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Thu, 8 Jun 2023 23:29:03 +0200 Subject: [PATCH 08/29] Move virtio and virtqueue out of linux, since there seems to be macos support now --- Cargo.toml | 7 +++---- src/lib.rs | 2 ++ src/linux/mod.rs | 2 -- src/linux/uhyve.rs | 6 ++---- src/linux/x86_64/kvm_cpu.rs | 3 ++- src/{linux => }/virtio.rs | 2 +- src/{linux => }/virtqueue.rs | 0 7 files changed, 10 insertions(+), 12 deletions(-) rename src/{linux => }/virtio.rs (99%) rename src/{linux => }/virtqueue.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 849f82d7..53760989 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,19 +52,18 @@ hermit-entry = { version = "0.9", features = ["loader"] } lazy_static = "1.4" libc = "0.2" log = "0.4" +mac_address = "1.1" thiserror = "1.0" time = "0.3" +tun-tap = { version = "0.1.3", default-features = false } uhyve-interface = { version = "0.1", path = "uhyve-interface" } - +virtio-bindings = { version = "0.2", features = ["virtio-v4_14_0"] } rftrace = { version = "0.1", optional = true } rftrace-frontend = { version = "0.1", optional = true } [target.'cfg(target_os = "linux")'.dependencies] kvm-bindings = "0.7" kvm-ioctls = "0.16" -mac_address = "1.1" -tun-tap = { version = "0.1", default-features = false } -virtio-bindings = { version = "0.2", features = ["virtio-v4_14_0"] } vmm-sys-util = "0.12" [target.'cfg(target_os = "macos")'.dependencies] diff --git a/src/lib.rs b/src/lib.rs index be81411f..1a176682 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,6 +24,8 @@ pub mod params; #[cfg(target_os = "linux")] pub mod shared_queue; mod vcpu; +pub mod virtio; +pub mod virtqueue; pub mod vm; pub use arch::*; diff --git a/src/linux/mod.rs b/src/linux/mod.rs index 7bf8e520..39ecd7e0 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -3,8 +3,6 @@ pub mod x86_64; pub mod gdb; pub mod uhyve; -pub mod virtio; -pub mod virtqueue; pub type HypervisorError = kvm_ioctls::Error; pub type DebugExitInfo = kvm_bindings::kvm_debug_exit_arch; diff --git a/src/linux/uhyve.rs b/src/linux/uhyve.rs index 041b7853..6b73a73b 100755 --- a/src/linux/uhyve.rs +++ b/src/linux/uhyve.rs @@ -12,12 +12,10 @@ use std::{ use hermit_entry::boot_info::RawBootInfo; use crate::{ - linux::{ - virtio::*, - x86_64::kvm_cpu::{initialize_kvm, KvmCpu}, - }, + linux::x86_64::kvm_cpu::{initialize_kvm, KvmCpu}, mem::MmapMemory, params::Params, + virtio::*, vm::{Vm, VmGuestMemory}, HypervisorResult, }; diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index 6713751c..6b3b994f 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -16,9 +16,10 @@ use x86_64::{ use crate::{ consts::*, - linux::{virtio::*, KVM}, + linux::KVM, mem::MmapMemory, vcpu::{VcpuStopReason, VirtualCPU}, + virtio::*, HypervisorResult, }; diff --git a/src/linux/virtio.rs b/src/virtio.rs similarity index 99% rename from src/linux/virtio.rs rename to src/virtio.rs index 2d984197..5bd6c23b 100644 --- a/src/linux/virtio.rs +++ b/src/virtio.rs @@ -6,7 +6,7 @@ use tun_tap::*; use uhyve_interface::GuestPhysAddr; use virtio_bindings::bindings::virtio_net::*; -use crate::{linux::virtqueue::*, vcpu::VirtualCPU}; +use crate::{vcpu::VirtualCPU, virtqueue::*}; const STATUS_ACKNOWLEDGE: u8 = 0b00000001; const STATUS_DRIVER: u8 = 0b00000010; diff --git a/src/linux/virtqueue.rs b/src/virtqueue.rs similarity index 100% rename from src/linux/virtqueue.rs rename to src/virtqueue.rs From 69c1427938e6ec1e74e2cd7d03f77aa65ae703b8 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Mon, 12 Jun 2023 11:34:28 +0200 Subject: [PATCH 09/29] Made the ioapic a static mutex --- src/macos/x86_64/vcpu.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/macos/x86_64/vcpu.rs b/src/macos/x86_64/vcpu.rs index eaf0dd58..bf394342 100644 --- a/src/macos/x86_64/vcpu.rs +++ b/src/macos/x86_64/vcpu.rs @@ -4,7 +4,7 @@ use std::{ arch::x86_64::__cpuid_count, ffi::OsString, path::{Path, PathBuf}, - sync::{Arc, Mutex}, + sync::Mutex, }; use burst::x86::{disassemble_64, InstructionOperation, OperandType}; @@ -38,7 +38,7 @@ use crate::{ HypervisorResult, }; -static IOAPIC: Arc> = Arc::new(Mutex::new(IoApic::new())); +static IOAPIC: Mutex> = Mutex::new(None); /// Extracted from `x86::msr`. mod msr { @@ -161,7 +161,6 @@ pub struct XhyveCpu { vcpu: xhypervisor::VirtualCpu, vm_start: usize, apic_base: u64, - ioapic: Arc>, } impl XhyveCpu { @@ -173,7 +172,6 @@ impl XhyveCpu { vcpu: xhypervisor::VirtualCpu::new().unwrap(), vm_start, apic_base: APIC_DEFAULT_BASE, - ioapic: IOAPIC.clone(), } } @@ -542,14 +540,21 @@ impl XhyveCpu { } }; - self.ioapic + IOAPIC .lock() .unwrap() + .as_mut() + .expect("IOAPIC not initialized") .write(address - IOAPIC_BASE, val); } if read { - let value = self.ioapic.lock().unwrap().read(address - IOAPIC_BASE); + let value = IOAPIC + .lock() + .unwrap() + .as_mut() + .expect("IOAPIC not initialized") + .read(address - IOAPIC_BASE); match instr.operands[0].operand { OperandType::REG_EDI => { From a1f423c2482b4184cff0e88b1cd90ef470cd7883 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Mon, 12 Jun 2023 11:38:52 +0200 Subject: [PATCH 10/29] Merged Linux and Macos Uhyve into UhyveVM struct and removed VM trait Fixes #382 Breaks the whole ARM code --- benches/vm/mod.rs | 8 +- src/bin/uhyve.rs | 11 +- src/lib.rs | 2 +- src/linux/gdb/mod.rs | 6 +- src/linux/gdb/section_offsets.rs | 1 - src/linux/mod.rs | 11 +- src/linux/uhyve.rs | 158 ------------------ src/macos/mod.rs | 11 +- src/macos/x86_64/mod.rs | 1 - src/macos/x86_64/uhyve.rs | 181 --------------------- src/params.rs | 2 - src/vm.rs | 264 +++++++++++++++++++++++++------ tests/common.rs | 4 +- tests/gdb.rs | 4 +- 14 files changed, 247 insertions(+), 417 deletions(-) delete mode 100755 src/linux/uhyve.rs delete mode 100644 src/macos/x86_64/uhyve.rs diff --git a/benches/vm/mod.rs b/benches/vm/mod.rs index 4fdad1d5..143e6f0e 100644 --- a/benches/vm/mod.rs +++ b/benches/vm/mod.rs @@ -1,6 +1,9 @@ use byte_unit::Byte; use criterion::{criterion_group, Criterion}; -use uhyvelib::{params::Params, vm::Vm, Uhyve}; +use uhyvelib::{ + params::Params, + vm::{UhyveVm, VcpuDefault}, +}; pub fn load_vm_hello_world(c: &mut Criterion) { let path = [env!("CARGO_MANIFEST_DIR"), "benches_data/hello_world"] @@ -10,7 +13,8 @@ pub fn load_vm_hello_world(c: &mut Criterion) { memory_size: Byte::from_u64(1024 * 4096 * 500).try_into().unwrap(), ..Default::default() }; - let mut vm = Uhyve::new(path, params).expect("Unable to create VM"); + + let mut vm = UhyveVm::::new(path, params).expect("Unable to create VM"); c.bench_function("vm::load_kernel(hello world)", |b| { b.iter(|| unsafe { diff --git a/src/bin/uhyve.rs b/src/bin/uhyve.rs index 5122c4fd..82c5e438 100644 --- a/src/bin/uhyve.rs +++ b/src/bin/uhyve.rs @@ -11,7 +11,7 @@ use either::Either; use thiserror::Error; use uhyvelib::{ params::{CpuCount, GuestMemorySize, Params}, - Uhyve, + vm::UhyveVm, }; #[cfg(feature = "instrument")] @@ -262,6 +262,8 @@ impl From for Params { pit, #[cfg(target_os = "linux")] gdb_port, + #[cfg(target_os = "macos")] + gdb_port: None, kernel_args, } } @@ -279,9 +281,10 @@ fn run_uhyve() -> i32 { let affinity = args.cpu_args.clone().get_affinity(&mut app); let params = Params::from(args); - Uhyve::new(kernel, params) - .expect("Unable to create VM! Is the hypervisor interface (e.g. KVM) activated?") - .run(affinity) + let vm = UhyveVm::new(kernel, params) + .expect("Unable to create VM! Is the hypervisor interface (e.g. KVM) activated?"); + + vm.run(affinity) } fn main() { diff --git a/src/lib.rs b/src/lib.rs index 1a176682..1d1565c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,5 +29,5 @@ pub mod virtqueue; pub mod vm; pub use arch::*; -pub use os::{uhyve::Uhyve, HypervisorError}; +pub use os::HypervisorError; pub type HypervisorResult = Result; diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index c98132fe..4de964d1 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -26,18 +26,18 @@ use crate::{ arch::x86_64::registers::debug::HwBreakpoints, linux::{x86_64::kvm_cpu::KvmCpu, KickSignal}, vcpu::{VcpuStopReason, VirtualCPU}, - Uhyve, + vm::UhyveVm, }; pub struct GdbUhyve { - vm: Uhyve, + vm: UhyveVm, vcpu: KvmCpu, hw_breakpoints: HwBreakpoints, sw_breakpoints: SwBreakpoints, } impl GdbUhyve { - pub fn new(vm: Uhyve, vcpu: KvmCpu) -> Self { + pub fn new(vm: UhyveVm, vcpu: KvmCpu) -> Self { Self { vm, vcpu, diff --git a/src/linux/gdb/section_offsets.rs b/src/linux/gdb/section_offsets.rs index 20a6ed82..4774c804 100644 --- a/src/linux/gdb/section_offsets.rs +++ b/src/linux/gdb/section_offsets.rs @@ -4,7 +4,6 @@ use gdbstub::target::{ }; use super::GdbUhyve; -use crate::vm::Vm; impl target::ext::section_offsets::SectionOffsets for GdbUhyve { fn get_section_offsets(&mut self) -> Result, Self::Error> { diff --git a/src/linux/mod.rs b/src/linux/mod.rs index 39ecd7e0..7f58e1df 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -2,7 +2,6 @@ pub mod x86_64; pub mod gdb; -pub mod uhyve; pub type HypervisorError = kvm_ioctls::Error; pub type DebugExitInfo = kvm_bindings::kvm_debug_exit_arch; @@ -26,10 +25,12 @@ use nix::sys::{ }; use crate::{ - linux::gdb::{GdbUhyve, UhyveGdbEventLoop}, + linux::{ + gdb::{GdbUhyve, UhyveGdbEventLoop}, + x86_64::kvm_cpu::KvmCpu, + }, vcpu::VirtualCPU, - vm::Vm, - Uhyve, + vm::UhyveVm, }; lazy_static! { @@ -69,7 +70,7 @@ impl KickSignal { } } -impl Uhyve { +impl UhyveVm { /// Runs the VM. /// /// Blocks until the VM has finished execution. diff --git a/src/linux/uhyve.rs b/src/linux/uhyve.rs deleted file mode 100755 index 6b73a73b..00000000 --- a/src/linux/uhyve.rs +++ /dev/null @@ -1,158 +0,0 @@ -//! This file contains the entry point to the Hypervisor. The Uhyve utilizes KVM to -//! create a Virtual Machine and load the kernel. - -use std::{ - ffi::OsString, - fmt, - path::{Path, PathBuf}, - ptr, - sync::{Arc, Mutex}, -}; - -use hermit_entry::boot_info::RawBootInfo; - -use crate::{ - linux::x86_64::kvm_cpu::{initialize_kvm, KvmCpu}, - mem::MmapMemory, - params::Params, - virtio::*, - vm::{Vm, VmGuestMemory}, - HypervisorResult, -}; - -pub struct Uhyve { - offset: u64, - entry_point: u64, - stack_address: u64, - mem: MmapMemory, - num_cpus: u32, - path: PathBuf, - args: Vec, - boot_info: *const RawBootInfo, - verbose: bool, - virtio_device: Arc>, - pub(super) gdb_port: Option, -} - -impl fmt::Debug for Uhyve { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Uhyve") - .field("entry_point", &self.entry_point) - .field("stack_address", &self.stack_address) - .field("mem", &self.mem) - .field("num_cpus", &self.num_cpus) - .field("path", &self.path) - .field("boot_info", &self.boot_info) - .field("verbose", &self.verbose) - .field("virtio_device", &self.virtio_device) - .finish() - } -} - -impl Uhyve { - pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult { - let memory_size = params.memory_size.get(); - - let mem = MmapMemory::new(0, memory_size, 0, params.thp, params.ksm); - - // create virtio interface - // TODO: Remove allow once fixed: - // https://github.com/rust-lang/rust-clippy/issues/11382 - #[allow(clippy::arc_with_non_send_sync)] - let virtio_device = Arc::new(Mutex::new(VirtioNetPciDevice::new())); - - initialize_kvm(&mem, params.pit)?; - - let cpu_count = params.cpu_count.get(); - - assert!( - params.gdb_port.is_none() || cpu_count == 1, - "gdbstub is only supported with one CPU" - ); - - let mut hyve = Uhyve { - offset: 0, - entry_point: 0, - stack_address: 0, - mem, - num_cpus: cpu_count, - path: kernel_path, - args: params.kernel_args, - boot_info: ptr::null(), - verbose: params.verbose, - virtio_device, - gdb_port: params.gdb_port, - }; - - hyve.init_guest_mem(); - - Ok(hyve) - } -} - -impl Vm for Uhyve { - fn verbose(&self) -> bool { - self.verbose - } - - fn set_offset(&mut self, offset: u64) { - self.offset = offset; - } - - fn get_offset(&self) -> u64 { - self.offset - } - - fn set_entry_point(&mut self, entry: u64) { - self.entry_point = entry; - } - - fn get_entry_point(&self) -> u64 { - self.entry_point - } - - fn set_stack_address(&mut self, stack_addresss: u64) { - self.stack_address = stack_addresss; - } - - fn stack_address(&self) -> u64 { - self.stack_address - } - - fn num_cpus(&self) -> u32 { - self.num_cpus - } - - fn guest_mem(&self) -> (*mut u8, usize) { - (self.mem.host_address as *mut u8, self.mem.memory_size) - } - - fn kernel_path(&self) -> &Path { - self.path.as_path() - } - - fn create_cpu(&self, id: u32) -> HypervisorResult { - KvmCpu::new( - id, - self.path.clone(), - self.args.clone(), - self.mem.host_address, - self.virtio_device.clone(), - ) - } - - fn set_boot_info(&mut self, header: *const RawBootInfo) { - self.boot_info = header; - } - - /// Initialize the page tables for the guest - fn init_guest_mem(&mut self) { - self.mem.init_guest_mem(); - } -} - -// TODO: Investigate soundness -// https://github.com/hermitcore/uhyve/issues/229 -#[allow(clippy::non_send_fields_in_send_ty)] -unsafe impl Send for Uhyve {} -unsafe impl Sync for Uhyve {} diff --git a/src/macos/mod.rs b/src/macos/mod.rs index 001c2208..66e3d8d9 100644 --- a/src/macos/mod.rs +++ b/src/macos/mod.rs @@ -1,9 +1,8 @@ #[cfg(target_arch = "aarch64")] pub mod aarch64; -#[cfg(target_arch = "aarch64")] -pub use crate::macos::aarch64::{uhyve, vcpu}; #[cfg(target_arch = "x86_64")] pub mod x86_64; + pub mod xhyve; use std::{ sync::{mpsc, Arc}, @@ -12,14 +11,16 @@ use std::{ use core_affinity::CoreId; +#[cfg(target_arch = "aarch64")] +pub use crate::macos::aarch64::{uhyve, vcpu}; #[cfg(target_arch = "x86_64")] -pub use crate::macos::x86_64::{uhyve, vcpu}; -use crate::vm::Vm; +use crate::macos::x86_64::vcpu::XhyveCpu; +use crate::{vcpu::VirtualCPU, vm::UhyveVm}; pub type HypervisorError = xhypervisor::Error; pub type DebugExitInfo = (); -impl uhyve::Uhyve { +impl UhyveVm { /// Runs the VM. /// /// Blocks until the VM has finished execution. diff --git a/src/macos/x86_64/mod.rs b/src/macos/x86_64/mod.rs index 19fdd515..b5dc20fe 100644 --- a/src/macos/x86_64/mod.rs +++ b/src/macos/x86_64/mod.rs @@ -1,3 +1,2 @@ mod ioapic; -pub mod uhyve; pub mod vcpu; diff --git a/src/macos/x86_64/uhyve.rs b/src/macos/x86_64/uhyve.rs deleted file mode 100644 index ca9a3275..00000000 --- a/src/macos/x86_64/uhyve.rs +++ /dev/null @@ -1,181 +0,0 @@ -use std::{ - ffi::OsString, - mem, - path::{Path, PathBuf}, - ptr, -}; - -use hermit_entry::boot_info::RawBootInfo; -use libc::{self, c_void}; -use log::debug; -use x86_64::{ - structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, - PhysAddr, -}; - -use crate::{ - consts::*, - macos::{x86_64::vcpu::*, xhyve::initialize_xhyve}, - mem::MmapMemory, - params::Params, - vm::Vm, - x86_64::create_gdt_entry, - HypervisorResult, -}; - -pub struct Uhyve { - offset: u64, - entry_point: u64, - stack_address: u64, - mem: MmapMemory, - num_cpus: u32, - path: PathBuf, - args: Vec, - boot_info: *const RawBootInfo, - verbose: bool, -} - -impl std::fmt::Debug for Uhyve { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Uhyve") - .field("entry_point", &self.entry_point) - .field("stack_address", &self.stack_address) - .field("mem", &self.mem) - .field("num_cpus", &self.num_cpus) - .field("path", &self.path) - .field("boot_info", &self.boot_info) - .field("verbose", &self.verbose) - .finish() - } -} - -impl Uhyve { - pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult { - let memory_size = params.memory_size.get(); - - let mem = MmapMemory::new(0, memory_size, 0, false, false); - - initialize_xhyve(&mut mem)?; - - let hyve = Uhyve { - offset: 0, - entry_point: 0, - stack_address: 0, - mem, - num_cpus: params.cpu_count.get(), - path: kernel_path, - args: params.kernel_args, - boot_info: ptr::null(), - verbose: params.verbose, - }; - - hyve.init_guest_mem(); - - Ok(hyve) - } -} - -impl Vm for Uhyve { - fn verbose(&self) -> bool { - self.verbose - } - - fn set_offset(&mut self, offset: u64) { - self.offset = offset; - } - - fn get_offset(&self) -> u64 { - self.offset - } - - fn set_entry_point(&mut self, entry: u64) { - self.entry_point = entry; - } - - fn get_entry_point(&self) -> u64 { - self.entry_point - } - - fn set_stack_address(&mut self, stack_address: u64) { - self.stack_address = stack_address; - } - - fn stack_address(&self) -> u64 { - self.stack_address - } - - fn num_cpus(&self) -> u32 { - self.num_cpus - } - - fn guest_mem(&self) -> (*mut u8, usize) { - (self.mem.host_address as *mut u8, self.mem.memory_size) - } - - fn kernel_path(&self) -> &Path { - self.path.as_path() - } - - fn create_cpu(&self, id: u32) -> HypervisorResult { - Ok(XhyveCpu::new( - id, - self.path.clone(), - self.args.clone(), - self.guest_mem().0 as usize, - )) - } - - fn set_boot_info(&mut self, header: *const RawBootInfo) { - self.boot_info = header; - } - - /// Initialize the page tables for the guest - fn init_guest_mem(&mut self) { - debug!("Initialize guest memory"); - - let (mem_addr, _) = self.guest_mem(); - - unsafe { - let pml4 = &mut *((mem_addr as u64 + BOOT_PML4.as_u64()) as *mut PageTable); - let pdpte = &mut *((mem_addr as u64 + BOOT_PDPTE.as_u64()) as *mut PageTable); - let pde = &mut *((mem_addr as u64 + BOOT_PDE.as_u64()) as *mut PageTable); - let gdt_entry: u64 = mem_addr as u64 + BOOT_GDT.as_u64(); - - // initialize GDT - *((gdt_entry) as *mut u64) = create_gdt_entry(0, 0, 0); - *((gdt_entry + mem::size_of::<*mut u64>() as u64) as *mut u64) = - create_gdt_entry(0xA09B, 0, 0xFFFFF); /* code */ - *((gdt_entry + 2 * mem::size_of::<*mut u64>() as u64) as *mut u64) = - create_gdt_entry(0xC093, 0, 0xFFFFF); /* data */ - - /* For simplicity we currently use 2MB pages and only a single - PML4/PDPTE/PDE. */ - - // per default is the memory zeroed, which we allocate by the system call mmap - /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ - - pml4[0].set_addr( - BOOT_PDPTE, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pml4[511].set_addr( - BOOT_PML4, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pdpte[0].set_addr(BOOT_PDE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE); - - for i in 0..512 { - let addr = PhysAddr::new(i as u64 * Page::::SIZE); - pde[i].set_addr( - addr, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, - ); - } - } - } -} - -unsafe impl Send for Uhyve {} -unsafe impl Sync for Uhyve {} diff --git a/src/params.rs b/src/params.rs index 78a2dc8a..11d69f64 100644 --- a/src/params.rs +++ b/src/params.rs @@ -32,7 +32,6 @@ pub struct Params { pub pit: bool, /// GDB server port - #[cfg(target_os = "linux")] pub gdb_port: Option, /// Arguments to forward to the kernel @@ -52,7 +51,6 @@ impl Default for Params { #[cfg(target_os = "linux")] pit: false, cpu_count: Default::default(), - #[cfg(target_os = "linux")] gdb_port: Default::default(), kernel_args: Default::default(), } diff --git a/src/vm.rs b/src/vm.rs index 26cb4503..36186d44 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -1,4 +1,14 @@ -use std::{fs, io, mem::MaybeUninit, num::NonZeroU32, path::Path, slice, time::SystemTime}; +use std::{ + ffi::OsString, + fmt, fs, io, + marker::PhantomData, + mem::MaybeUninit, + num::NonZeroU32, + path::{Path, PathBuf}, + ptr, slice, + sync::{Arc, Mutex}, + time::SystemTime, +}; use hermit_entry::{ boot_info::{BootInfo, HardwareInfo, PlatformInfo, RawBootInfo, SerialPortBase}, @@ -12,12 +22,15 @@ use crate::arch::x86_64::{ detect_freq_from_cpuid, detect_freq_from_cpuid_hypervisor_info, get_cpu_frequency_from_os, }; #[cfg(all(target_arch = "x86_64", target_os = "linux"))] -use crate::linux::x86_64::kvm_cpu::KvmCpu; -use crate::{arch, consts::*, os::HypervisorError}; - -pub type HypervisorResult = Result; +use crate::linux::x86_64::kvm_cpu::{initialize_kvm, KvmCpu}; #[cfg(all(target_arch = "x86_64", target_os = "macos"))] use crate::macos::x86_64::vcpu::XhyveCpu; +use crate::{ + arch, consts::*, mem::MmapMemory, os::HypervisorError, params::Params, vcpu::VirtualCPU, + virtio::*, +}; + +pub type HypervisorResult = Result; #[derive(Error, Debug)] pub enum LoadKernelError { @@ -31,27 +44,166 @@ pub enum LoadKernelError { pub type LoadKernelResult = Result; -pub trait Vm { - /// Returns the number of cores for the vm. - fn num_cpus(&self) -> u32; - /// Returns a pointer to the address of the guest memory and the size of the memory in bytes. +// TODO: move to architecture specific section +fn detect_cpu_freq() -> u32 { + #[cfg(target_arch = "aarch64")] + let mhz: u32 = 0; + #[cfg(target_arch = "x86_64")] + let mhz = { + let cpuid = raw_cpuid::CpuId::new(); + let mhz: u32 = detect_freq_from_cpuid(&cpuid).unwrap_or_else(|_| { + debug!("Failed to detect from cpuid"); + detect_freq_from_cpuid_hypervisor_info(&cpuid).unwrap_or_else(|_| { + debug!("Failed to detect from hypervisor_info"); + get_cpu_frequency_from_os().unwrap_or(0) + }) + }); + debug!("detected a cpu frequency of {} Mhz", mhz); + + mhz + }; + if mhz == 0 { + warn!("Unable to determine processor frequency"); + } + mhz +} + +/// A section of memory that is reserved for the VM guest. +pub trait VmGuestMemory { + /// returns a pointer to the address of the guest memory and the size of the memory in bytes. + // TODO: replace with slice + // TODO: rename to memory fn guest_mem(&self) -> (*mut u8, usize); - #[doc(hidden)] - fn set_offset(&mut self, offset: u64); + + /// Initialize the memory + fn init_guest_mem(&mut self); + + // TODO Guest physical to virtual here +} + +#[cfg(target_os = "linux")] +pub type VcpuDefault = crate::linux::x86_64::kvm_cpu::KvmCpu; +#[cfg(target_os = "macos")] +pub type VcpuDefault = crate::macos::x86_64::vcpu::XhyveCpu; + +pub struct UhyveVm { + /// The starting position of the image in physical memory + offset: u64, + entry_point: u64, + stack_address: u64, + mem: MmapMemory, + num_cpus: u32, + path: PathBuf, + args: Vec, + boot_info: *const RawBootInfo, + verbose: bool, + virtio_device: Arc>, + #[allow(dead_code)] // gdb is not supported on macos + pub(super) gdb_port: Option, + _vcpu_type: PhantomData, +} +impl UhyveVm { + pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult> { + let memory_size = params.memory_size.get(); + + #[cfg(target_os = "linux")] + let mem = MmapMemory::new(0, memory_size, 0, params.thp, params.ksm); + #[cfg(not(target_os = "linux"))] + let mem = MmapMemory::new(0, memory_size, 0, false, false); + + // create virtio interface + // TODO: Remove allow once fixed: + // https://github.com/rust-lang/rust-clippy/issues/11382 + #[allow(clippy::arc_with_non_send_sync)] + let virtio_device = Arc::new(Mutex::new(VirtioNetPciDevice::new())); + + #[cfg(target_os = "linux")] + initialize_kvm(&mem, params.pit)?; + + let cpu_count = params.cpu_count.get(); + + assert!( + params.gdb_port.is_none() || cfg!(target_os = "linux"), + "gdb is only supported on linux (yet)" + ); + assert!( + params.gdb_port.is_none() || cpu_count == 1, + "gdbstub is only supported with one CPU" + ); + + let mut vm = Self { + offset: 0, + entry_point: 0, + stack_address: 0, + mem, + num_cpus: cpu_count, + path: kernel_path, + args: params.kernel_args, + boot_info: ptr::null(), + verbose: params.verbose, + virtio_device, + gdb_port: params.gdb_port, + _vcpu_type: PhantomData, + }; + + vm.init_guest_mem(); + + Ok(vm) + } + + fn verbose(&self) -> bool { + self.verbose + } + + fn set_offset(&mut self, offset: u64) { + self.offset = offset; + } + /// Returns the section offsets relative to their base addresses - fn get_offset(&self) -> u64; + pub fn get_offset(&self) -> u64 { + self.offset + } + /// Sets the elf entry point. - fn set_entry_point(&mut self, entry: u64); - fn get_entry_point(&self) -> u64; - fn set_stack_address(&mut self, stack_addresss: u64); - fn stack_address(&self) -> u64; - fn kernel_path(&self) -> &Path; - fn create_cpu(&self, id: u32) -> HypervisorResult; - fn set_boot_info(&mut self, header: *const RawBootInfo); - fn verbose(&self) -> bool; - fn init_guest_mem(&mut self); + fn set_entry_point(&mut self, entry: u64) { + self.entry_point = entry; + } + + pub fn get_entry_point(&self) -> u64 { + self.entry_point + } + + fn set_stack_address(&mut self, stack_addresss: u64) { + self.stack_address = stack_addresss; + } + + pub fn stack_address(&self) -> u64 { + self.stack_address + } + + /// Returns the number of cores for the vm. + pub fn num_cpus(&self) -> u32 { + self.num_cpus + } + + fn guest_mem(&self) -> (*mut u8, usize) { + (self.mem.host_address as *mut u8, self.mem.memory_size) + } + + fn kernel_path(&self) -> &Path { + self.path.as_path() + } + + fn set_boot_info(&mut self, header: *const RawBootInfo) { + self.boot_info = header; + } - unsafe fn load_kernel(&mut self) -> LoadKernelResult<()> { + /// Initialize the page tables for the guest + fn init_guest_mem(&mut self) { + self.mem.init_guest_mem(); + } + + pub unsafe fn load_kernel(&mut self) -> LoadKernelResult<()> { let elf = fs::read(self.kernel_path())?; let object = KernelObject::parse(&elf).map_err(LoadKernelError::ParseKernelError)?; @@ -103,36 +255,48 @@ pub trait Vm { } } -fn detect_cpu_freq() -> u32 { - #[cfg(target_arch = "aarch64")] - let mhz: u32 = 0; - #[cfg(target_arch = "x86_64")] - let mhz = { - let cpuid = raw_cpuid::CpuId::new(); - let mhz: u32 = detect_freq_from_cpuid(&cpuid).unwrap_or_else(|_| { - debug!("Failed to detect from cpuid"); - detect_freq_from_cpuid_hypervisor_info(&cpuid).unwrap_or_else(|_| { - debug!("Failed to detect from hypervisor_info"); - get_cpu_frequency_from_os().unwrap_or(0) - }) - }); - debug!("detected a cpu frequency of {} Mhz", mhz); - - mhz - }; - if mhz == 0 { - warn!("Unable to determine processor frequency"); +#[cfg(target_os = "linux")] +impl UhyveVm { + pub fn create_cpu(&self, id: u32) -> HypervisorResult { + KvmCpu::new( + id, + self.path.clone(), + self.args.clone(), + self.mem.host_address, + self.virtio_device.clone(), + ) } - mhz } -/// A section of memory that is reserved for the VM guest. -pub trait VmGuestMemory { - /// returns a pointer to the address of the guest memory and the size of the memory in bytes. - // TODO: replace with slice - // TODO: rename to memory - fn guest_mem(&self) -> (*mut u8, usize); +#[cfg(target_os = "macos")] +impl UhyveVm { + pub fn create_cpu(&self, id: u32) -> HypervisorResult { + Ok(XhyveCpu::new( + id, + self.path.clone(), + self.args.clone(), + self.mem.host_address, + )) + } +} - /// Initialize the memory - fn init_guest_mem(&mut self); +impl fmt::Debug for UhyveVm { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("UhyveVm") + .field("entry_point", &self.entry_point) + .field("stack_address", &self.stack_address) + .field("mem", &self.mem) + .field("num_cpus", &self.num_cpus) + .field("path", &self.path) + .field("boot_info", &self.boot_info) + .field("verbose", &self.verbose) + .field("virtio_device", &self.virtio_device) + .finish() + } } + +// TODO: Investigate soundness +// https://github.com/hermitcore/uhyve/issues/229 +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for UhyveVm {} +unsafe impl Sync for UhyveVm {} diff --git a/tests/common.rs b/tests/common.rs index 173f1225..3a9e598e 100644 --- a/tests/common.rs +++ b/tests/common.rs @@ -5,7 +5,7 @@ use std::{ }; use byte_unit::{Byte, Unit}; -use uhyvelib::{params::Params, Uhyve}; +use uhyvelib::{params::Params, vm::UhyveVm}; /// Uses Cargo to build a kernel in the `tests/test-kernels` directory. /// Returns a path to the build binary. @@ -48,6 +48,6 @@ pub fn run_simple_vm(kernel_path: PathBuf) { .unwrap(), ..Default::default() }; - let code = Uhyve::new(kernel_path, params).unwrap().run(None); + let code = UhyveVm::new(kernel_path, params).unwrap().run(None); assert_eq!(0, code); } diff --git a/tests/gdb.rs b/tests/gdb.rs index 93c9fcd2..b3593f6d 100644 --- a/tests/gdb.rs +++ b/tests/gdb.rs @@ -12,7 +12,7 @@ use std::{ use assert_fs::{assert::PathAssert, fixture::PathChild, TempDir}; use common::build_hermit_bin; -use uhyvelib::{params::Params, Uhyve}; +use uhyvelib::{params::Params, vm::UhyveVm}; #[test] fn gdb() -> io::Result<()> { @@ -22,7 +22,7 @@ fn gdb() -> io::Result<()> { let bin_path_clone = bin_path.clone(); let vm = thread::spawn(move || { let bin_path = bin_path_clone; - let vm = Uhyve::new( + let vm = UhyveVm::new( bin_path, Params { verbose: true, From 0b2d1331f7141b61d9139af846bcafc44102ee68 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Thu, 15 Jun 2023 22:32:59 +0200 Subject: [PATCH 11/29] made load_kernel a safe fn and removed the guest_mem() call from it --- benches/vm/mod.rs | 4 +--- src/linux/mod.rs | 4 +--- src/macos/mod.rs | 4 +--- src/macos/xhyve.rs | 2 +- src/mem.rs | 14 +++++++++++--- src/vm.rs | 43 +++++++++++++++++++++++-------------------- 6 files changed, 38 insertions(+), 33 deletions(-) diff --git a/benches/vm/mod.rs b/benches/vm/mod.rs index 143e6f0e..d7f2944c 100644 --- a/benches/vm/mod.rs +++ b/benches/vm/mod.rs @@ -17,9 +17,7 @@ pub fn load_vm_hello_world(c: &mut Criterion) { let mut vm = UhyveVm::::new(path, params).expect("Unable to create VM"); c.bench_function("vm::load_kernel(hello world)", |b| { - b.iter(|| unsafe { - vm.load_kernel().unwrap(); - }) + b.iter(|| vm.load_kernel().unwrap()) }); } diff --git a/src/linux/mod.rs b/src/linux/mod.rs index 7f58e1df..ffa71fc1 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -77,9 +77,7 @@ impl UhyveVm { pub fn run(mut self, cpu_affinity: Option>) -> i32 { KickSignal::register_handler().unwrap(); - unsafe { - self.load_kernel().expect("Unabled to load the kernel"); - } + self.load_kernel().expect("Unabled to load the kernel"); if self.gdb_port.is_none() { self.run_no_gdb(cpu_affinity) diff --git a/src/macos/mod.rs b/src/macos/mod.rs index 66e3d8d9..95526b13 100644 --- a/src/macos/mod.rs +++ b/src/macos/mod.rs @@ -25,9 +25,7 @@ impl UhyveVm { /// /// Blocks until the VM has finished execution. pub fn run(mut self, cpu_affinity: Option>) -> i32 { - unsafe { - self.load_kernel().expect("Unabled to load the kernel"); - } + self.load_kernel().expect("Unabled to load the kernel"); // For communication of the exit code from one vcpu to this thread as return // value. diff --git a/src/macos/xhyve.rs b/src/macos/xhyve.rs index bce10d1f..a15781c7 100644 --- a/src/macos/xhyve.rs +++ b/src/macos/xhyve.rs @@ -7,6 +7,6 @@ pub fn initialize_xhyve(mem: &mut MmapMemory) -> HypervisorResult<()> { create_vm()?; debug!("Map guest memory..."); - map_mem(mem.as_slice_mut(), 0, MemPerm::ExecAndWrite)?; + map_mem(unsafe { mem.as_slice_mut() }, 0, MemPerm::ExecAndWrite)?; Ok(()) } diff --git a/src/mem.rs b/src/mem.rs index 162bae77..de2cec6a 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -1,4 +1,4 @@ -use std::{mem, os::raw::c_void, ptr::NonNull}; +use std::{mem, mem::MaybeUninit, os::raw::c_void, ptr::NonNull}; use log::debug; use nix::sys::mman::*; @@ -73,8 +73,16 @@ impl MmapMemory { } } - pub fn as_slice_mut(& self) -> &mut [u8] { - unsafe { std::slice::from_raw_parts_mut(self.host_address as *mut u8, self.memory_size) } + /// This can create multiple aliasing. During the lifetime of the returned slice, the memory must not be altered, dropped or simmilar. + #[allow(clippy::mut_from_ref)] + pub unsafe fn as_slice_mut(&self) -> &mut [u8] { + std::slice::from_raw_parts_mut(self.host_address as *mut u8, self.memory_size) + } + + /// Same as [`as_slice_mut`], but for `MaybeUninit`. Actually the memory is initialized, as Mmap zero initializes it, but some fns like [`hermit_entry::elf::load_kernel`] require [`MaybeUninit`]s. + #[allow(clippy::mut_from_ref)] + pub unsafe fn as_slice_uninit_mut(&self) -> &mut [MaybeUninit] { + std::slice::from_raw_parts_mut(self.host_address as *mut MaybeUninit, self.memory_size) } } impl VmGuestMemory for MmapMemory { diff --git a/src/vm.rs b/src/vm.rs index 36186d44..5c1c4806 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -2,10 +2,9 @@ use std::{ ffi::OsString, fmt, fs, io, marker::PhantomData, - mem::MaybeUninit, num::NonZeroU32, path::{Path, PathBuf}, - ptr, slice, + ptr, sync::{Arc, Mutex}, time::SystemTime, }; @@ -107,9 +106,9 @@ impl UhyveVm { let memory_size = params.memory_size.get(); #[cfg(target_os = "linux")] - let mem = MmapMemory::new(0, memory_size, 0, params.thp, params.ksm); + let mem = MmapMemory::new(0, memory_size, arch::RAM_START, params.thp, params.ksm); #[cfg(not(target_os = "linux"))] - let mem = MmapMemory::new(0, memory_size, 0, false, false); + let mem = MmapMemory::new(0, memory_size, arch::RAM_START, false, false); // create virtio interface // TODO: Remove allow once fixed: @@ -203,33 +202,33 @@ impl UhyveVm { self.mem.init_guest_mem(); } - pub unsafe fn load_kernel(&mut self) -> LoadKernelResult<()> { + pub fn load_kernel(&mut self) -> LoadKernelResult<()> { let elf = fs::read(self.kernel_path())?; let object = KernelObject::parse(&elf).map_err(LoadKernelError::ParseKernelError)?; // TODO: should be a random start address, if we have a relocatable executable - let start_address = object.start_addr().unwrap_or(0x400000); - self.set_offset(start_address); + let kernel_start_address = object.start_addr().unwrap_or(0x400000) as usize; + let kernel_end_address = kernel_start_address + object.mem_size(); + self.set_offset(kernel_start_address as u64); - let (vm_mem, vm_mem_len) = self.guest_mem(); - if start_address as usize + object.mem_size() > vm_mem_len { + if kernel_end_address > self.mem.memory_size - self.mem.guest_address { return Err(LoadKernelError::InsufficientMemory); } - let vm_slice = { - let vm_slice = slice::from_raw_parts_mut(vm_mem as *mut MaybeUninit, vm_mem_len); - &mut vm_slice[start_address as usize..][..object.mem_size()] - }; - let LoadedKernel { load_info, entry_point, - } = object.load_kernel(vm_slice, start_address); + } = object.load_kernel( + // Safety: Slice only lives during this fn call, so no aliasing happens + &mut unsafe { self.mem.as_slice_uninit_mut() } + [kernel_start_address..kernel_end_address], + kernel_start_address as u64, + ); self.set_entry_point(entry_point); let boot_info = BootInfo { hardware_info: HardwareInfo { - phys_addr_range: arch::RAM_START..arch::RAM_START + vm_mem_len as u64, + phys_addr_range: arch::RAM_START..arch::RAM_START + self.mem.memory_size as u64, serial_port_base: self.verbose().then(|| { SerialPortBase::new((uhyve_interface::HypercallAddress::Uart as u16).into()) .unwrap() @@ -244,10 +243,14 @@ impl UhyveVm { boot_time: SystemTime::now().into(), }, }; - let raw_boot_info_ptr = vm_mem.add(BOOT_INFO_ADDR.as_u64() as usize) as *mut RawBootInfo; - *raw_boot_info_ptr = RawBootInfo::from(boot_info); - self.set_boot_info(raw_boot_info_ptr); - self.set_stack_address(start_address.checked_sub(KERNEL_STACK_SIZE).expect( + unsafe { + let raw_boot_info_ptr = (self.mem.host_address as *mut u8) + .add(BOOT_INFO_ADDR.as_u64() as usize) as *mut RawBootInfo; + *raw_boot_info_ptr = RawBootInfo::from(boot_info); + self.set_boot_info(raw_boot_info_ptr); + } + + self.set_stack_address((kernel_start_address as u64).checked_sub(KERNEL_STACK_SIZE).expect( "there should be enough space for the boot stack before the kernel start address", )); From 4d6cdbd10acaa7dd69b01316617a6651da9c4398 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Thu, 15 Jun 2023 22:39:52 +0200 Subject: [PATCH 12/29] Moved pagetable initialization to arch module and removed obsolete VmMemory trait and guest_vm fn --- src/arch/x86_64/mod.rs | 144 ++++++++++++++++++++++++++++++++++-- src/consts.rs | 8 +- src/linux/x86_64/kvm_cpu.rs | 2 +- src/macos/x86_64/vcpu.rs | 2 +- src/mem.rs | 61 +-------------- src/vm.rs | 26 ++----- 6 files changed, 151 insertions(+), 92 deletions(-) diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 7db3388c..f9ee90ba 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -9,6 +9,12 @@ use std::{ use log::{debug, warn}; use raw_cpuid::{CpuId, CpuIdReaderNative}; use thiserror::Error; +use x86_64::{ + structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, + PhysAddr, +}; + +use crate::consts::*; pub const RAM_START: u64 = 0x00; const MHZ_TO_HZ: u64 = 1000000; @@ -101,7 +107,90 @@ pub fn get_cpu_frequency_from_os() -> std::result::Result u64 { + ((base & 0xff000000u64) << (56 - 24)) + | ((flags & 0x0000f0ffu64) << 40) + | ((limit & 0x000f0000u64) << (48 - 16)) + | ((base & 0x00ffffffu64) << 16) + | (limit & 0x0000ffffu64) +} + +pub const MIN_PAGING_MEM_SIZE: usize = BOOT_PDE.as_u64() as usize + 0x1000; + +/// Creates the pagetables and the GDT in the guest memory space. +/// +/// The memory slice must be larger than [`MIN_PAGING_MEM_SIZE`]. +/// Also, the memory `mem` needs to be zeroed for [`PAGE_SIZE`] bytes at the +/// offsets [`BOOT_PML4`] and [`BOOT_PDPTE`], otherwise the integrity of the +/// pagetables and thus the integrity of the guest's memory is not ensured +pub fn initialize_pagetables(mem: &mut [u8]) { + assert!(mem.len() >= MIN_PAGING_MEM_SIZE); + let mem_addr = std::ptr::addr_of_mut!(mem[0]); + + let (gdt_entry, pml4, pdpte, pde); + // Safety: + // We only operate in `mem`, which is plain bytes and we have ownership of + // these and it is asserted to be large enough. + unsafe { + gdt_entry = mem_addr + .add(BOOT_GDT.as_u64() as usize) + .cast::<[u64; 3]>() + .as_mut() + .unwrap(); + + pml4 = mem_addr + .add(BOOT_PML4.as_u64() as usize) + .cast::() + .as_mut() + .unwrap(); + pdpte = mem_addr + .add(BOOT_PDPTE.as_u64() as usize) + .cast::() + .as_mut() + .unwrap(); + pde = mem_addr + .add(BOOT_PDE.as_u64() as usize) + .cast::() + .as_mut() + .unwrap(); + + /* For simplicity we currently use 2MB pages and only a single + PML4/PDPTE/PDE. */ + + // per default is the memory zeroed, which we allocate by the system + // call mmap, so the following is not necessary: + /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); + libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); + libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ + } + // initialize GDT + gdt_entry[BOOT_GDT_NULL] = 0; + gdt_entry[BOOT_GDT_CODE] = create_gdt_entry(0xA09B, 0, 0xFFFFF); + gdt_entry[BOOT_GDT_DATA] = create_gdt_entry(0xC093, 0, 0xFFFFF); + + pml4[0].set_addr( + BOOT_PDPTE, + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + pml4[511].set_addr( + BOOT_PML4, + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + pdpte[0].set_addr(BOOT_PDE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE); + + for i in 0..512 { + let addr = PhysAddr::new(i as u64 * Page::::SIZE); + pde[i].set_addr( + addr, + PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, + ); + } +} + +#[cfg(test)] mod tests { + use super::*; // test is derived from // https://github.com/gz/rust-cpuid/blob/master/examples/tsc_frequency.rs #[test] @@ -180,13 +269,52 @@ mod tests { assert!(freq > 0); assert!(freq < 10000); //More than 10Ghz is probably wrong } -} -// Constructor for a conventional segment GDT (or LDT) entry -pub fn create_gdt_entry(flags: u64, base: u64, limit: u64) -> u64 { - ((base & 0xff000000u64) << (56 - 24)) - | ((flags & 0x0000f0ffu64) << 40) - | ((limit & 0x000f0000u64) << (48 - 16)) - | ((base & 0x00ffffffu64) << 16) - | (limit & 0x0000ffffu64) + #[test] + fn test_pagetable_initialization() { + let mut mem: Vec = vec![0; MIN_PAGING_MEM_SIZE]; + initialize_pagetables((&mut mem[0..MIN_PAGING_MEM_SIZE]).try_into().unwrap()); + + // Test pagetable setup + let addr_pdpte = u64::from_le_bytes( + mem[(BOOT_PML4.as_u64() as usize)..(BOOT_PML4.as_u64() as usize + 8)] + .try_into() + .unwrap(), + ); + assert_eq!( + addr_pdpte, + BOOT_PDPTE.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + ); + let addr_pde = u64::from_le_bytes( + mem[(BOOT_PDPTE.as_u64() as usize)..(BOOT_PDPTE.as_u64() as usize + 8)] + .try_into() + .unwrap(), + ); + assert_eq!( + addr_pde, + BOOT_PDE.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + ); + + for i in (0..4096).step_by(8) { + let addr = BOOT_PDE.as_u64() as usize + i; + let entry = u64::from_le_bytes(mem[addr..(addr + 8)].try_into().unwrap()); + assert!( + PageTableFlags::from_bits_truncate(entry) + .difference( + PageTableFlags::PRESENT + | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE + ) + .is_empty(), + "Pagetable bits at {addr:#x} are incorrect" + ) + } + + // Test GDT + let gdt_results = [0x0, 0xAF9B000000FFFF, 0xCF93000000FFFF]; + for (i, res) in gdt_results.iter().enumerate() { + let gdt_addr = BOOT_GDT.as_u64() as usize + i * 8; + let gdt_entry = u64::from_le_bytes(mem[gdt_addr..gdt_addr + 8].try_into().unwrap()); + assert_eq!(*res, gdt_entry); + } + } } diff --git a/src/consts.rs b/src/consts.rs index d6da870b..86250a6d 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -5,10 +5,10 @@ pub const GDT_KERNEL_CODE: u16 = 1; pub const GDT_KERNEL_DATA: u16 = 2; pub const APIC_DEFAULT_BASE: u64 = 0xfee00000; pub const BOOT_GDT: PhysAddr = PhysAddr::new(0x1000); -pub const BOOT_GDT_NULL: u64 = 0; -pub const BOOT_GDT_CODE: u64 = 1; -pub const BOOT_GDT_DATA: u64 = 2; -pub const BOOT_GDT_MAX: u64 = 3; +pub const BOOT_GDT_NULL: usize = 0; +pub const BOOT_GDT_CODE: usize = 1; +pub const BOOT_GDT_DATA: usize = 2; +pub const BOOT_GDT_MAX: usize = 3; pub const BOOT_PML4: PhysAddr = PhysAddr::new(0x10000); pub const BOOT_PGT: PhysAddr = BOOT_PML4; pub const BOOT_PDPTE: PhysAddr = PhysAddr::new(0x11000); diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index 6b3b994f..ceda33c0 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -310,7 +310,7 @@ impl KvmCpu { //sregs.fs = seg; //sregs.gs = seg; sregs.gdt.base = BOOT_GDT.as_u64(); - sregs.gdt.limit = ((std::mem::size_of::() * BOOT_GDT_MAX as usize) - 1) as u16; + sregs.gdt.limit = ((std::mem::size_of::() * BOOT_GDT_MAX) - 1) as u16; self.vcpu.set_sregs(&sregs)?; diff --git a/src/macos/x86_64/vcpu.rs b/src/macos/x86_64/vcpu.rs index bf394342..da41da14 100644 --- a/src/macos/x86_64/vcpu.rs +++ b/src/macos/x86_64/vcpu.rs @@ -201,7 +201,7 @@ impl XhyveCpu { .write_vmcs(VMCS_GUEST_GDTR_BASE, BOOT_GDT.as_u64())?; self.vcpu.write_vmcs( VMCS_GUEST_GDTR_LIMIT, - ((std::mem::size_of::() * BOOT_GDT_MAX as usize) - 1) as u64, + ((std::mem::size_of::() * BOOT_GDT_MAX) - 1) as u64, )?; self.vcpu.write_vmcs(VMCS_GUEST_IDTR_BASE, 0)?; self.vcpu.write_vmcs(VMCS_GUEST_IDTR_LIMIT, 0xffff)?; diff --git a/src/mem.rs b/src/mem.rs index de2cec6a..b863be4f 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -1,13 +1,7 @@ -use std::{mem, mem::MaybeUninit, os::raw::c_void, ptr::NonNull}; +use std::{mem::MaybeUninit, os::raw::c_void, ptr::NonNull}; use log::debug; use nix::sys::mman::*; -use x86_64::{ - structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, - PhysAddr, -}; - -use crate::{consts::*, vm::VmGuestMemory, x86_64::create_gdt_entry}; /// A general purpose VM memory section that can exploit some Linux Kernel features. #[derive(Debug)] @@ -85,59 +79,6 @@ impl MmapMemory { std::slice::from_raw_parts_mut(self.host_address as *mut MaybeUninit, self.memory_size) } } -impl VmGuestMemory for MmapMemory { - fn guest_mem(&self) -> (*mut u8, usize) { - (self.host_address as *mut u8, self.memory_size) - } - - /// Initialize the page tables for the guest - fn init_guest_mem(&mut self) { - // TODO: Move to x86_64 - debug!("Initialize guest memory"); - - let (mem_addr, _) = self.guest_mem(); - - unsafe { - let pml4 = &mut *((mem_addr as u64 + BOOT_PML4.as_u64()) as *mut PageTable); - let pdpte = &mut *((mem_addr as u64 + BOOT_PDPTE.as_u64()) as *mut PageTable); - let pde = &mut *((mem_addr as u64 + BOOT_PDE.as_u64()) as *mut PageTable); - let gdt_entry: u64 = mem_addr as u64 + BOOT_GDT.as_u64(); - - // initialize GDT - *((gdt_entry) as *mut u64) = create_gdt_entry(0, 0, 0); - *((gdt_entry + mem::size_of::<*mut u64>() as u64) as *mut u64) = - create_gdt_entry(0xA09B, 0, 0xFFFFF); /* code */ - *((gdt_entry + 2 * mem::size_of::<*mut u64>() as u64) as *mut u64) = - create_gdt_entry(0xC093, 0, 0xFFFFF); /* data */ - - /* For simplicity we currently use 2MB pages and only a single - PML4/PDPTE/PDE. */ - - // per default is the memory zeroed, which we allocate by the system call mmap - /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ - - pml4[0].set_addr( - BOOT_PDPTE, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pml4[511].set_addr( - BOOT_PML4, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pdpte[0].set_addr(BOOT_PDE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE); - - for i in 0..512 { - let addr = PhysAddr::new(i as u64 * Page::::SIZE); - pde[i].set_addr( - addr, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, - ); - } - } - } -} impl Drop for MmapMemory { fn drop(&mut self) { diff --git a/src/vm.rs b/src/vm.rs index 5c1c4806..524f7506 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -67,19 +67,6 @@ fn detect_cpu_freq() -> u32 { mhz } -/// A section of memory that is reserved for the VM guest. -pub trait VmGuestMemory { - /// returns a pointer to the address of the guest memory and the size of the memory in bytes. - // TODO: replace with slice - // TODO: rename to memory - fn guest_mem(&self) -> (*mut u8, usize); - - /// Initialize the memory - fn init_guest_mem(&mut self); - - // TODO Guest physical to virtual here -} - #[cfg(target_os = "linux")] pub type VcpuDefault = crate::linux::x86_64::kvm_cpu::KvmCpu; #[cfg(target_os = "macos")] @@ -185,10 +172,6 @@ impl UhyveVm { self.num_cpus } - fn guest_mem(&self) -> (*mut u8, usize) { - (self.mem.host_address as *mut u8, self.mem.memory_size) - } - fn kernel_path(&self) -> &Path { self.path.as_path() } @@ -199,7 +182,14 @@ impl UhyveVm { /// Initialize the page tables for the guest fn init_guest_mem(&mut self) { - self.mem.init_guest_mem(); + debug!("Initialize guest memory"); + + #[cfg(target_arch = "x86_64")] + crate::x86_64::initialize_pagetables( + unsafe { self.mem.as_slice_mut() } // slice only lives during this fn call + .try_into() + .expect("Guest memory is not large enough for pagetables"), + ); } pub fn load_kernel(&mut self) -> LoadKernelResult<()> { From 682616c1c18a1839acc8cd3ca047d4ee29c00f43 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Thu, 15 Jun 2023 22:47:56 +0200 Subject: [PATCH 13/29] Improved MmapMemory - Added index functionality for MmapMemory - Added memory host_address and read functions with test - MmapMemory: Added slice access comfort fns - Use GuestPhysAddr in MmapMemory - changed mmap memory host addr to ptr type --- src/arch/x86_64/mod.rs | 3 +- src/linux/x86_64/kvm_cpu.rs | 8 ++- src/mem.rs | 122 +++++++++++++++++++++++++++++++++--- src/vcpu.rs | 2 +- src/vm.rs | 11 ++-- 5 files changed, 129 insertions(+), 17 deletions(-) diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index f9ee90ba..075be67e 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -9,6 +9,7 @@ use std::{ use log::{debug, warn}; use raw_cpuid::{CpuId, CpuIdReaderNative}; use thiserror::Error; +use uhyve_interface::GuestPhysAddr; use x86_64::{ structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, PhysAddr, @@ -16,7 +17,7 @@ use x86_64::{ use crate::consts::*; -pub const RAM_START: u64 = 0x00; +pub const RAM_START: GuestPhysAddr = GuestPhysAddr::new(0x00); const MHZ_TO_HZ: u64 = 1000000; const KHZ_TO_HZ: u64 = 1000; diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index ceda33c0..6c89fcb5 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -43,7 +43,7 @@ pub fn initialize_kvm(mem: &MmapMemory, use_pit: bool) -> HypervisorResult<()> { slot: 0, flags: mem.flags, memory_size: sz as u64, - guest_phys_addr: mem.guest_address as u64, + guest_phys_addr: mem.guest_address.as_u64(), userspace_addr: mem.host_address as u64, }; @@ -56,8 +56,10 @@ pub fn initialize_kvm(mem: &MmapMemory, use_pit: bool) -> HypervisorResult<()> { slot: 1, flags: mem.flags, memory_size: (mem.memory_size - KVM_32BIT_GAP_START - KVM_32BIT_GAP_SIZE) as u64, - guest_phys_addr: (mem.guest_address + KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) as u64, - userspace_addr: (mem.host_address + KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) as u64, + guest_phys_addr: mem.guest_address.as_u64() + + (KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) as u64, + userspace_addr: (mem.host_address as usize + KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) + as u64, }; unsafe { vm.set_user_memory_region(kvm_mem) }?; diff --git a/src/mem.rs b/src/mem.rs index b863be4f..f4a39b3c 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -1,7 +1,17 @@ -use std::{mem::MaybeUninit, os::raw::c_void, ptr::NonNull}; +use std::{mem::MaybeUninit, ops::Index, os::raw::c_void, ptr::NonNull}; use log::debug; use nix::sys::mman::*; +use thiserror::Error; +use uhyve_interface::GuestPhysAddr; + +#[derive(Error, Debug)] +pub enum MemoryError { + #[error("Memory bounds exceeded")] + BoundsViolation, + #[error("The desired guest location is not part of this memory")] + WrongMemoryError, +} /// A general purpose VM memory section that can exploit some Linux Kernel features. #[derive(Debug)] @@ -9,15 +19,15 @@ pub struct MmapMemory { // TODO: make private pub flags: u32, pub memory_size: usize, - pub guest_address: usize, - pub host_address: usize, + pub guest_address: GuestPhysAddr, + pub host_address: *mut u8, } impl MmapMemory { pub fn new( flags: u32, memory_size: usize, - guest_address: u64, + guest_address: GuestPhysAddr, huge_pages: bool, mergeable: bool, ) -> MmapMemory { @@ -62,15 +72,15 @@ impl MmapMemory { MmapMemory { flags, memory_size, - guest_address: guest_address as usize, - host_address: host_address.as_ptr() as usize, + guest_address, + host_address: host_address.as_ptr() as *mut u8, } } /// This can create multiple aliasing. During the lifetime of the returned slice, the memory must not be altered, dropped or simmilar. #[allow(clippy::mut_from_ref)] pub unsafe fn as_slice_mut(&self) -> &mut [u8] { - std::slice::from_raw_parts_mut(self.host_address as *mut u8, self.memory_size) + std::slice::from_raw_parts_mut(self.host_address, self.memory_size) } /// Same as [`as_slice_mut`], but for `MaybeUninit`. Actually the memory is initialized, as Mmap zero initializes it, but some fns like [`hermit_entry::elf::load_kernel`] require [`MaybeUninit`]s. @@ -78,6 +88,62 @@ impl MmapMemory { pub unsafe fn as_slice_uninit_mut(&self) -> &mut [MaybeUninit] { std::slice::from_raw_parts_mut(self.host_address as *mut MaybeUninit, self.memory_size) } + + /// Read a section of the memory. + /// + /// # Safety + /// + /// This is unsafe, as can create multiple aliasing. During the lifetime of + /// the returned slice, the memory must not be altered to prevent undfined + /// behaviour. + pub unsafe fn slice_at(&self, addr: GuestPhysAddr, len: usize) -> Result<&[u8], MemoryError> { + if addr.as_u64() as usize + len >= self.memory_size - self.guest_address.as_u64() as usize { + Err(MemoryError::BoundsViolation) + } else { + Ok(unsafe { std::slice::from_raw_parts(self.host_address(addr)?, len) }) + } + } + + /// Writeable access to a section of the memory. + /// + /// # Safety + /// + /// This is unsafe, as it can create multiple aliasing. During the lifetime of + /// the returned slice, the memory must not be altered to prevent undfined + /// behavior. + pub unsafe fn slice_at_mut( + &self, + addr: GuestPhysAddr, + len: usize, + ) -> Result<&mut [u8], MemoryError> { + if addr.as_u64() as usize + len >= self.memory_size - self.guest_address.as_u64() as usize { + Err(MemoryError::BoundsViolation) + } else { + Ok(unsafe { std::slice::from_raw_parts_mut(self.host_address(addr)? as *mut u8, len) }) + } + } + + /// Returns the host address of the given internal physical address in the + /// memory, if the address is valid. + pub fn host_address(&self, addr: GuestPhysAddr) -> Result<*const u8, MemoryError> { + if addr < self.guest_address + || addr.as_u64() as usize > self.guest_address.as_u64() as usize + self.memory_size + { + return Err(MemoryError::WrongMemoryError); + } + Ok( + // Safety: + // - The new ptr is checked to be within the mmap'd memory region above + // - to overflow an isize, the guest memory needs to be larger than 2^63 (which is rather unlikely anytime soon). + unsafe { self.host_address.add((addr - self.guest_address) as usize) as usize } + as *const u8, + ) + } + + /// Read the value in the memory at the given address + pub fn read(&self, addr: GuestPhysAddr) -> Result { + Ok(unsafe { self.host_address(addr)?.cast::().read_unaligned() }) + } } impl Drop for MmapMemory { @@ -90,3 +156,45 @@ impl Drop for MmapMemory { } } } + +impl Index for MmapMemory { + type Output = u8; + + #[inline(always)] + fn index(&self, index: usize) -> &Self::Output { + assert!(index < self.memory_size); + + // Safety: + // - The new ptr is checked to be within the mmap'd memory region above + // - to overflow an isize, the guest memory needs to be larger than 2^63 (which is rather unlikely anytime soon). + unsafe { &*self.host_address.add(index) } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::consts::PAGE_SIZE; + + #[test] + fn test_mmap_memory_readwrite() { + let mem = MmapMemory::new(0, 40 * PAGE_SIZE, GuestPhysAddr::new(0x1000), true, true); + unsafe { + mem.as_slice_mut()[0xfe] = 0xaa; + mem.as_slice_mut()[0xff] = 0xbb; + mem.as_slice_mut()[0x100] = 0x78; + mem.as_slice_mut()[0x101] = 0x56; + mem.as_slice_mut()[0x102] = 0x34; + mem.as_slice_mut()[0x103] = 0x12; + } + assert_eq!( + mem.read::(GuestPhysAddr::new(0x1100)).unwrap(), + 0x12345678 + ); + // unaligned read + assert_eq!( + mem.read::(GuestPhysAddr::new(0x10fe)).unwrap(), + 0x12345678bbaa + ); + } +} diff --git a/src/vcpu.rs b/src/vcpu.rs index edaf2bc0..0ca24274 100644 --- a/src/vcpu.rs +++ b/src/vcpu.rs @@ -241,7 +241,7 @@ pub trait VirtualCPU { unsafe { let step = libc::write( syswrite.fd, - self.host_address(syswrite.buf + bytes_written) as *const libc::c_void, + self.host_address(syswrite.buf + bytes_written as u64) as *const libc::c_void, syswrite.len - bytes_written, ); if step >= 0 { diff --git a/src/vm.rs b/src/vm.rs index 524f7506..7f2a258c 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -201,7 +201,7 @@ impl UhyveVm { let kernel_end_address = kernel_start_address + object.mem_size(); self.set_offset(kernel_start_address as u64); - if kernel_end_address > self.mem.memory_size - self.mem.guest_address { + if kernel_end_address > self.mem.memory_size - self.mem.guest_address.as_u64() as usize { return Err(LoadKernelError::InsufficientMemory); } @@ -218,7 +218,8 @@ impl UhyveVm { let boot_info = BootInfo { hardware_info: HardwareInfo { - phys_addr_range: arch::RAM_START..arch::RAM_START + self.mem.memory_size as u64, + phys_addr_range: arch::RAM_START.as_u64() + ..arch::RAM_START.as_u64() + self.mem.memory_size as u64, serial_port_base: self.verbose().then(|| { SerialPortBase::new((uhyve_interface::HypercallAddress::Uart as u16).into()) .unwrap() @@ -234,8 +235,8 @@ impl UhyveVm { }, }; unsafe { - let raw_boot_info_ptr = (self.mem.host_address as *mut u8) - .add(BOOT_INFO_ADDR.as_u64() as usize) as *mut RawBootInfo; + let raw_boot_info_ptr = + self.mem.host_address.add(BOOT_INFO_ADDR.as_u64() as usize) as *mut RawBootInfo; *raw_boot_info_ptr = RawBootInfo::from(boot_info); self.set_boot_info(raw_boot_info_ptr); } @@ -255,7 +256,7 @@ impl UhyveVm { id, self.path.clone(), self.args.clone(), - self.mem.host_address, + self.mem.host_address as usize, self.virtio_device.clone(), ) } From 5475d8555f5441e9b45ed028ffb65f7be312d898 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Thu, 22 Jun 2023 10:16:40 +0200 Subject: [PATCH 14/29] Added virt_to_phys fn to x86 --- src/arch/x86_64/mod.rs | 95 ++++++++++++++++++++++++++++++++++-- src/linux/gdb/breakpoints.rs | 20 ++++++-- src/linux/gdb/mod.rs | 21 ++++++-- src/linux/x86_64/kvm_cpu.rs | 49 +------------------ src/macos/aarch64/vcpu.rs | 4 -- src/macos/x86_64/vcpu.rs | 39 +-------------- src/vcpu.rs | 7 +-- src/vm.rs | 2 +- 8 files changed, 133 insertions(+), 104 deletions(-) diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 075be67e..e97ae0fc 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -9,13 +9,16 @@ use std::{ use log::{debug, warn}; use raw_cpuid::{CpuId, CpuIdReaderNative}; use thiserror::Error; -use uhyve_interface::GuestPhysAddr; +use uhyve_interface::{GuestPhysAddr, GuestVirtAddr}; use x86_64::{ - structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, + structures::paging::{ + page_table::{FrameError, PageTableEntry}, + Page, PageTable, PageTableFlags, PageTableIndex, Size2MiB, + }, PhysAddr, }; -use crate::consts::*; +use crate::{consts::*, mem::MmapMemory}; pub const RAM_START: GuestPhysAddr = GuestPhysAddr::new(0x00); const MHZ_TO_HZ: u64 = 1000000; @@ -189,6 +192,51 @@ pub fn initialize_pagetables(mem: &mut [u8]) { } } +#[derive(Error, Debug)] +pub enum PagetableError { + #[error("The accessed virtual address is not mapped")] + InvalidAddress, +} + +/// Converts a virtual address in the guest to a physical address in the guest +pub fn virt_to_phys( + addr: GuestVirtAddr, + mem: &MmapMemory, +) -> Result { + /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). + pub const PAGE_BITS: u64 = 12; + + /// Number of bits of the index in each table (PML4, PDPT, PDT, PGT). + pub const PAGE_MAP_BITS: usize = 9; + + let mut page_table = + unsafe { (mem.host_address(BOOT_PML4).unwrap() as *mut PageTable).as_mut() }.unwrap(); + let mut page_bits = 39; + let mut entry = PageTableEntry::new(); + + for _i in 0..4 { + let index = + PageTableIndex::new(((addr.as_u64() >> page_bits) & ((1 << PAGE_MAP_BITS) - 1)) as u16); + entry = page_table[index].clone(); + + match entry.frame() { + Ok(frame) => { + page_table = unsafe { + (mem.host_address(frame.start_address()).unwrap() as *mut PageTable).as_mut() + } + .unwrap(); + page_bits -= PAGE_MAP_BITS; + } + Err(FrameError::FrameNotPresent) => return Err(PagetableError::InvalidAddress), + Err(FrameError::HugeFrame) => { + return Ok(entry.addr() + (addr.as_u64() & !((!0_u64) << page_bits))); + } + } + } + + Ok(entry.addr() + (addr.as_u64() & !((!0u64) << PAGE_BITS))) +} + #[cfg(test)] mod tests { use super::*; @@ -318,4 +366,45 @@ mod tests { assert_eq!(*res, gdt_entry); } } + + #[test] + fn test_virt_to_phys() { + let mem = MmapMemory::new( + 0, + MIN_PAGING_MEM_SIZE * 2, + GuestPhysAddr::new(0), + true, + true, + ); + initialize_pagetables(unsafe { mem.as_slice_mut() }.try_into().unwrap()); + + // Get the address of the first entry in PML4 (the address of the PML4 itself) + let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + assert_eq!(p_addr, BOOT_PML4); + + // The last entry on the PML4 is the address of the PML4 with flags + let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000 | (4096 - 8)); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + assert_eq!( + mem.read::(p_addr).unwrap(), + BOOT_PML4.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + ); + + // the first entry on the 3rd level entry in the pagetables is the address of the boot pdpte + let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFE00000); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + assert_eq!(p_addr, BOOT_PDPTE); + + // the first entry on the 2rd level entry in the pagetables is the address of the boot pde + let virt_addr = GuestVirtAddr::new(0xFFFFFFFFC0000000); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + assert_eq!(p_addr, BOOT_PDE); + // That address points to a huge page + assert!( + PageTableFlags::from_bits_truncate(mem.read::(p_addr).unwrap()).contains( + PageTableFlags::HUGE_PAGE | PageTableFlags::PRESENT | PageTableFlags::WRITABLE + ) + ); + } } diff --git a/src/linux/gdb/breakpoints.rs b/src/linux/gdb/breakpoints.rs index 9443f326..8de01a70 100644 --- a/src/linux/gdb/breakpoints.rs +++ b/src/linux/gdb/breakpoints.rs @@ -4,7 +4,7 @@ use gdbstub::target::{self, ext::breakpoints::WatchKind, TargetResult}; use uhyve_interface::GuestVirtAddr; use super::GdbUhyve; -use crate::arch::x86_64::registers; +use crate::arch::x86_64::{registers, virt_to_phys}; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct SwBreakpoint { @@ -50,7 +50,14 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { let sw_breakpoint = SwBreakpoint::new(addr, kind); if let Entry::Vacant(entry) = self.sw_breakpoints.entry(sw_breakpoint) { - let instructions = unsafe { self.vcpu.memory(GuestVirtAddr::new(addr), kind) }; + // Safety: mem is not altered during the lifetime of `instructions` + let instructions = unsafe { + self.vm.mem.slice_at_mut( + virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem).map_err(|_err| ())?, + kind, + ) + } + .unwrap(); entry.insert(instructions.into()); instructions.fill(SwBreakpoint::OPCODE); Ok(true) @@ -63,7 +70,14 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { let sw_breakpoint = SwBreakpoint::new(addr, kind); if let Entry::Occupied(entry) = self.sw_breakpoints.entry(sw_breakpoint) { - let instructions = unsafe { self.vcpu.memory(GuestVirtAddr::new(addr), kind) }; + // Safety: mem is not altered during the lifetime of `instructions` + let instructions = unsafe { + self.vm.mem.slice_at_mut( + virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem).map_err(|_err| ())?, + kind, + ) + } + .unwrap(); instructions.copy_from_slice(&entry.remove()); Ok(true) } else { diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index 4de964d1..29e5e717 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -23,7 +23,7 @@ use x86_64::registers::debug::Dr6Flags; use self::breakpoints::SwBreakpoints; use super::HypervisorError; use crate::{ - arch::x86_64::registers::debug::HwBreakpoints, + arch::x86_64::{registers::debug::HwBreakpoints, virt_to_phys}, linux::{x86_64::kvm_cpu::KvmCpu, KickSignal}, vcpu::{VcpuStopReason, VirtualCPU}, vm::UhyveVm, @@ -120,13 +120,28 @@ impl SingleThreadBase for GdbUhyve { } fn read_addrs(&mut self, start_addr: u64, data: &mut [u8]) -> TargetResult { - let src = unsafe { self.vcpu.memory(GuestVirtAddr::new(start_addr), data.len()) }; + // Safety: mem is copied to data before mem can be modified. + let src = unsafe { + self.vm.mem.slice_at( + virt_to_phys(GuestVirtAddr::new(start_addr), &self.vm.mem).map_err(|_err| ())?, + data.len(), + ) + } + .unwrap(); data.copy_from_slice(src); Ok(data.len()) } fn write_addrs(&mut self, start_addr: u64, data: &[u8]) -> TargetResult<(), Self> { - let mem = unsafe { self.vcpu.memory(GuestVirtAddr::new(start_addr), data.len()) }; + // Safety: self.vm.mem is not altered during the lifetime of mem. + let mem = unsafe { + self.vm.mem.slice_at_mut( + virt_to_phys(GuestVirtAddr::new(start_addr), &self.vm.mem).map_err(|_err| ())?, + data.len(), + ) + } + .unwrap(); + mem.copy_from_slice(data); Ok(()) } diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index 6c89fcb5..e141fdff 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -1,18 +1,14 @@ use std::{ ffi::OsString, path::{Path, PathBuf}, - slice, sync::{Arc, Mutex}, }; use kvm_bindings::*; use kvm_ioctls::{VcpuExit, VcpuFd, VmFd}; -use uhyve_interface::{GuestPhysAddr, GuestVirtAddr, Hypercall}; +use uhyve_interface::{GuestPhysAddr, Hypercall}; use vmm_sys_util::eventfd::EventFd; -use x86_64::{ - registers::control::{Cr0Flags, Cr4Flags}, - structures::paging::PageTableFlags, -}; +use x86_64::registers::control::{Cr0Flags, Cr4Flags}; use crate::{ consts::*, @@ -130,12 +126,6 @@ pub struct KvmCpu { } impl KvmCpu { - pub unsafe fn memory(&mut self, start_addr: GuestVirtAddr, len: usize) -> &mut [u8] { - let phys = self.virt_to_phys(start_addr); - let host = self.host_address(phys); - slice::from_raw_parts_mut(host as *mut u8, len) - } - pub fn new( id: u32, kernel_path: PathBuf, @@ -373,41 +363,6 @@ impl VirtualCPU for KvmCpu { addr.as_u64() as usize + self.vm_start } - fn virt_to_phys(&self, addr: GuestVirtAddr) -> GuestPhysAddr { - // TODO: This fn is curently x86_64 only - /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). - pub const PAGE_BITS: u64 = 12; - - /// Number of bits of the index in each table (PML4, PDPT, PDT, PGT). - pub const PAGE_MAP_BITS: usize = 9; - - let executable_disable_mask = !u64::try_from(PageTableFlags::NO_EXECUTE.bits()).unwrap(); - let mut page_table = self.host_address(BOOT_PML4) as *const u64; - let mut page_bits = 39; - let mut entry: u64 = 0; - - for _i in 0..4 { - let index = (addr.as_u64() >> page_bits) & ((1 << PAGE_MAP_BITS) - 1); - entry = unsafe { *page_table.add(index as usize) & executable_disable_mask }; - - // bit 7 is set if this entry references a 1 GiB (PDPT) or 2 MiB (PDT) page. - if entry & u64::try_from(PageTableFlags::HUGE_PAGE.bits()).unwrap() != 0 { - return GuestPhysAddr::new( - (entry & ((!0u64) << page_bits)) | (addr.as_u64() & !((!0_u64) << page_bits)), - ); - } else { - page_table = self.host_address(GuestPhysAddr::new( - (entry & !((1 << PAGE_BITS) - 1)) as *const u64 as u64, - )) as *const u64; - page_bits -= PAGE_MAP_BITS; - } - } - - GuestPhysAddr::new( - (entry & ((!0u64) << PAGE_BITS)) | (addr.as_u64() & !((!0u64) << PAGE_BITS)), - ) - } - fn r#continue(&mut self) -> HypervisorResult { loop { match self.vcpu.run() { diff --git a/src/macos/aarch64/vcpu.rs b/src/macos/aarch64/vcpu.rs index 59b48eb3..72bf948b 100644 --- a/src/macos/aarch64/vcpu.rs +++ b/src/macos/aarch64/vcpu.rs @@ -159,10 +159,6 @@ impl VirtualCPU for XhyveCpu { addr + self.vm_start } - fn virt_to_phys(&self, _addr: usize) -> usize { - 0 - } - fn r#continue(&mut self) -> HypervisorResult { loop { self.vcpu.run()?; diff --git a/src/macos/x86_64/vcpu.rs b/src/macos/x86_64/vcpu.rs index da41da14..78790b26 100644 --- a/src/macos/x86_64/vcpu.rs +++ b/src/macos/x86_64/vcpu.rs @@ -10,10 +10,10 @@ use std::{ use burst::x86::{disassemble_64, InstructionOperation, OperandType}; use lazy_static::lazy_static; use log::{debug, trace}; -use uhyve_interface::{GuestPhysAddr, GuestVirtAddr, Hypercall}; +use uhyve_interface::{GuestPhysAddr, Hypercall}; use x86_64::{ registers::control::{Cr0Flags, Cr4Flags}, - structures::{gdt::SegmentSelector, paging::PageTableFlags}, + structures::gdt::SegmentSelector, PrivilegeLevel, }; use xhypervisor::{ @@ -649,41 +649,6 @@ impl VirtualCPU for XhyveCpu { addr.as_u64() as usize + self.vm_start } - fn virt_to_phys(&self, addr: GuestVirtAddr) -> GuestPhysAddr { - // TODO: This fn is curently x86_64 only - /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). - pub const PAGE_BITS: u64 = 12; - - /// Number of bits of the index in each table (PML4, PDPT, PDT, PGT). - pub const PAGE_MAP_BITS: usize = 9; - - let executable_disable_mask = !u64::try_from(PageTableFlags::NO_EXECUTE.bits()).unwrap(); - let mut page_table = self.host_address(BOOT_PML4) as *const u64; - let mut page_bits = 39; - let mut entry: u64 = 0; - - for _i in 0..4 { - let index = (addr.as_u64() >> page_bits) & ((1 << PAGE_MAP_BITS) - 1); - entry = unsafe { *page_table.add(index as usize) & executable_disable_mask }; - - // bit 7 is set if this entry references a 1 GiB (PDPT) or 2 MiB (PDT) page. - if entry & u64::try_from(PageTableFlags::HUGE_PAGE.bits()).unwrap() != 0 { - return GuestPhysAddr::new( - (entry & ((!0u64) << page_bits)) | (addr.as_u64() & !((!0_u64) << page_bits)), - ); - } else { - page_table = self.host_address(GuestPhysAddr::new( - (entry & !((1 << PAGE_BITS) - 1)) as *const u64 as u64, - )) as *const u64; - page_bits -= PAGE_MAP_BITS; - } - } - - GuestPhysAddr::new( - (entry & ((!0u64) << PAGE_BITS)) | (addr.as_u64() & !((!0u64) << PAGE_BITS)), - ) - } - fn r#continue(&mut self) -> HypervisorResult { loop { /*if self.extint_pending == true { diff --git a/src/vcpu.rs b/src/vcpu.rs index 0ca24274..30d7cd3a 100644 --- a/src/vcpu.rs +++ b/src/vcpu.rs @@ -1,8 +1,6 @@ use std::{ffi::OsString, io, io::Write, mem, os::unix::ffi::OsStrExt, path::Path, slice}; -use uhyve_interface::{ - parameters::*, GuestPhysAddr, GuestVirtAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC, -}; +use uhyve_interface::{parameters::*, GuestPhysAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; /// The trait and fns that a virtual cpu requires use crate::{os::DebugExitInfo, HypervisorResult}; @@ -36,9 +34,6 @@ pub trait VirtualCPU { /// Translates an address from the VM's physical space into the hosts virtual space. fn host_address(&self, addr: GuestPhysAddr) -> usize; - /// Looks up the guests pagetable and translates a guest's virtual address to a guest's physical address. - fn virt_to_phys(&self, addr: GuestVirtAddr) -> GuestPhysAddr; - /// Returns the (host) path of the kernel binary. fn kernel_path(&self) -> &Path; diff --git a/src/vm.rs b/src/vm.rs index 7f2a258c..33cb5dc3 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -77,7 +77,7 @@ pub struct UhyveVm { offset: u64, entry_point: u64, stack_address: u64, - mem: MmapMemory, + pub mem: MmapMemory, num_cpus: u32, path: PathBuf, args: Vec, From 2ab958c5c5c206be84a85015771add9e92456570 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Thu, 22 Jun 2023 15:19:46 +0200 Subject: [PATCH 15/29] Minor fomatting fixes --- src/linux/gdb/section_offsets.rs | 5 +---- src/mem.rs | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/linux/gdb/section_offsets.rs b/src/linux/gdb/section_offsets.rs index 4774c804..9bf88b9f 100644 --- a/src/linux/gdb/section_offsets.rs +++ b/src/linux/gdb/section_offsets.rs @@ -1,7 +1,4 @@ -use gdbstub::target::{ - ext::section_offsets::Offsets, - {self}, -}; +use gdbstub::target::{self, ext::section_offsets::Offsets}; use super::GdbUhyve; diff --git a/src/mem.rs b/src/mem.rs index f4a39b3c..68dbc577 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -1,6 +1,5 @@ use std::{mem::MaybeUninit, ops::Index, os::raw::c_void, ptr::NonNull}; -use log::debug; use nix::sys::mman::*; use thiserror::Error; use uhyve_interface::GuestPhysAddr; From fe19fc4c21a9ad167a771ea2671142ba74a97393 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Mon, 26 Jun 2023 10:18:53 +0200 Subject: [PATCH 16/29] Redesigned VM<->VCpu interaction: Vcpu contains an Arc to the VM to access any non-cpu related information --- src/linux/gdb/mod.rs | 12 +++-- src/linux/mod.rs | 15 +++--- src/linux/x86_64/kvm_cpu.rs | 98 ++++++++++++++++--------------------- src/macos/aarch64/vcpu.rs | 8 +-- src/macos/mod.rs | 6 +-- src/macos/x86_64/vcpu.rs | 42 ++++++---------- src/vcpu.rs | 19 ++++--- src/virtio.rs | 18 +++---- src/vm.rs | 45 +++++------------ 9 files changed, 107 insertions(+), 156 deletions(-) diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index 29e5e717..9f4fd0ed 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -2,7 +2,13 @@ mod breakpoints; mod regs; mod section_offsets; -use std::{io::Read, net::TcpStream, sync::Once, thread, time::Duration}; +use std::{ + io::Read, + net::TcpStream, + sync::{Arc, Once}, + thread, + time::Duration, +}; use gdbstub::{ common::Signal, @@ -30,14 +36,14 @@ use crate::{ }; pub struct GdbUhyve { - vm: UhyveVm, + vm: Arc>, vcpu: KvmCpu, hw_breakpoints: HwBreakpoints, sw_breakpoints: SwBreakpoints, } impl GdbUhyve { - pub fn new(vm: UhyveVm, vcpu: KvmCpu) -> Self { + pub fn new(vm: Arc>, vcpu: KvmCpu) -> Self { Self { vm, vcpu, diff --git a/src/linux/mod.rs b/src/linux/mod.rs index ffa71fc1..d917ceb4 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -93,7 +93,7 @@ impl UhyveVm { let this = Arc::new(self); let threads = (0..this.num_cpus()) .map(|cpu_id| { - let vm = this.clone(); + let parent_vm = this.clone(); let barrier = barrier.clone(); let local_cpu_affinity = cpu_affinity .as_ref() @@ -109,9 +109,7 @@ impl UhyveVm { None => debug!("No affinity specified, not binding thread"), } - let mut cpu = vm.create_cpu(cpu_id).unwrap(); - cpu.init(vm.get_entry_point(), vm.stack_address(), cpu_id) - .unwrap(); + let mut cpu = KvmCpu::new(cpu_id, parent_vm.clone()).unwrap(); thread::sleep(std::time::Duration::from_millis(cpu_id as u64 * 50)); @@ -167,13 +165,12 @@ impl UhyveVm { None => debug!("No affinity specified, not binding thread"), } - let mut cpu = self.create_cpu(cpu_id).unwrap(); - cpu.init(self.get_entry_point(), self.stack_address(), cpu_id) - .unwrap(); + let this = Arc::new(self); + let cpu = KvmCpu::new(cpu_id, this.clone()).unwrap(); - let connection = wait_for_gdb_connection(self.gdb_port.unwrap()).unwrap(); + let connection = wait_for_gdb_connection(this.gdb_port.unwrap()).unwrap(); let debugger = GdbStub::new(connection); - let mut debuggable_vcpu = GdbUhyve::new(self, cpu); + let mut debuggable_vcpu = GdbUhyve::new(this, cpu); match debugger .run_blocking::(&mut debuggable_vcpu) diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index e141fdff..d0c8b866 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -1,8 +1,4 @@ -use std::{ - ffi::OsString, - path::{Path, PathBuf}, - sync::{Arc, Mutex}, -}; +use std::sync::{Arc, Mutex}; use kvm_bindings::*; use kvm_ioctls::{VcpuExit, VcpuFd, VmFd}; @@ -16,6 +12,7 @@ use crate::{ mem::MmapMemory, vcpu::{VcpuStopReason, VirtualCPU}, virtio::*, + vm::UhyveVm, HypervisorResult, }; @@ -118,38 +115,11 @@ pub fn initialize_kvm(mem: &MmapMemory, use_pit: bool) -> HypervisorResult<()> { pub struct KvmCpu { id: u32, vcpu: VcpuFd, - vm_start: usize, - kernel_path: PathBuf, - args: Vec, - virtio_device: Arc>, + parent_vm: Arc>, pci_addr: Option, } impl KvmCpu { - pub fn new( - id: u32, - kernel_path: PathBuf, - args: Vec, - vm_start: usize, - virtio_device: Arc>, - ) -> HypervisorResult { - let vcpu = KVM_ACCESS - .lock() - .unwrap() - .as_mut() - .expect("KVM is not initialized yet") - .create_vcpu(id.try_into().unwrap())?; - Ok(KvmCpu { - id, - vcpu, - vm_start, - kernel_path, - args, - virtio_device, - pci_addr: None, - }) - } - fn setup_cpuid(&self) -> Result<(), kvm_ioctls::Error> { //debug!("Setup cpuid"); @@ -333,9 +303,7 @@ impl KvmCpu { pub fn get_vcpu_mut(&mut self) -> &mut VcpuFd { &mut self.vcpu } -} -impl VirtualCPU for KvmCpu { fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { self.setup_long_mode(entry_point, stack_address, cpu_id)?; self.setup_cpuid()?; @@ -350,17 +318,29 @@ impl VirtualCPU for KvmCpu { Ok(()) } +} - fn kernel_path(&self) -> &Path { - self.kernel_path.as_path() - } +impl VirtualCPU for KvmCpu { + fn new(id: u32, parent_vm: Arc>) -> HypervisorResult { + let vcpu = KVM_ACCESS + .lock() + .unwrap() + .as_mut() + .expect("KVM is not initialized yet") + .create_vcpu(id as u64)?; + let mut kvcpu = KvmCpu { + id, + vcpu, + parent_vm: parent_vm.clone(), + pci_addr: None, + }; + kvcpu.init(parent_vm.get_entry_point(), parent_vm.stack_address(), id)?; - fn args(&self) -> &[OsString] { - self.args.as_slice() + Ok(kvcpu) } fn host_address(&self, addr: GuestPhysAddr) -> usize { - addr.as_u64() as usize + self.vm_start + unimplemented!() } fn r#continue(&mut self) -> HypervisorResult { @@ -378,7 +358,8 @@ impl VirtualCPU for KvmCpu { PCI_CONFIG_DATA_PORT => { if let Some(pci_addr) = self.pci_addr { if pci_addr & 0x1ff800 == 0 { - let virtio_device = self.virtio_device.lock().unwrap(); + let virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); virtio_device.handle_read(pci_addr & 0x3ff, addr); } else { unsafe { *(addr.as_ptr() as *mut u32) = 0xffffffff }; @@ -389,28 +370,28 @@ impl VirtualCPU for KvmCpu { } PCI_CONFIG_ADDRESS_PORT => {} VIRTIO_PCI_STATUS => { - let virtio_device = self.virtio_device.lock().unwrap(); + let virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device.read_status(addr); } VIRTIO_PCI_HOST_FEATURES => { - let virtio_device = self.virtio_device.lock().unwrap(); + let virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device.read_host_features(addr); } VIRTIO_PCI_GUEST_FEATURES => { - let mut virtio_device = self.virtio_device.lock().unwrap(); + let mut virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device.read_requested_features(addr); } VIRTIO_PCI_CONFIG_OFF_MSIX_OFF..=VIRTIO_PCI_CONFIG_OFF_MSIX_OFF_MAX => { - let virtio_device = self.virtio_device.lock().unwrap(); + let virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device .read_mac_byte(addr, port - VIRTIO_PCI_CONFIG_OFF_MSIX_OFF); } VIRTIO_PCI_ISR => { - let mut virtio_device = self.virtio_device.lock().unwrap(); + let mut virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device.reset_interrupt() } VIRTIO_PCI_LINK_STATUS_MSIX_OFF => { - let virtio_device = self.virtio_device.lock().unwrap(); + let virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device.read_link_status(addr); } _ => { @@ -445,7 +426,7 @@ impl VirtualCPU for KvmCpu { if let Some(pci_addr) = self.pci_addr { if pci_addr & 0x1ff800 == 0 { let mut virtio_device = - self.virtio_device.lock().unwrap(); + self.parent_vm.virtio_device.lock().unwrap(); virtio_device.handle_write(pci_addr & 0x3ff, addr); } } @@ -454,24 +435,29 @@ impl VirtualCPU for KvmCpu { self.pci_addr = Some(unsafe { *(addr.as_ptr() as *const u32) }); } VIRTIO_PCI_STATUS => { - let mut virtio_device = self.virtio_device.lock().unwrap(); + let mut virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); virtio_device.write_status(addr); } VIRTIO_PCI_GUEST_FEATURES => { - let mut virtio_device = self.virtio_device.lock().unwrap(); + let mut virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); virtio_device.write_requested_features(addr); } VIRTIO_PCI_QUEUE_NOTIFY => { - let mut virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.handle_notify_output(addr, self); + let mut virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); + virtio_device.handle_notify_output(addr, &self.parent_vm.mem); } VIRTIO_PCI_QUEUE_SEL => { - let mut virtio_device = self.virtio_device.lock().unwrap(); + let mut virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); virtio_device.write_selected_queue(addr); } VIRTIO_PCI_QUEUE_PFN => { - let mut virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.write_pfn(addr, self); + let mut virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); + virtio_device.write_pfn(addr, &self.parent_vm.mem); } _ => { panic!("Unhandled IO exit: 0x{:x}", port); diff --git a/src/macos/aarch64/vcpu.rs b/src/macos/aarch64/vcpu.rs index 72bf948b..6cedb4f8 100644 --- a/src/macos/aarch64/vcpu.rs +++ b/src/macos/aarch64/vcpu.rs @@ -24,17 +24,15 @@ pub struct XhyveCpu { kernel_path: PathBuf, args: Vec, vcpu: xhypervisor::VirtualCpu, - vm_start: usize, } impl XhyveCpu { - pub fn new(id: u32, kernel_path: PathBuf, args: Vec, vm_start: usize) -> XhyveCpu { + pub fn new(id: u32, kernel_path: PathBuf, args: Vec) -> XhyveCpu { Self { id, kernel_path, args, vcpu: xhypervisor::VirtualCpu::new().unwrap(), - vm_start, } } } @@ -155,10 +153,6 @@ impl VirtualCPU for XhyveCpu { self.args.as_slice() } - fn host_address(&self, addr: usize) -> usize { - addr + self.vm_start - } - fn r#continue(&mut self) -> HypervisorResult { loop { self.vcpu.run()?; diff --git a/src/macos/mod.rs b/src/macos/mod.rs index 95526b13..7397eab9 100644 --- a/src/macos/mod.rs +++ b/src/macos/mod.rs @@ -34,7 +34,7 @@ impl UhyveVm { let this = Arc::new(self); (0..this.num_cpus()).for_each(|cpu_id| { - let vm = this.clone(); + let parent_vm = this.clone(); let exit_tx = exit_tx.clone(); let local_cpu_affinity = match &cpu_affinity { @@ -53,9 +53,7 @@ impl UhyveVm { None => debug!("No affinity specified, not binding thread"), } - let mut cpu = vm.create_cpu(cpu_id).unwrap(); - cpu.init(vm.get_entry_point(), vm.stack_address(), cpu_id) - .unwrap(); + let mut cpu = XhyveCpu::new(cpu_id, parent_vm.clone()).unwrap(); // jump into the VM and execute code of the guest let result = cpu.run(); diff --git a/src/macos/x86_64/vcpu.rs b/src/macos/x86_64/vcpu.rs index 78790b26..71e4238c 100644 --- a/src/macos/x86_64/vcpu.rs +++ b/src/macos/x86_64/vcpu.rs @@ -2,9 +2,7 @@ use std::{ arch::x86_64::__cpuid_count, - ffi::OsString, - path::{Path, PathBuf}, - sync::Mutex, + sync::{Arc, Mutex}, }; use burst::x86::{disassemble_64, InstructionOperation, OperandType}; @@ -35,6 +33,7 @@ use crate::{ consts::*, macos::x86_64::ioapic::IoApic, vcpu::{VcpuStopReason, VirtualCPU}, + vm::UhyveVm, HypervisorResult, }; @@ -156,25 +155,12 @@ lazy_static! { pub struct XhyveCpu { id: u32, - kernel_path: PathBuf, - args: Vec, vcpu: xhypervisor::VirtualCpu, - vm_start: usize, + parent_vm: Arc>, apic_base: u64, } impl XhyveCpu { - pub fn new(id: u32, kernel_path: PathBuf, args: Vec, vm_start: usize) -> XhyveCpu { - XhyveCpu { - id, - kernel_path, - args, - vcpu: xhypervisor::VirtualCpu::new().unwrap(), - vm_start, - apic_base: APIC_DEFAULT_BASE, - } - } - fn setup_system_gdt(&mut self) -> Result<(), xhypervisor::Error> { debug!("Setup GDT"); @@ -598,9 +584,7 @@ impl XhyveCpu { pub fn get_vcpu(&self) -> &xhypervisor::VirtualCpu { &self.vcpu } -} -impl VirtualCPU for XhyveCpu { fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { self.setup_capabilities()?; self.setup_msr()?; @@ -636,17 +620,19 @@ impl VirtualCPU for XhyveCpu { Ok(()) } +} - fn kernel_path(&self) -> &Path { - self.kernel_path.as_path() - } - - fn args(&self) -> &[OsString] { - self.args.as_slice() - } +impl VirtualCPU for XhyveCpu { + fn new(id: u32, parent_vm: Arc>) -> HypervisorResult { + let mut vcpu = XhyveCpu { + id, + parent_vm: parent_vm.clone(), + vcpu: xhypervisor::VirtualCpu::new().unwrap(), + apic_base: APIC_DEFAULT_BASE, + }; + vcpu.init(parent_vm.get_entry_point(), parent_vm.stack_address(), id)?; - fn host_address(&self, addr: GuestPhysAddr) -> usize { - addr.as_u64() as usize + self.vm_start + Ok(vcpu) } fn r#continue(&mut self) -> HypervisorResult { diff --git a/src/vcpu.rs b/src/vcpu.rs index 30d7cd3a..73f90937 100644 --- a/src/vcpu.rs +++ b/src/vcpu.rs @@ -1,7 +1,10 @@ -use std::{ffi::OsString, io, io::Write, mem, os::unix::ffi::OsStrExt, path::Path, slice}; +use std::{ + ffi::OsString, io, io::Write, mem, os::unix::ffi::OsStrExt, path::Path, slice, sync::Arc, +}; use uhyve_interface::{parameters::*, GuestPhysAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; +use crate::vm::UhyveVm; /// The trait and fns that a virtual cpu requires use crate::{os::DebugExitInfo, HypervisorResult}; @@ -18,9 +21,9 @@ pub enum VcpuStopReason { } /// Functionality a virtual CPU backend must provide to be used by uhyve -pub trait VirtualCPU { - /// Initialize the cpu to start running the code ad entry_point. - fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()>; +pub trait VirtualCPU: Sized { + /// Create a new CPU object + fn new(id: u32, vm: Arc>) -> HypervisorResult; /// Continues execution. fn r#continue(&mut self) -> HypervisorResult; @@ -35,10 +38,14 @@ pub trait VirtualCPU { fn host_address(&self, addr: GuestPhysAddr) -> usize; /// Returns the (host) path of the kernel binary. - fn kernel_path(&self) -> &Path; + fn kernel_path(&self) -> &Path { + unimplemented!() + } // TODO remove - fn args(&self) -> &[OsString]; + fn args(&self) -> &[OsString] { + unimplemented!() + } /// `addr` is the address of the hypercall parameter in the guest's memory space. `data` is the /// parameter that was send to that address by the guest. diff --git a/src/virtio.rs b/src/virtio.rs index 5bd6c23b..2280fbff 100644 --- a/src/virtio.rs +++ b/src/virtio.rs @@ -6,7 +6,7 @@ use tun_tap::*; use uhyve_interface::GuestPhysAddr; use virtio_bindings::bindings::virtio_net::*; -use crate::{vcpu::VirtualCPU, virtqueue::*}; +use crate::{mem::MmapMemory, virtqueue::*}; const STATUS_ACKNOWLEDGE: u8 = 0b00000001; const STATUS_DRIVER: u8 = 0b00000010; @@ -124,15 +124,15 @@ impl VirtioNetPciDevice { //TODO: how to read packets without synchronization issues } - pub fn handle_notify_output(&mut self, dest: &[u8], cpu: &impl VirtualCPU) { + pub fn handle_notify_output(&mut self, dest: &[u8], mem: &MmapMemory) { let tx_num = read_u16!(dest, 0); if tx_num == 1 && self.read_status_reg() & STATUS_DRIVER_OK == STATUS_DRIVER_OK { - self.send_available_packets(cpu); + self.send_available_packets(mem); } } // Sends packets using the tun_tap crate, subject to change - fn send_available_packets(&mut self, cpu: &impl VirtualCPU) { + fn send_available_packets(&mut self, mem: &MmapMemory) { let tx_queue = &mut self.virt_queues[TX_QUEUE]; let mut send_indices = Vec::new(); for index in tx_queue.avail_iter() { @@ -141,13 +141,13 @@ impl VirtioNetPciDevice { for index in send_indices { let desc = unsafe { tx_queue.get_descriptor(index) }; let gpa = GuestPhysAddr::new(unsafe { *(desc.addr as *const u64) }); - let hva = (*cpu).host_address(gpa) as *mut u8; + let hva = mem.host_address(gpa).unwrap(); match &self.iface { Some(tap) => unsafe { let vec = vec![0; (desc.len as usize) - size_of::()]; let slice: &[u8] = &vec; copy_nonoverlapping( - hva as *const u8, + hva, slice.as_ptr() as *mut u8, (desc.len as usize) - size_of::(), ); @@ -264,7 +264,7 @@ impl VirtioNetPciDevice { } // Register virtqueue - pub fn write_pfn(&mut self, dest: &[u8], vcpu: &impl VirtualCPU) { + pub fn write_pfn(&mut self, dest: &[u8], mem: &MmapMemory) { let status = self.read_status_reg(); if status & STATUS_FEATURES_OK != 0 && status & STATUS_DRIVER_OK == 0 @@ -274,8 +274,8 @@ impl VirtioNetPciDevice { #[allow(clippy::cast_ptr_alignment)] *(dest.as_ptr() as *const u64) }); - let hva = (*vcpu).host_address(gpa) as *mut u8; - let queue = unsafe { Virtqueue::new(hva, QUEUE_LIMIT) }; + let hva = mem.host_address(gpa).unwrap(); + let queue = unsafe { Virtqueue::new(hva as *mut u8, QUEUE_LIMIT) }; self.virt_queues.push(queue); } } diff --git a/src/vm.rs b/src/vm.rs index 33cb5dc3..1be4e42f 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -3,7 +3,7 @@ use std::{ fmt, fs, io, marker::PhantomData, num::NonZeroU32, - path::{Path, PathBuf}, + path::PathBuf, ptr, sync::{Arc, Mutex}, time::SystemTime, @@ -21,9 +21,7 @@ use crate::arch::x86_64::{ detect_freq_from_cpuid, detect_freq_from_cpuid_hypervisor_info, get_cpu_frequency_from_os, }; #[cfg(all(target_arch = "x86_64", target_os = "linux"))] -use crate::linux::x86_64::kvm_cpu::{initialize_kvm, KvmCpu}; -#[cfg(all(target_arch = "x86_64", target_os = "macos"))] -use crate::macos::x86_64::vcpu::XhyveCpu; +use crate::linux::x86_64::kvm_cpu::initialize_kvm; use crate::{ arch, consts::*, mem::MmapMemory, os::HypervisorError, params::Params, vcpu::VirtualCPU, virtio::*, @@ -77,13 +75,13 @@ pub struct UhyveVm { offset: u64, entry_point: u64, stack_address: u64, - pub mem: MmapMemory, + pub mem: Arc, num_cpus: u32, path: PathBuf, args: Vec, boot_info: *const RawBootInfo, verbose: bool, - virtio_device: Arc>, + pub virtio_device: Arc>, #[allow(dead_code)] // gdb is not supported on macos pub(super) gdb_port: Option, _vcpu_type: PhantomData, @@ -121,7 +119,7 @@ impl UhyveVm { offset: 0, entry_point: 0, stack_address: 0, - mem, + mem: mem.into(), num_cpus: cpu_count, path: kernel_path, args: params.kernel_args, @@ -172,8 +170,12 @@ impl UhyveVm { self.num_cpus } - fn kernel_path(&self) -> &Path { - self.path.as_path() + pub fn kernel_path(&self) -> &PathBuf { + &self.path + } + + pub fn args(&self) -> &Vec { + &self.args } fn set_boot_info(&mut self, header: *const RawBootInfo) { @@ -249,31 +251,6 @@ impl UhyveVm { } } -#[cfg(target_os = "linux")] -impl UhyveVm { - pub fn create_cpu(&self, id: u32) -> HypervisorResult { - KvmCpu::new( - id, - self.path.clone(), - self.args.clone(), - self.mem.host_address as usize, - self.virtio_device.clone(), - ) - } -} - -#[cfg(target_os = "macos")] -impl UhyveVm { - pub fn create_cpu(&self, id: u32) -> HypervisorResult { - Ok(XhyveCpu::new( - id, - self.path.clone(), - self.args.clone(), - self.mem.host_address, - )) - } -} - impl fmt::Debug for UhyveVm { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("UhyveVm") From f20f0fe4c1b4c5ffd968f3fa180ee3a9eec9865d Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Mon, 26 Jun 2023 15:52:44 +0200 Subject: [PATCH 17/29] Moved hypercall related stuff in separate module or the uhyve interface crate --- Cargo.lock | 1 + Cargo.toml | 2 +- src/hypercall.rs | 204 +++++++++++++++++++++++++ src/lib.rs | 1 + src/linux/x86_64/kvm_cpu.rs | 48 +++--- src/macos/x86_64/vcpu.rs | 58 ++++--- src/mem.rs | 10 ++ src/vcpu.rs | 242 +----------------------------- uhyve-interface/Cargo.toml | 4 + uhyve-interface/src/lib.rs | 2 +- uhyve-interface/src/parameters.rs | 37 ++++- 11 files changed, 328 insertions(+), 281 deletions(-) create mode 100644 src/hypercall.rs diff --git a/Cargo.lock b/Cargo.lock index 4acc328c..b5debc23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1416,6 +1416,7 @@ dependencies = [ name = "uhyve-interface" version = "0.1.0" dependencies = [ + "log", "num_enum", "x86_64", ] diff --git a/Cargo.toml b/Cargo.toml index 53760989..97d8a628 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,7 +56,7 @@ mac_address = "1.1" thiserror = "1.0" time = "0.3" tun-tap = { version = "0.1.3", default-features = false } -uhyve-interface = { version = "0.1", path = "uhyve-interface" } +uhyve-interface = { version = "0.1", path = "uhyve-interface", features = ["std"] } virtio-bindings = { version = "0.2", features = ["virtio-v4_14_0"] } rftrace = { version = "0.1", optional = true } rftrace-frontend = { version = "0.1", optional = true } diff --git a/src/hypercall.rs b/src/hypercall.rs new file mode 100644 index 00000000..a729084a --- /dev/null +++ b/src/hypercall.rs @@ -0,0 +1,204 @@ +use std::{ + ffi::{OsStr, OsString}, + io, + io::Write, + os::unix::ffi::OsStrExt, +}; + +use uhyve_interface::{parameters::*, GuestPhysAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; + +use crate::mem::MmapMemory; + +/// `addr` is the address of the hypercall parameter in the guest's memory space. `data` is the +/// parameter that was send to that address by the guest. +/// +/// # Safety +/// +/// - The return value is only valid, as long as the guest is halted. +/// - This fn must not be called multiple times on the same data, to avoid creating mutable aliasing. +pub unsafe fn address_to_hypercall( + mem: &MmapMemory, + addr: u16, + data: GuestPhysAddr, +) -> Option> { + if let Ok(hypercall_port) = HypercallAddress::try_from(addr) { + Some(match hypercall_port { + HypercallAddress::FileClose => { + let sysclose = mem.get_ref_mut::(data).unwrap(); + // let sysclose = unsafe { &mut *(self.host_address(data) as *mut CloseParams) }; + Hypercall::FileClose(sysclose) + } + HypercallAddress::FileLseek => { + let syslseek = mem.get_ref_mut::(data).unwrap(); + Hypercall::FileLseek(syslseek) + } + HypercallAddress::FileOpen => { + let sysopen = mem.get_ref_mut::(data).unwrap(); + Hypercall::FileOpen(sysopen) + } + HypercallAddress::FileRead => { + let sysread = mem.get_ref_mut::(data).unwrap(); + Hypercall::FileRead(sysread) + } + HypercallAddress::FileWrite => { + let syswrite = mem.get_ref_mut(data).unwrap(); + Hypercall::FileWrite(syswrite) + } + HypercallAddress::FileUnlink => { + let sysunlink = mem.get_ref_mut(data).unwrap(); + Hypercall::FileUnlink(sysunlink) + } + HypercallAddress::Exit => { + let sysexit = mem.get_ref_mut(data).unwrap(); + Hypercall::Exit(sysexit) + } + HypercallAddress::Cmdsize => { + let syssize = mem.get_ref_mut(data).unwrap(); + Hypercall::Cmdsize(syssize) + } + HypercallAddress::Cmdval => { + let syscmdval = mem.get_ref_mut(data).unwrap(); + Hypercall::Cmdval(syscmdval) + } + HypercallAddress::Uart => Hypercall::SerialWriteByte(data.as_u64() as u8), + _ => unimplemented!(), + }) + } else { + None + } +} + +/// unlink deletes a name from the filesystem. This is used to handle `unlink` syscalls from the guest. +/// TODO: UNSAFE AS *%@#. It has to be checked that the VM is allowed to unlink that file! +pub fn unlink(mem: &MmapMemory, sysunlink: &mut UnlinkParams) { + unsafe { + sysunlink.ret = libc::unlink(mem.host_address(sysunlink.name).unwrap() as *const i8); + } +} + +/// Handles an open syscall by opening a file on the host. +pub fn open(mem: &MmapMemory, sysopen: &mut OpenParams) { + unsafe { + sysopen.ret = libc::open( + mem.host_address(sysopen.name).unwrap() as *const i8, + sysopen.flags, + sysopen.mode, + ); + } +} + +/// Handles an close syscall by closing the file on the host. +pub fn close(sysclose: &mut CloseParams) { + unsafe { + sysclose.ret = libc::close(sysclose.fd); + } +} + +/// Handles an read syscall on the host. +pub fn read(mem: &MmapMemory, sysread: &mut ReadPrams) { + unsafe { + let bytes_read = libc::read( + sysread.fd, + mem.host_address(sysread.buf).unwrap() as *mut libc::c_void, + sysread.len, + ); + if bytes_read >= 0 { + sysread.ret = bytes_read; + } else { + sysread.ret = -1; + } + } +} + +/// Handles an write syscall on the host. +pub fn write(mem: &MmapMemory, syswrite: &WriteParams) -> io::Result<()> { + let mut bytes_written: usize = 0; + while bytes_written != syswrite.len { + unsafe { + let step = libc::write( + syswrite.fd, + mem.host_address(syswrite.buf + bytes_written as u64).unwrap() as *const libc::c_void, + syswrite.len - bytes_written, + ); + if step >= 0 { + bytes_written += step as usize; + } else { + return Err(io::Error::last_os_error()); + } + } + } + + Ok(()) +} + +/// Handles an write syscall on the host. +pub fn lseek(syslseek: &mut LseekParams) { + unsafe { + syslseek.offset = + libc::lseek(syslseek.fd, syslseek.offset as i64, syslseek.whence) as isize; + } +} + +/// Handles an UART syscall by writing to stdout. +pub fn uart(buf: &[u8]) -> io::Result<()> { + io::stdout().write_all(buf) +} + +/// Copies the arguments of the application into the VM's memory to the destinations specified in `syscmdval`. +pub fn copy_argv(path: &OsStr, argv: &[OsString], syscmdval: &CmdvalParams, mem: &MmapMemory) { + // copy kernel path as first argument + let argvp = mem + .host_address(syscmdval.argv) + .expect("Systemcall parameters for Cmdval are invalid") as *const GuestPhysAddr; + let arg_addrs = unsafe { std::slice::from_raw_parts(argvp, argv.len() + 1) }; + + { + let len = path.len(); + // Safety: we drop path_dest before anything else is done with mem + let path_dest = unsafe { + mem.slice_at_mut(arg_addrs[0], len + 1) + .expect("Systemcall parameters for Cmdval are invalid") + }; + + path_dest[0..len].copy_from_slice(path.as_bytes()); + path_dest[len] = 0; // argv strings are zero terminated + } + + // Copy the application arguments into the vm memory + for (counter, argument) in argv.iter().enumerate() { + let len = argument.as_bytes().len(); + let arg_dest = unsafe { + mem.slice_at_mut(arg_addrs[counter], len + 1) + .expect("Systemcall parameters for Cmdval are invalid") + }; + arg_dest[0..len].copy_from_slice(argument.as_bytes()); + arg_dest[len] = 0; + } +} + +/// Copies the environment variables into the VM's memory to the destinations specified in `syscmdval`. +pub fn copy_env(syscmdval: &CmdvalParams, mem: &MmapMemory) { + let env_len = std::env::vars_os().count(); + let envp = mem + .host_address(syscmdval.envp) + .expect("Systemcall parameters for Cmdval are invalid") as *const GuestPhysAddr; + let env_addrs = unsafe { std::slice::from_raw_parts(envp, env_len) }; + + // Copy the environment variables into the vm memory + for (counter, (key, value)) in std::env::vars_os().enumerate() { + if counter >= MAX_ARGC_ENVC.try_into().unwrap() { + warn!("Environment is larger than the maximum that can be copied to the VM. Remaining environment is ignored"); + break; + } + + let len = key.len() + value.len() + 1; + let env_dest = unsafe { + mem.slice_at_mut(env_addrs[counter], len + 1) + .expect("Systemcall parameters for Cmdval are invalid") + }; + env_dest[0..key.len()].copy_from_slice(key.as_bytes()); + env_dest[key.len()] = b'='; + env_dest[key.len() + 1..len].copy_from_slice(value.as_bytes()); + env_dest[len] = 0; + } +} diff --git a/src/lib.rs b/src/lib.rs index 1d1565c4..83e1eb1a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,6 +19,7 @@ pub use linux as os; pub mod macos; #[cfg(target_os = "macos")] pub use macos as os; +mod hypercall; pub mod mem; pub mod params; #[cfg(target_os = "linux")] diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index d0c8b866..2b023152 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -8,6 +8,7 @@ use x86_64::registers::control::{Cr0Flags, Cr4Flags}; use crate::{ consts::*, + hypercall, linux::KVM, mem::MmapMemory, vcpu::{VcpuStopReason, VirtualCPU}, @@ -339,10 +340,6 @@ impl VirtualCPU for KvmCpu { Ok(kvcpu) } - fn host_address(&self, addr: GuestPhysAddr) -> usize { - unimplemented!() - } - fn r#continue(&mut self) -> HypervisorResult { loop { match self.vcpu.run() { @@ -401,22 +398,39 @@ impl VirtualCPU for KvmCpu { VcpuExit::IoOut(port, addr) => { let data_addr = GuestPhysAddr::new(unsafe { (*(addr.as_ptr() as *const u32)) as u64 }); - if let Some(hypercall) = - unsafe { self.address_to_hypercall(port, data_addr) } - { + if let Some(hypercall) = unsafe { + hypercall::address_to_hypercall(&self.parent_vm.mem, port, data_addr) + } { match hypercall { - Hypercall::Cmdsize(syssize) => self.cmdsize(syssize), - Hypercall::Cmdval(syscmdval) => self.cmdval(syscmdval), + Hypercall::Cmdsize(syssize) => syssize + .update(self.parent_vm.kernel_path(), self.parent_vm.args()), + Hypercall::Cmdval(syscmdval) => { + hypercall::copy_argv( + self.parent_vm.kernel_path().as_os_str(), + self.parent_vm.args(), + syscmdval, + &self.parent_vm.mem, + ); + hypercall::copy_env(syscmdval, &self.parent_vm.mem); + } Hypercall::Exit(sysexit) => { - return Ok(VcpuStopReason::Exit(self.exit(sysexit))); + return Ok(VcpuStopReason::Exit(sysexit.arg)); + } + Hypercall::FileClose(sysclose) => hypercall::close(sysclose), + Hypercall::FileLseek(syslseek) => hypercall::lseek(syslseek), + Hypercall::FileOpen(sysopen) => { + hypercall::open(&self.parent_vm.mem, sysopen) + } + Hypercall::FileRead(sysread) => { + hypercall::read(&self.parent_vm.mem, sysread) + } + Hypercall::FileWrite(syswrite) => { + hypercall::write(&self.parent_vm.mem, syswrite)? + } + Hypercall::FileUnlink(sysunlink) => { + hypercall::unlink(&self.parent_vm.mem, sysunlink) } - Hypercall::FileClose(sysclose) => self.close(sysclose), - Hypercall::FileLseek(syslseek) => self.lseek(syslseek), - Hypercall::FileOpen(sysopen) => self.open(sysopen), - Hypercall::FileRead(sysread) => self.read(sysread), - Hypercall::FileWrite(syswrite) => self.write(syswrite)?, - Hypercall::FileUnlink(sysunlink) => self.unlink(sysunlink), - Hypercall::SerialWriteByte(buf) => self.uart(&[buf])?, + Hypercall::SerialWriteByte(buf) => hypercall::uart(&[buf])?, _ => panic!("Got unknown hypercall {:?}", hypercall), }; } else { diff --git a/src/macos/x86_64/vcpu.rs b/src/macos/x86_64/vcpu.rs index 71e4238c..ca76ad7b 100644 --- a/src/macos/x86_64/vcpu.rs +++ b/src/macos/x86_64/vcpu.rs @@ -31,6 +31,8 @@ use xhypervisor::{ use crate::{ consts::*, + hypercall, + hypercall::{copy_argv, copy_env}, macos::x86_64::ioapic::IoApic, vcpu::{VcpuStopReason, VirtualCPU}, vm::UhyveVm, @@ -491,7 +493,13 @@ impl XhyveCpu { let read = (qualification & (1 << 0)) != 0; let write = (qualification & (1 << 1)) != 0; let code = unsafe { - std::slice::from_raw_parts(self.host_address(GuestPhysAddr::new(rip)) as *const u8, 8) + std::slice::from_raw_parts( + self.parent_vm + .mem + .host_address(GuestPhysAddr::new(rip)) + .unwrap(), + 8, + ) }; if let Ok(instr) = disassemble_64(code, rip as usize, code.len()) { @@ -708,30 +716,42 @@ impl VirtualCPU for XhyveCpu { assert!(!input, "Invalid I/O operation"); - let data_addr: u64 = self.vcpu.read_register(&Register::RAX)? & 0xFFFFFFFF; - if let Some(hypercall) = - unsafe { self.address_to_hypercall(port, GuestPhysAddr::new(data_addr)) } - { + let data_addr = + GuestPhysAddr::new(self.vcpu.read_register(&Register::RAX)? & 0xFFFFFFFF); + if let Some(hypercall) = unsafe { + hypercall::address_to_hypercall(&self.parent_vm.mem, port, data_addr) + } { match hypercall { - Hypercall::Cmdsize(syssize) => self.cmdsize(syssize), - Hypercall::Cmdval(syscmdval) => self.cmdval(syscmdval), + Hypercall::Cmdsize(syssize) => { + syssize.update(self.parent_vm.kernel_path(), self.parent_vm.args()) + } + Hypercall::Cmdval(syscmdval) => { + copy_argv( + self.parent_vm.kernel_path().as_os_str(), + self.parent_vm.args(), + syscmdval, + &self.parent_vm.mem, + ); + copy_env(syscmdval, &self.parent_vm.mem); + } Hypercall::Exit(sysexit) => { - return Ok(VcpuStopReason::Exit(self.exit(sysexit))) + return Ok(VcpuStopReason::Exit(sysexit.arg)); + } + Hypercall::FileClose(sysclose) => hypercall::close(sysclose), + Hypercall::FileLseek(syslseek) => hypercall::lseek(syslseek), + Hypercall::FileOpen(sysopen) => { + hypercall::open(&self.parent_vm.mem, sysopen) + } + Hypercall::FileRead(sysread) => { + hypercall::read(&self.parent_vm.mem, sysread) } - Hypercall::FileClose(sysclose) => self.close(sysclose), - Hypercall::FileLseek(syslseek) => self.lseek(syslseek), - Hypercall::FileOpen(sysopen) => self.open(sysopen), - Hypercall::FileRead(sysread) => self.read(sysread), Hypercall::FileWrite(syswrite) => { - // Return an error for proper handling - self.write(syswrite).unwrap() + hypercall::write(&self.parent_vm.mem, syswrite).unwrap() } - Hypercall::FileUnlink(sysunlink) => self.unlink(sysunlink), - Hypercall::SerialWriteByte(_char) => { - // TODO Not sure why this call works different on macos... - let al = (self.vcpu.read_register(&Register::RAX)? & 0xFF) as u8; - self.uart(&[al]).unwrap(); + Hypercall::FileUnlink(sysunlink) => { + hypercall::unlink(&self.parent_vm.mem, sysunlink) } + Hypercall::SerialWriteByte(buf) => hypercall::uart(&[buf]).unwrap(), _ => panic!("Got unknown hypercall {:?}", hypercall), } self.vcpu.write_register(&Register::RIP, rip + len)?; diff --git a/src/mem.rs b/src/mem.rs index 68dbc577..d5e0a5e8 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -143,6 +143,16 @@ impl MmapMemory { pub fn read(&self, addr: GuestPhysAddr) -> Result { Ok(unsafe { self.host_address(addr)?.cast::().read_unaligned() }) } + + /// Get a reference to the type at the given address in the memory. + pub unsafe fn get_ref(&self, addr: GuestPhysAddr) -> Result<&T, MemoryError> { + Ok(unsafe { &*(self.host_address(addr)? as *const T) }) + } + + /// Get a mutable reference to the type at the given address in the memory. + pub unsafe fn get_ref_mut(&self, addr: GuestPhysAddr) -> Result<&mut T, MemoryError> { + Ok(unsafe { &mut *(self.host_address(addr)? as *mut T) }) + } } impl Drop for MmapMemory { diff --git a/src/vcpu.rs b/src/vcpu.rs index 73f90937..f0424ddc 100644 --- a/src/vcpu.rs +++ b/src/vcpu.rs @@ -1,8 +1,4 @@ -use std::{ - ffi::OsString, io, io::Write, mem, os::unix::ffi::OsStrExt, path::Path, slice, sync::Arc, -}; - -use uhyve_interface::{parameters::*, GuestPhysAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; +use std::sync::Arc; use crate::vm::UhyveVm; /// The trait and fns that a virtual cpu requires @@ -33,240 +29,4 @@ pub trait VirtualCPU: Sized { /// Prints the VCPU's registers to stdout. fn print_registers(&self); - - /// Translates an address from the VM's physical space into the hosts virtual space. - fn host_address(&self, addr: GuestPhysAddr) -> usize; - - /// Returns the (host) path of the kernel binary. - fn kernel_path(&self) -> &Path { - unimplemented!() - } - - // TODO remove - fn args(&self) -> &[OsString] { - unimplemented!() - } - - /// `addr` is the address of the hypercall parameter in the guest's memory space. `data` is the - /// parameter that was send to that address by the guest. - /// - /// # Safety - /// - /// - `data` must be a valid pointer to the data attached to the hypercall. - /// - The return value is only valid, as long as the guest is halted. - /// - This fn must not be called multiple times on the same data, to avoid creating mutable aliasing. - unsafe fn address_to_hypercall(&self, addr: u16, data: GuestPhysAddr) -> Option> { - if let Ok(hypercall_port) = HypercallAddress::try_from(addr) { - Some(match hypercall_port { - HypercallAddress::FileClose => { - let sysclose = unsafe { &mut *(self.host_address(data) as *mut CloseParams) }; - Hypercall::FileClose(sysclose) - } - HypercallAddress::FileLseek => { - let syslseek = unsafe { &mut *(self.host_address(data) as *mut LseekParams) }; - Hypercall::FileLseek(syslseek) - } - HypercallAddress::FileOpen => { - let sysopen = unsafe { &mut *(self.host_address(data) as *mut OpenParams) }; - Hypercall::FileOpen(sysopen) - } - HypercallAddress::FileRead => { - let sysread = unsafe { &mut *(self.host_address(data) as *mut ReadPrams) }; - Hypercall::FileRead(sysread) - } - HypercallAddress::FileWrite => { - let syswrite = unsafe { &*(self.host_address(data) as *const WriteParams) }; - Hypercall::FileWrite(syswrite) - } - HypercallAddress::FileUnlink => { - let sysunlink = unsafe { &mut *(self.host_address(data) as *mut UnlinkParams) }; - Hypercall::FileUnlink(sysunlink) - } - HypercallAddress::Exit => { - let sysexit = unsafe { &*(self.host_address(data) as *const ExitParams) }; - Hypercall::Exit(sysexit) - } - HypercallAddress::Cmdsize => { - let syssize = unsafe { &mut *(self.host_address(data) as *mut CmdsizeParams) }; - Hypercall::Cmdsize(syssize) - } - HypercallAddress::Cmdval => { - let syscmdval = unsafe { &*(self.host_address(data) as *const CmdvalParams) }; - Hypercall::Cmdval(syscmdval) - } - HypercallAddress::Uart => Hypercall::SerialWriteByte(data.as_u64() as u8), - _ => unimplemented!(), - }) - } else { - None - } - } - - fn cmdsize(&self, syssize: &mut CmdsizeParams) { - syssize.argc = 0; - syssize.envc = 0; - - let path = self.kernel_path(); - syssize.argsz[0] = path.as_os_str().len() as i32 + 1; - - let mut counter = 0; - for argument in self.args() { - syssize.argsz[(counter + 1) as usize] = argument.len() as i32 + 1; - - counter += 1; - } - - syssize.argc = counter + 1; - - let mut counter = 0; - for (key, value) in std::env::vars_os() { - if counter < MAX_ARGC_ENVC.try_into().unwrap() { - syssize.envsz[counter as usize] = (key.len() + value.len()) as i32 + 2; - counter += 1; - } - } - syssize.envc = counter; - - if counter >= MAX_ARGC_ENVC.try_into().unwrap() { - warn!("Environment is too large!"); - } - } - - /// Copies the arguments end environment of the application into the VM's memory. - fn cmdval(&self, syscmdval: &CmdvalParams) { - let argv = self.host_address(syscmdval.argv); - - // copy kernel path as first argument - { - let path = self.kernel_path().as_os_str(); - - let argvptr = - unsafe { self.host_address(GuestPhysAddr::new(*(argv as *mut *mut u8) as u64)) }; - let len = path.len(); - let slice = unsafe { slice::from_raw_parts_mut(argvptr as *mut u8, len + 1) }; - - // Create string for environment variable - slice[0..len].copy_from_slice(path.as_bytes()); - slice[len] = 0; - } - - // Copy the application arguments into the vm memory - for (counter, argument) in self.args().iter().enumerate() { - let argvptr = unsafe { - self.host_address(GuestPhysAddr::new( - *((argv + (counter + 1) * mem::size_of::()) as *mut *mut u8) as u64, - )) - }; - let len = argument.len(); - let slice = unsafe { slice::from_raw_parts_mut(argvptr as *mut u8, len + 1) }; - - // Create string for environment variable - slice[0..len].copy_from_slice(argument.as_bytes()); - slice[len] = 0; - } - - // Copy the environment variables into the vm memory - let mut counter = 0; - let envp = self.host_address(syscmdval.envp); - for (key, value) in std::env::vars_os() { - if counter < MAX_ARGC_ENVC.try_into().unwrap() { - let envptr = unsafe { - self.host_address(GuestPhysAddr::new( - *((envp + counter as usize * mem::size_of::()) as *mut *mut u8) - as u64, - )) - }; - let len = key.len() + value.len(); - let slice = unsafe { slice::from_raw_parts_mut(envptr as *mut u8, len + 2) }; - - // Create string for environment variable - slice[0..key.len()].copy_from_slice(key.as_bytes()); - slice[key.len()..(key.len() + 1)].copy_from_slice("=".as_bytes()); - slice[(key.len() + 1)..(len + 1)].copy_from_slice(value.as_bytes()); - slice[len + 1] = 0; - counter += 1; - } - } - } - - /// unlink deletes a name from the filesystem. This is used to handle `unlink` syscalls from the guest. - /// TODO: UNSAFE AS *%@#. It has to be checked that the VM is allowed to unlink that file! - fn unlink(&self, sysunlink: &mut UnlinkParams) { - unsafe { - sysunlink.ret = libc::unlink(self.host_address(sysunlink.name) as *const i8); - } - } - - /// Reads the exit code from an VM and returns it - fn exit(&self, sysexit: &ExitParams) -> i32 { - sysexit.arg - } - - /// Handles an open syscall by opening a file on the host. - fn open(&self, sysopen: &mut OpenParams) { - unsafe { - sysopen.ret = libc::open( - self.host_address(sysopen.name) as *const i8, - sysopen.flags, - sysopen.mode, - ); - } - } - - /// Handles an close syscall by closing the file on the host. - fn close(&self, sysclose: &mut CloseParams) { - unsafe { - sysclose.ret = libc::close(sysclose.fd); - } - } - - /// Handles an read syscall on the host. - fn read(&self, sysread: &mut ReadPrams) { - unsafe { - let bytes_read = libc::read( - sysread.fd, - self.host_address(sysread.buf) as *mut libc::c_void, - sysread.len, - ); - if bytes_read >= 0 { - sysread.ret = bytes_read; - } else { - sysread.ret = -1; - } - } - } - - /// Handles an write syscall on the host. - fn write(&self, syswrite: &WriteParams) -> io::Result<()> { - let mut bytes_written: usize = 0; - while bytes_written != syswrite.len { - unsafe { - let step = libc::write( - syswrite.fd, - self.host_address(syswrite.buf + bytes_written as u64) as *const libc::c_void, - syswrite.len - bytes_written, - ); - if step >= 0 { - bytes_written += step as usize; - } else { - return Err(io::Error::last_os_error()); - } - } - } - - Ok(()) - } - - /// Handles an write syscall on the host. - fn lseek(&self, syslseek: &mut LseekParams) { - unsafe { - syslseek.offset = - libc::lseek(syslseek.fd, syslseek.offset as i64, syslseek.whence) as isize; - } - } - - /// Handles an UART syscall by writing to stdout. - fn uart(&self, buf: &[u8]) -> io::Result<()> { - io::stdout().write_all(buf) - } } diff --git a/uhyve-interface/Cargo.toml b/uhyve-interface/Cargo.toml index 9f82b93d..be115ea0 100644 --- a/uhyve-interface/Cargo.toml +++ b/uhyve-interface/Cargo.toml @@ -15,3 +15,7 @@ categories = ["os"] [dependencies] num_enum = { version = "0.7", default-features = false } x86_64 = { version = "0.15", default-features = false } +log = {version = "0.4", optional = true} + +[features] +std = ["dep:log"] diff --git a/uhyve-interface/src/lib.rs b/uhyve-interface/src/lib.rs index d20c14fb..48135ef3 100644 --- a/uhyve-interface/src/lib.rs +++ b/uhyve-interface/src/lib.rs @@ -7,7 +7,7 @@ //! that port is the physical memory address (of the VM) of the parameters of that hypercall. //! - On `aarch64` you write to the respective [`HypercallAddress`]. The 64-bit value written to that location is the guest's physical memory address of the hypercall's parameter. -#![no_std] +#![cfg_attr(not(feature = "std"), no_std)] // TODO: Throw this out, once https://github.com/rust-lang/rfcs/issues/2783 or https://github.com/rust-lang/rust/issues/86772 is resolved use num_enum::TryFromPrimitive; diff --git a/uhyve-interface/src/parameters.rs b/uhyve-interface/src/parameters.rs index 321e21d8..1d279b68 100644 --- a/uhyve-interface/src/parameters.rs +++ b/uhyve-interface/src/parameters.rs @@ -1,5 +1,7 @@ //! Parameters for hypercalls. +use std::path::Path; + use crate::{GuestPhysAddr, MAX_ARGC_ENVC}; /// Parameters for a [`Cmdsize`](crate::Hypercall::Cmdsize) hypercall which provides the lengths of the items in the argument end environment vector. @@ -15,14 +17,45 @@ pub struct CmdsizeParams { /// Length of the items in the environment. pub envsz: [i32; MAX_ARGC_ENVC], } +impl CmdsizeParams { + #[cfg(feature = "std")] + /// Update the struct with the lengths of the given command. + /// - `path` is usually the path and name of the application. E.g., "/home/hermit/app" + /// - `args` is a list of strings that form the parameters. (E.g., `["-v", "myarg"]`) + /// + /// Note that this hypercall only transfers the sizes. It usually has to be followed up with the [`Cmdval` Hypercall](crate::Hypercall::Cmdval). + pub fn update(&mut self, path: &Path, args: &[std::ffi::OsString]) { + self.argc = 0; + + self.argsz[0] = path.as_os_str().len() as i32 + 1; + + self.argc += 1; + for argument in args { + self.argsz[(self.argc) as usize] = argument.len() as i32 + 1; + + self.argc += 1; + } + + self.envc = 0; + // let mut counter = 0; + for (key, value) in std::env::vars_os() { + if self.envc < MAX_ARGC_ENVC.try_into().unwrap() { + self.envsz[self.envc as usize] = (key.len() + value.len()) as i32 + 2; + self.envc += 1; + } else { + log::warn!("Environment is too large! {key:?}={value:?} will not be passed!"); + } + } + } +} /// Parameters for a [`Cmdval`](crate::Hypercall::Cmdval) hypercall, which copies the arguments end environment of the application into the VM's memory. #[repr(C, packed)] #[derive(Debug, Copy, Clone)] pub struct CmdvalParams { - /// Pointer to a memory section in the VM memory large enough to store the argument string. + /// Pointer to a memory section in the VM memory which holds addresses for the destinations of the individual arguments pub argv: GuestPhysAddr, - /// Pointer to a memory section in the VM memory large enough to store the environment values. + /// Pointer to a memory section in the VM memory which holds addresses for the destinations of the individual environment variables pub envp: GuestPhysAddr, } From 612fdcc7b6aa1fc88434e1785d32fd2a1d083746 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Tue, 27 Jun 2023 09:43:09 +0200 Subject: [PATCH 18/29] Removed unnecessary setter fn --- src/vm.rs | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/src/vm.rs b/src/vm.rs index 1be4e42f..b3536877 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -139,28 +139,15 @@ impl UhyveVm { self.verbose } - fn set_offset(&mut self, offset: u64) { - self.offset = offset; - } - /// Returns the section offsets relative to their base addresses pub fn get_offset(&self) -> u64 { self.offset } - /// Sets the elf entry point. - fn set_entry_point(&mut self, entry: u64) { - self.entry_point = entry; - } - pub fn get_entry_point(&self) -> u64 { self.entry_point } - fn set_stack_address(&mut self, stack_addresss: u64) { - self.stack_address = stack_addresss; - } - pub fn stack_address(&self) -> u64 { self.stack_address } @@ -178,10 +165,6 @@ impl UhyveVm { &self.args } - fn set_boot_info(&mut self, header: *const RawBootInfo) { - self.boot_info = header; - } - /// Initialize the page tables for the guest fn init_guest_mem(&mut self) { debug!("Initialize guest memory"); @@ -201,7 +184,7 @@ impl UhyveVm { // TODO: should be a random start address, if we have a relocatable executable let kernel_start_address = object.start_addr().unwrap_or(0x400000) as usize; let kernel_end_address = kernel_start_address + object.mem_size(); - self.set_offset(kernel_start_address as u64); + self.offset = kernel_start_address as u64; if kernel_end_address > self.mem.memory_size - self.mem.guest_address.as_u64() as usize { return Err(LoadKernelError::InsufficientMemory); @@ -216,7 +199,7 @@ impl UhyveVm { [kernel_start_address..kernel_end_address], kernel_start_address as u64, ); - self.set_entry_point(entry_point); + self.entry_point = entry_point; let boot_info = BootInfo { hardware_info: HardwareInfo { @@ -240,12 +223,14 @@ impl UhyveVm { let raw_boot_info_ptr = self.mem.host_address.add(BOOT_INFO_ADDR.as_u64() as usize) as *mut RawBootInfo; *raw_boot_info_ptr = RawBootInfo::from(boot_info); - self.set_boot_info(raw_boot_info_ptr); + self.boot_info = raw_boot_info_ptr; } - self.set_stack_address((kernel_start_address as u64).checked_sub(KERNEL_STACK_SIZE).expect( - "there should be enough space for the boot stack before the kernel start address", - )); + self.stack_address = (kernel_start_address as u64) + .checked_sub(KERNEL_STACK_SIZE) + .expect( + "there should be enough space for the boot stack before the kernel start address", + ); Ok(()) } From 8026009caaaf754fec3edaba65bb91ab6ea9ac58 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Mon, 29 Jan 2024 17:03:00 +0100 Subject: [PATCH 19/29] Check if GDB memory read is a valid virtual address --- src/linux/gdb/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index 9f4fd0ed..29f08e3a 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -126,10 +126,11 @@ impl SingleThreadBase for GdbUhyve { } fn read_addrs(&mut self, start_addr: u64, data: &mut [u8]) -> TargetResult { + let guest_addr = GuestVirtAddr::try_new(start_addr).map_err(|_e| TargetError::NonFatal)?; // Safety: mem is copied to data before mem can be modified. let src = unsafe { self.vm.mem.slice_at( - virt_to_phys(GuestVirtAddr::new(start_addr), &self.vm.mem).map_err(|_err| ())?, + virt_to_phys(guest_addr, &self.vm.mem).map_err(|_err| ())?, data.len(), ) } From 06beb56f1b2e6c5f216ac0ee5a0f9a9f0b489d70 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Tue, 30 Jan 2024 17:46:17 +0100 Subject: [PATCH 20/29] Fixed Error handling: Panic in hypercall handler caused remaining threads never to finish --- src/hypercall.rs | 13 ++++++++++++- src/linux/mod.rs | 14 +++++++------- src/linux/x86_64/kvm_cpu.rs | 5 +++-- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/hypercall.rs b/src/hypercall.rs index a729084a..bef6bd6c 100644 --- a/src/hypercall.rs +++ b/src/hypercall.rs @@ -115,9 +115,20 @@ pub fn write(mem: &MmapMemory, syswrite: &WriteParams) -> io::Result<()> { let mut bytes_written: usize = 0; while bytes_written != syswrite.len { unsafe { + use std::io::{Error, ErrorKind}; + + use crate::mem::MemoryError; let step = libc::write( syswrite.fd, - mem.host_address(syswrite.buf + bytes_written as u64).unwrap() as *const libc::c_void, + mem.host_address(syswrite.buf + bytes_written as u64) + .map_err(|e| match e { + MemoryError::BoundsViolation => { + unreachable!("Bounds violation after host_address function") + } + MemoryError::WrongMemoryError => { + Error::new(ErrorKind::AddrNotAvailable, e.to_string()) + } + })? as *const libc::c_void, syswrite.len - bytes_written, ); if step >= 0 { diff --git a/src/linux/mod.rs b/src/linux/mod.rs index d917ceb4..211da3da 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -124,7 +124,8 @@ impl UhyveVm { } Err(err) => { error!("CPU {} crashed with {:?}", cpu_id, err); - None + barrier.wait(); + Some(err.errno()) } } }) @@ -142,12 +143,11 @@ impl UhyveVm { .into_iter() .filter_map(|thread| thread.join().unwrap()) .collect::>(); - assert_eq!( - 1, - code.len(), - "more than one thread finished with an exit code" - ); - code[0] + match code.len() { + 0 => panic!("No return code from any CPU? Maybe all have been kicked?"), + 1 => code[0], + _ => panic!("more than one thread finished with an exit code (codes: {code:?})"), + } } fn run_gdb(self, cpu_affinity: Option>) -> i32 { diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index 2b023152..9729ae10 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -14,7 +14,7 @@ use crate::{ vcpu::{VcpuStopReason, VirtualCPU}, virtio::*, vm::UhyveVm, - HypervisorResult, + HypervisorError, HypervisorResult, }; const CPUID_EXT_HYPERVISOR: u32 = 1 << 31; @@ -425,7 +425,8 @@ impl VirtualCPU for KvmCpu { hypercall::read(&self.parent_vm.mem, sysread) } Hypercall::FileWrite(syswrite) => { - hypercall::write(&self.parent_vm.mem, syswrite)? + hypercall::write(&self.parent_vm.mem, syswrite) + .map_err(|_e| HypervisorError::new(libc::EFAULT))? } Hypercall::FileUnlink(sysunlink) => { hypercall::unlink(&self.parent_vm.mem, sysunlink) From 1ad977db5ceb644a80fd42273464eb449c97dc17 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Wed, 31 Jan 2024 17:50:27 +0100 Subject: [PATCH 21/29] Fixed incorrect type of the data buffers in the two file system hypercalls. Note: I did not increase the interface version, as there are no kernels out there that have used the incorrect type and uhyve has always treated it the way it is now. --- src/hypercall.rs | 16 ++++++++-------- uhyve-interface/src/parameters.rs | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/hypercall.rs b/src/hypercall.rs index bef6bd6c..47921ec6 100644 --- a/src/hypercall.rs +++ b/src/hypercall.rs @@ -1,13 +1,15 @@ use std::{ ffi::{OsStr, OsString}, - io, - io::Write, + io::{self, Error, ErrorKind, Write}, os::unix::ffi::OsStrExt, }; use uhyve_interface::{parameters::*, GuestPhysAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; -use crate::mem::MmapMemory; +use crate::{ + mem::{MemoryError, MmapMemory}, + virt_to_phys, +}; /// `addr` is the address of the hypercall parameter in the guest's memory space. `data` is the /// parameter that was send to that address by the guest. @@ -99,7 +101,8 @@ pub fn read(mem: &MmapMemory, sysread: &mut ReadPrams) { unsafe { let bytes_read = libc::read( sysread.fd, - mem.host_address(sysread.buf).unwrap() as *mut libc::c_void, + mem.host_address(virt_to_phys(sysread.buf, mem).unwrap()) + .unwrap() as *mut libc::c_void, sysread.len, ); if bytes_read >= 0 { @@ -115,12 +118,9 @@ pub fn write(mem: &MmapMemory, syswrite: &WriteParams) -> io::Result<()> { let mut bytes_written: usize = 0; while bytes_written != syswrite.len { unsafe { - use std::io::{Error, ErrorKind}; - - use crate::mem::MemoryError; let step = libc::write( syswrite.fd, - mem.host_address(syswrite.buf + bytes_written as u64) + mem.host_address(virt_to_phys(syswrite.buf + bytes_written as u64, mem).unwrap()) .map_err(|e| match e { MemoryError::BoundsViolation => { unreachable!("Bounds violation after host_address function") diff --git a/uhyve-interface/src/parameters.rs b/uhyve-interface/src/parameters.rs index 1d279b68..2f3887fe 100644 --- a/uhyve-interface/src/parameters.rs +++ b/uhyve-interface/src/parameters.rs @@ -2,7 +2,7 @@ use std::path::Path; -use crate::{GuestPhysAddr, MAX_ARGC_ENVC}; +use crate::{GuestPhysAddr, GuestVirtAddr, MAX_ARGC_ENVC}; /// Parameters for a [`Cmdsize`](crate::Hypercall::Cmdsize) hypercall which provides the lengths of the items in the argument end environment vector. #[repr(C, packed)] @@ -84,7 +84,7 @@ pub struct WriteParams { /// File descriptor of the file. pub fd: i32, /// Buffer to be written into the file. - pub buf: GuestPhysAddr, + pub buf: GuestVirtAddr, /// Number of bytes in the buffer to be written. pub len: usize, } @@ -96,7 +96,7 @@ pub struct ReadPrams { /// File descriptor of the file. pub fd: i32, /// Buffer to read the file into. - pub buf: GuestPhysAddr, + pub buf: GuestVirtAddr, /// Number of bytes to read into the buffer. pub len: usize, /// Number of bytes read on success. `-1` on failure. From 0338c8137a1fecb4bf31b6b6d07f214533a8e360 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Tue, 12 Mar 2024 15:20:34 +0100 Subject: [PATCH 22/29] Renamed MIN_PAGING_MEM_SIZE to MIN_PHYSMEM_SIZE --- src/arch/x86_64/mod.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index e97ae0fc..496d98cd 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -120,16 +120,16 @@ pub fn create_gdt_entry(flags: u64, base: u64, limit: u64) -> u64 { | (limit & 0x0000ffffu64) } -pub const MIN_PAGING_MEM_SIZE: usize = BOOT_PDE.as_u64() as usize + 0x1000; +pub const MIN_PHYSMEM_SIZE: usize = BOOT_PDE.as_u64() as usize + 0x1000; /// Creates the pagetables and the GDT in the guest memory space. /// -/// The memory slice must be larger than [`MIN_PAGING_MEM_SIZE`]. +/// The memory slice must be larger than [`MIN_PHYSMEM_SIZE`]. /// Also, the memory `mem` needs to be zeroed for [`PAGE_SIZE`] bytes at the /// offsets [`BOOT_PML4`] and [`BOOT_PDPTE`], otherwise the integrity of the /// pagetables and thus the integrity of the guest's memory is not ensured pub fn initialize_pagetables(mem: &mut [u8]) { - assert!(mem.len() >= MIN_PAGING_MEM_SIZE); + assert!(mem.len() >= MIN_PHYSMEM_SIZE); let mem_addr = std::ptr::addr_of_mut!(mem[0]); let (gdt_entry, pml4, pdpte, pde); @@ -321,8 +321,8 @@ mod tests { #[test] fn test_pagetable_initialization() { - let mut mem: Vec = vec![0; MIN_PAGING_MEM_SIZE]; - initialize_pagetables((&mut mem[0..MIN_PAGING_MEM_SIZE]).try_into().unwrap()); + let mut mem: Vec = vec![0; MIN_PHYSMEM_SIZE]; + initialize_pagetables((&mut mem[0..MIN_PHYSMEM_SIZE]).try_into().unwrap()); // Test pagetable setup let addr_pdpte = u64::from_le_bytes( @@ -369,13 +369,7 @@ mod tests { #[test] fn test_virt_to_phys() { - let mem = MmapMemory::new( - 0, - MIN_PAGING_MEM_SIZE * 2, - GuestPhysAddr::new(0), - true, - true, - ); + let mem = MmapMemory::new(0, MIN_PHYSMEM_SIZE * 2, GuestPhysAddr::new(0), true, true); initialize_pagetables(unsafe { mem.as_slice_mut() }.try_into().unwrap()); // Get the address of the first entry in PML4 (the address of the PML4 itself) From dfe54a0b07308de32d74696e78411531811c941a Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Tue, 12 Mar 2024 15:28:06 +0100 Subject: [PATCH 23/29] moved generalized init_guest_mem to crate::arch --- src/arch/aarch64/mod.rs | 72 ++++++++++++++++++++++++++++++++++++ src/arch/x86_64/mod.rs | 5 +++ src/macos/aarch64/uhyve.rs | 76 -------------------------------------- src/vm.rs | 4 +- 4 files changed, 78 insertions(+), 79 deletions(-) diff --git a/src/arch/aarch64/mod.rs b/src/arch/aarch64/mod.rs index cd087dab..362c0b5a 100644 --- a/src/arch/aarch64/mod.rs +++ b/src/arch/aarch64/mod.rs @@ -58,3 +58,75 @@ bitflags! { const D_BIT = 0x00000200; } } + +pub fn init_guest_mem(mem: &mut [u8]) { + let mem_addr = std::ptr::addr_of_mut!(mem[0]); + + assert(mem.len() >= BOOT_PGT + 512 * sizeof::()); + let pgt_slice = unsafe { + std::slice::from_raw_parts_mut( + mem_addr.offset(BOOT_PGT.try_into().unwrap()) as *mut u64, + 512, + ) + }; + pgt_slice.fill(0); + pgt_slice[0] = BOOT_PGT + 0x1000 + PT_PT; + pgt_slice[511] = BOOT_PGT + PT_PT + PT_SELF; + + assert(mem.len() >= BOOT_PGT + 0x1000 + 512 * sizeof::()); + let pgt_slice = unsafe { + std::slice::from_raw_parts_mut( + mem_addr.offset((BOOT_PGT + 0x1000).try_into().unwrap()) as *mut u64, + 512, + ) + }; + pgt_slice.fill(0); + pgt_slice[0] = BOOT_PGT + 0x2000 + PT_PT; + + assert(mem.len() >= BOOT_PGT + 0x2000 + 512 * sizeof::()); + let pgt_slice = unsafe { + std::slice::from_raw_parts_mut( + mem_addr.offset((BOOT_PGT + 0x2000).try_into().unwrap()) as *mut u64, + 512, + ) + }; + pgt_slice.fill(0); + pgt_slice[0] = BOOT_PGT + 0x3000 + PT_PT; + pgt_slice[1] = BOOT_PGT + 0x4000 + PT_PT; + pgt_slice[2] = BOOT_PGT + 0x5000 + PT_PT; + + assert(mem.len() >= BOOT_PGT + 0x3000 + 512 * sizeof::()); + let pgt_slice = unsafe { + std::slice::from_raw_parts_mut( + mem_addr.offset((BOOT_PGT + 0x3000).try_into().unwrap()) as *mut u64, + 512, + ) + }; + pgt_slice.fill(0); + // map uhyve ports into the virtual address space + pgt_slice[0] = PT_MEM_CD; + // map BootInfo into the virtual address space + pgt_slice[BOOT_INFO_ADDR as usize / PAGE_SIZE] = BOOT_INFO_ADDR + PT_MEM; + + assert(mem.len() >= BOOT_PGT + 0x4000 + 512 * sizeof::()); + let pgt_slice = unsafe { + std::slice::from_raw_parts_mut( + mem_addr.offset((BOOT_PGT + 0x4000).try_into().unwrap()) as *mut u64, + 512, + ) + }; + for (idx, i) in pgt_slice.iter_mut().enumerate() { + *i = 0x200000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM; + } + + assert(mem.len() >= BOOT_PGT + 0x5000 + 512 * sizeof::()); + let pgt_slice = unsafe { + std::slice::from_raw_parts_mut( + mem_addr.offset((BOOT_PGT + 0x5000).try_into().unwrap()) as *mut u64, + 512, + ) + }; + for (idx, i) in pgt_slice.iter_mut().enumerate() { + *i = 0x400000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM; + } +} diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 496d98cd..5b8dc1c8 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -237,6 +237,11 @@ pub fn virt_to_phys( Ok(entry.addr() + (addr.as_u64() & !((!0u64) << PAGE_BITS))) } +pub fn init_guest_mem(mem: &mut [u8]) { + // TODO: we should maybe return an error on failure (e.g., the memory is too small) + initialize_pagetables(mem); +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/macos/aarch64/uhyve.rs b/src/macos/aarch64/uhyve.rs index 554a5252..f35fd4f8 100644 --- a/src/macos/aarch64/uhyve.rs +++ b/src/macos/aarch64/uhyve.rs @@ -159,82 +159,6 @@ impl Vm for Uhyve { fn set_boot_info(&mut self, header: *const RawBootInfo) { self.boot_info = header; } - - fn init_guest_mem(&mut self) { - debug!("Initialize guest memory"); - - let (mem_addr, _) = self.guest_mem(); - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset(BOOT_PGT.try_into().unwrap()) as *mut u64, - 512, - ) - }; - for i in pgt_slice.iter_mut() { - *i = 0; - } - pgt_slice[0] = BOOT_PGT + 0x1000 + PT_PT; - pgt_slice[511] = BOOT_PGT + PT_PT + PT_SELF; - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x1000).try_into().unwrap()) as *mut u64, - 512, - ) - }; - for i in pgt_slice.iter_mut() { - *i = 0; - } - pgt_slice[0] = BOOT_PGT + 0x2000 + PT_PT; - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x2000).try_into().unwrap()) as *mut u64, - 512, - ) - }; - for i in pgt_slice.iter_mut() { - *i = 0; - } - pgt_slice[0] = BOOT_PGT + 0x3000 + PT_PT; - pgt_slice[1] = BOOT_PGT + 0x4000 + PT_PT; - pgt_slice[2] = BOOT_PGT + 0x5000 + PT_PT; - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x3000).try_into().unwrap()) as *mut u64, - 512, - ) - }; - for i in pgt_slice.iter_mut() { - *i = 0; - } - // map uhyve ports into the virtual address space - pgt_slice[0] = PT_MEM_CD; - // map BootInfo into the virtual address space - pgt_slice[BOOT_INFO_ADDR as usize / PAGE_SIZE] = BOOT_INFO_ADDR + PT_MEM; - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x4000).try_into().unwrap()) as *mut u64, - 512, - ) - }; - for (idx, i) in pgt_slice.iter_mut().enumerate() { - *i = 0x200000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM; - } - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x5000).try_into().unwrap()) as *mut u64, - 512, - ) - }; - for (idx, i) in pgt_slice.iter_mut().enumerate() { - *i = 0x400000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM; - } - } } impl Drop for Uhyve { diff --git a/src/vm.rs b/src/vm.rs index b3536877..a01010bf 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -168,9 +168,7 @@ impl UhyveVm { /// Initialize the page tables for the guest fn init_guest_mem(&mut self) { debug!("Initialize guest memory"); - - #[cfg(target_arch = "x86_64")] - crate::x86_64::initialize_pagetables( + crate::arch::init_guest_mem( unsafe { self.mem.as_slice_mut() } // slice only lives during this fn call .try_into() .expect("Guest memory is not large enough for pagetables"), From c2c3e6bb25c081c2a564492894e730d2767e2ee6 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Tue, 12 Mar 2024 15:28:06 +0100 Subject: [PATCH 24/29] Moved some paging stuff (error) into non-arch module --- src/arch/x86_64/mod.rs | 8 +------- src/lib.rs | 1 + src/paging.rs | 8 ++++++++ 3 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 src/paging.rs diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 5b8dc1c8..13d20c5d 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -18,7 +18,7 @@ use x86_64::{ PhysAddr, }; -use crate::{consts::*, mem::MmapMemory}; +use crate::{consts::*, mem::MmapMemory, paging::PagetableError}; pub const RAM_START: GuestPhysAddr = GuestPhysAddr::new(0x00); const MHZ_TO_HZ: u64 = 1000000; @@ -192,12 +192,6 @@ pub fn initialize_pagetables(mem: &mut [u8]) { } } -#[derive(Error, Debug)] -pub enum PagetableError { - #[error("The accessed virtual address is not mapped")] - InvalidAddress, -} - /// Converts a virtual address in the guest to a physical address in the guest pub fn virt_to_phys( addr: GuestVirtAddr, diff --git a/src/lib.rs b/src/lib.rs index 83e1eb1a..7c2f2177 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,6 +21,7 @@ pub mod macos; pub use macos as os; mod hypercall; pub mod mem; +pub mod paging; pub mod params; #[cfg(target_os = "linux")] pub mod shared_queue; diff --git a/src/paging.rs b/src/paging.rs new file mode 100644 index 00000000..a8d27925 --- /dev/null +++ b/src/paging.rs @@ -0,0 +1,8 @@ +//! General paging related code +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum PagetableError { + #[error("The accessed virtual address is not mapped")] + InvalidAddress, +} From 1d607d8e4b76263e4eb16c450c62bc10bb4d76d8 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Tue, 12 Mar 2024 15:28:06 +0100 Subject: [PATCH 25/29] Use GuestPhysAddr and GuestVirtAddr everywhere, also on aarch64 --- Cargo.lock | 26 +++++++++++++++++++++ src/arch/aarch64/mod.rs | 46 +++++++++++++++++++------------------- src/consts.rs | 14 ++++++------ src/macos/aarch64/vcpu.rs | 5 +++-- uhyve-interface/Cargo.toml | 9 ++++++-- uhyve-interface/src/lib.rs | 5 +++++ 6 files changed, 71 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b5debc23..c0b47d22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,25 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aarch64" +version = "0.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0adf345d8b4e2861016511db094993ee8a9f74195f55ccf62d1305d35ab91bfa" +dependencies = [ + "aarch64-cpu", + "tock-registers", +] + +[[package]] +name = "aarch64-cpu" +version = "9.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac42a04a61c19fc8196dd728022a784baecc5d63d7e256c01ad1b3fbfab26287" +dependencies = [ + "tock-registers", +] + [[package]] name = "ahash" version = "0.7.8" @@ -1350,6 +1369,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "tock-registers" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "696941a0aee7e276a165a978b37918fd5d22c55c3d6bda197813070ca9c0f21c" + [[package]] name = "toml_datetime" version = "0.6.5" @@ -1416,6 +1441,7 @@ dependencies = [ name = "uhyve-interface" version = "0.1.0" dependencies = [ + "aarch64", "log", "num_enum", "x86_64", diff --git a/src/arch/aarch64/mod.rs b/src/arch/aarch64/mod.rs index 362c0b5a..cc203067 100644 --- a/src/arch/aarch64/mod.rs +++ b/src/arch/aarch64/mod.rs @@ -1,6 +1,9 @@ +use crate::consts::{BOOT_INFO_ADDR, BOOT_PGT, PAGE_SIZE}; use bitflags::bitflags; +use std::mem::size_of; +use uhyve_interface::GuestPhysAddr; -pub const RAM_START: u64 = 0x00; +pub const RAM_START: GuestPhysAddr = GuestPhysAddr::new(0x00); pub const PT_DEVICE: u64 = 0x707; pub const PT_PT: u64 = 0x713; @@ -62,43 +65,40 @@ bitflags! { pub fn init_guest_mem(mem: &mut [u8]) { let mem_addr = std::ptr::addr_of_mut!(mem[0]); - assert(mem.len() >= BOOT_PGT + 512 * sizeof::()); + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 512 * size_of::()); let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset(BOOT_PGT.try_into().unwrap()) as *mut u64, - 512, - ) + std::slice::from_raw_parts_mut(mem_addr.offset(BOOT_PGT.as_u64() as isize) as *mut u64, 512) }; pgt_slice.fill(0); - pgt_slice[0] = BOOT_PGT + 0x1000 + PT_PT; - pgt_slice[511] = BOOT_PGT + PT_PT + PT_SELF; + pgt_slice[0] = BOOT_PGT.as_u64() + 0x1000 + PT_PT; + pgt_slice[511] = BOOT_PGT.as_u64() + PT_PT + PT_SELF; - assert(mem.len() >= BOOT_PGT + 0x1000 + 512 * sizeof::()); + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x1000 + 512 * size_of::()); let pgt_slice = unsafe { std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x1000).try_into().unwrap()) as *mut u64, + mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x1000) as *mut u64, 512, ) }; pgt_slice.fill(0); - pgt_slice[0] = BOOT_PGT + 0x2000 + PT_PT; + pgt_slice[0] = BOOT_PGT.as_u64() + 0x2000 + PT_PT; - assert(mem.len() >= BOOT_PGT + 0x2000 + 512 * sizeof::()); + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x2000 + 512 * size_of::()); let pgt_slice = unsafe { std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x2000).try_into().unwrap()) as *mut u64, + mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x2000) as *mut u64, 512, ) }; pgt_slice.fill(0); - pgt_slice[0] = BOOT_PGT + 0x3000 + PT_PT; - pgt_slice[1] = BOOT_PGT + 0x4000 + PT_PT; - pgt_slice[2] = BOOT_PGT + 0x5000 + PT_PT; + pgt_slice[0] = BOOT_PGT.as_u64() + 0x3000 + PT_PT; + pgt_slice[1] = BOOT_PGT.as_u64() + 0x4000 + PT_PT; + pgt_slice[2] = BOOT_PGT.as_u64() + 0x5000 + PT_PT; - assert(mem.len() >= BOOT_PGT + 0x3000 + 512 * sizeof::()); + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x3000 + 512 * size_of::()); let pgt_slice = unsafe { std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x3000).try_into().unwrap()) as *mut u64, + mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x3000) as *mut u64, 512, ) }; @@ -106,12 +106,12 @@ pub fn init_guest_mem(mem: &mut [u8]) { // map uhyve ports into the virtual address space pgt_slice[0] = PT_MEM_CD; // map BootInfo into the virtual address space - pgt_slice[BOOT_INFO_ADDR as usize / PAGE_SIZE] = BOOT_INFO_ADDR + PT_MEM; + pgt_slice[BOOT_INFO_ADDR.as_u64() as usize / PAGE_SIZE] = BOOT_INFO_ADDR.as_u64() + PT_MEM; - assert(mem.len() >= BOOT_PGT + 0x4000 + 512 * sizeof::()); + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x4000 + 512 * size_of::()); let pgt_slice = unsafe { std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x4000).try_into().unwrap()) as *mut u64, + mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x4000) as *mut u64, 512, ) }; @@ -119,10 +119,10 @@ pub fn init_guest_mem(mem: &mut [u8]) { *i = 0x200000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM; } - assert(mem.len() >= BOOT_PGT + 0x5000 + 512 * sizeof::()); + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x5000 + 512 * size_of::()); let pgt_slice = unsafe { std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x5000).try_into().unwrap()) as *mut u64, + mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x5000) as *mut u64, 512, ) }; diff --git a/src/consts.rs b/src/consts.rs index 86250a6d..b33727f3 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -1,19 +1,19 @@ -use x86_64::addr::PhysAddr; +use uhyve_interface::GuestPhysAddr; pub const PAGE_SIZE: usize = 0x1000; pub const GDT_KERNEL_CODE: u16 = 1; pub const GDT_KERNEL_DATA: u16 = 2; pub const APIC_DEFAULT_BASE: u64 = 0xfee00000; -pub const BOOT_GDT: PhysAddr = PhysAddr::new(0x1000); +pub const BOOT_GDT: GuestPhysAddr = GuestPhysAddr::new(0x1000); pub const BOOT_GDT_NULL: usize = 0; pub const BOOT_GDT_CODE: usize = 1; pub const BOOT_GDT_DATA: usize = 2; pub const BOOT_GDT_MAX: usize = 3; -pub const BOOT_PML4: PhysAddr = PhysAddr::new(0x10000); -pub const BOOT_PGT: PhysAddr = BOOT_PML4; -pub const BOOT_PDPTE: PhysAddr = PhysAddr::new(0x11000); -pub const BOOT_PDE: PhysAddr = PhysAddr::new(0x12000); -pub const BOOT_INFO_ADDR: PhysAddr = PhysAddr::new(0x9000); +pub const BOOT_PML4: GuestPhysAddr = GuestPhysAddr::new(0x10000); +pub const BOOT_PGT: GuestPhysAddr = BOOT_PML4; +pub const BOOT_PDPTE: GuestPhysAddr = GuestPhysAddr::new(0x11000); +pub const BOOT_PDE: GuestPhysAddr = GuestPhysAddr::new(0x12000); +pub const BOOT_INFO_ADDR: GuestPhysAddr = GuestPhysAddr::new(0x9000); pub const EFER_SCE: u64 = 1; /* System Call Extensions */ pub const EFER_LME: u64 = 1 << 8; /* Long mode enable */ pub const EFER_LMA: u64 = 1 << 10; /* Long mode active (read-only) */ diff --git a/src/macos/aarch64/vcpu.rs b/src/macos/aarch64/vcpu.rs index 6cedb4f8..10bf0192 100644 --- a/src/macos/aarch64/vcpu.rs +++ b/src/macos/aarch64/vcpu.rs @@ -47,7 +47,8 @@ impl VirtualCPU for XhyveCpu { self.vcpu.write_register(Register::PC, entry_point)?; self.vcpu .write_system_register(SystemRegister::SP_EL1, stack_address)?; - self.vcpu.write_register(Register::X0, BOOT_INFO_ADDR)?; + self.vcpu + .write_register(Register::X0, BOOT_INFO_ADDR.as_u64())?; self.vcpu.write_register(Register::X1, cpu_id.into())?; /* @@ -99,7 +100,7 @@ impl VirtualCPU for XhyveCpu { self.vcpu .write_system_register(SystemRegister::TTBR1_EL1, 0)?; self.vcpu - .write_system_register(SystemRegister::TTBR0_EL1, BOOT_PGT)?; + .write_system_register(SystemRegister::TTBR0_EL1, BOOT_PGT.as_u64())?; /* * Prepare system control register (SCTRL) diff --git a/uhyve-interface/Cargo.toml b/uhyve-interface/Cargo.toml index be115ea0..6e92d2b8 100644 --- a/uhyve-interface/Cargo.toml +++ b/uhyve-interface/Cargo.toml @@ -14,8 +14,13 @@ categories = ["os"] [dependencies] num_enum = { version = "0.7", default-features = false } -x86_64 = { version = "0.15", default-features = false } -log = {version = "0.4", optional = true} +log = { version = "0.4", optional = true } [features] std = ["dep:log"] + +[target.'cfg(target_arch = "x86_64")'.dependencies] +x86_64 = { version = "0.15", default-features = false } + +[target.'cfg(target_arch = "aarch64")'.dependencies] +aarch64 = { version = "0.0.11", default-features = false } diff --git a/uhyve-interface/src/lib.rs b/uhyve-interface/src/lib.rs index 48135ef3..cfd33277 100644 --- a/uhyve-interface/src/lib.rs +++ b/uhyve-interface/src/lib.rs @@ -15,10 +15,15 @@ use num_enum::TryFromPrimitive; pub mod elf; pub mod parameters; +#[cfg(target_arch = "aarch64")] +pub use ::aarch64::paging::PhysAddr as GuestPhysAddr; +#[cfg(target_arch = "aarch64")] +pub use ::aarch64::paging::VirtAddr as GuestVirtAddr; #[cfg(target_arch = "x86_64")] pub use ::x86_64::addr::PhysAddr as GuestPhysAddr; #[cfg(target_arch = "x86_64")] pub use ::x86_64::addr::VirtAddr as GuestVirtAddr; + #[cfg(not(target_pointer_width = "64"))] compile_error!("Using uhyve-interface on a non-64-bit system is not (yet?) supported"); use parameters::*; From df5526c71cc69fe09ff11116b771f030d8359665 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Tue, 12 Mar 2024 15:28:06 +0100 Subject: [PATCH 26/29] Adapted macos to new UhveVm architecture --- src/macos/aarch64/mod.rs | 1 - src/macos/aarch64/uhyve.rs | 175 ------------------------------------- src/macos/aarch64/vcpu.rs | 53 +++++------ src/macos/mod.rs | 4 +- src/vm.rs | 2 +- 5 files changed, 26 insertions(+), 209 deletions(-) delete mode 100644 src/macos/aarch64/uhyve.rs diff --git a/src/macos/aarch64/mod.rs b/src/macos/aarch64/mod.rs index 58cca2f3..f1096356 100644 --- a/src/macos/aarch64/mod.rs +++ b/src/macos/aarch64/mod.rs @@ -1,4 +1,3 @@ -pub mod uhyve; pub mod vcpu; /// The size of a page. diff --git a/src/macos/aarch64/uhyve.rs b/src/macos/aarch64/uhyve.rs deleted file mode 100644 index f35fd4f8..00000000 --- a/src/macos/aarch64/uhyve.rs +++ /dev/null @@ -1,175 +0,0 @@ -use std::{ - ffi::OsString, - path::{Path, PathBuf}, - ptr, -}; - -use hermit_entry::boot_info::RawBootInfo; -use libc::{self, c_void}; -use log::debug; -use xhypervisor::{create_vm, map_mem, unmap_mem, MemPerm}; - -use crate::{ - aarch64::{PT_MEM, PT_MEM_CD, PT_PT, PT_SELF}, - consts::{BOOT_INFO_ADDR, BOOT_PGT, PAGE_SIZE}, - macos::aarch64::{vcpu::*, HYPERVISOR_PAGE_SIZE}, - params::Params, - vm::{HypervisorResult, Vm}, -}; - -pub struct Uhyve { - offset: u64, - entry_point: u64, - stack_address: u64, - mem_size: usize, - guest_mem: *mut c_void, - num_cpus: u32, - path: PathBuf, - args: Vec, - boot_info: *const RawBootInfo, - verbose: bool, -} - -impl std::fmt::Debug for Uhyve { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Uhyve") - .field("entry_point", &self.entry_point) - .field("stack_address", &self.stack_address) - .field("mem_size", &self.mem_size) - .field("guest_mem", &self.guest_mem) - .field("num_cpus", &self.num_cpus) - .field("path", &self.path) - .field("boot_info", &self.boot_info) - .field("verbose", &self.verbose) - .finish() - } -} - -impl Uhyve { - pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult { - let memory_size = params.memory_size.get(); - - assert!(HYPERVISOR_PAGE_SIZE < memory_size); - - let mem = unsafe { - libc::mmap( - std::ptr::null_mut(), - memory_size, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_PRIVATE | libc::MAP_ANON | libc::MAP_NORESERVE, - -1, - 0, - ) - }; - - assert_ne!(libc::MAP_FAILED, mem, "mmap failed"); - - debug!("Allocate memory for the guest at 0x{:x}", mem as usize); - - debug!("Create VM..."); - create_vm()?; - - debug!("Map guest memory..."); - unsafe { - map_mem( - std::slice::from_raw_parts(mem as *mut u8, HYPERVISOR_PAGE_SIZE), - 0, - MemPerm::Read, - )?; - - map_mem( - std::slice::from_raw_parts_mut( - (mem as *mut u8).offset(HYPERVISOR_PAGE_SIZE.try_into().unwrap()), - memory_size - HYPERVISOR_PAGE_SIZE, - ), - HYPERVISOR_PAGE_SIZE.try_into().unwrap(), - MemPerm::ExecAndWrite, - )?; - } - - let hyve = Uhyve { - offset: 0, - entry_point: 0, - stack_address: 0, - mem_size: memory_size, - guest_mem: mem, - num_cpus: params.cpu_count.get(), - path: kernel_path, - args: params.kernel_args, - boot_info: ptr::null(), - verbose: params.verbose, - }; - - hyve.init_guest_mem(); - - Ok(hyve) - } -} - -impl Vm for Uhyve { - fn verbose(&self) -> bool { - self.verbose - } - - fn set_offset(&mut self, offset: u64) { - self.offset = offset; - } - - fn get_offset(&self) -> u64 { - self.offset - } - - fn set_entry_point(&mut self, entry: u64) { - self.entry_point = entry; - } - - fn get_entry_point(&self) -> u64 { - self.entry_point - } - - fn set_stack_address(&mut self, stack_address: u64) { - self.stack_address = stack_address; - } - - fn stack_address(&self) -> u64 { - self.stack_address - } - - fn num_cpus(&self) -> u32 { - self.num_cpus - } - - fn guest_mem(&self) -> (*mut u8, usize) { - (self.guest_mem as *mut u8, self.mem_size) - } - - fn kernel_path(&self) -> &Path { - self.path.as_path() - } - - fn create_cpu(&self, id: u32) -> HypervisorResult { - Ok(XhyveCpu::new( - id, - self.path.clone(), - self.args.clone(), - self.guest_mem as usize, - )) - } - - fn set_boot_info(&mut self, header: *const RawBootInfo) { - self.boot_info = header; - } -} - -impl Drop for Uhyve { - fn drop(&mut self) { - unmap_mem(0, self.mem_size).unwrap(); - - unsafe { - libc::munmap(self.guest_mem, self.mem_size); - } - } -} - -unsafe impl Send for Uhyve {} -unsafe impl Sync for Uhyve {} diff --git a/src/macos/aarch64/vcpu.rs b/src/macos/aarch64/vcpu.rs index 10bf0192..0d7910bc 100644 --- a/src/macos/aarch64/vcpu.rs +++ b/src/macos/aarch64/vcpu.rs @@ -1,13 +1,10 @@ #![allow(non_snake_case)] #![allow(clippy::identity_op)] -use std::{ - ffi::OsString, - path::{Path, PathBuf}, -}; +use std::sync::Arc; use log::debug; -use uhyve_interface::Hypercall; +use uhyve_interface::{GuestPhysAddr, Hypercall}; use xhypervisor::{self, Register, SystemRegister, VirtualCpuExitReason}; use crate::{ @@ -16,28 +13,19 @@ use crate::{ PSR, TCR_FLAGS, TCR_TG1_4K, VA_BITS, }, consts::*, - vm::{HypervisorResult, VcpuStopReason, VirtualCPU}, + hypercall, + vcpu::{VcpuStopReason, VirtualCPU}, + vm::UhyveVm, + HypervisorResult, }; pub struct XhyveCpu { id: u32, - kernel_path: PathBuf, - args: Vec, vcpu: xhypervisor::VirtualCpu, + parent_vm: Arc>, } impl XhyveCpu { - pub fn new(id: u32, kernel_path: PathBuf, args: Vec) -> XhyveCpu { - Self { - id, - kernel_path, - args, - vcpu: xhypervisor::VirtualCpu::new().unwrap(), - } - } -} - -impl VirtualCPU for XhyveCpu { fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { debug!("Initialize VirtualCPU"); @@ -145,13 +133,18 @@ impl VirtualCPU for XhyveCpu { Ok(()) } +} - fn kernel_path(&self) -> &Path { - self.kernel_path.as_path() - } +impl VirtualCPU for XhyveCpu { + fn new(id: u32, parent_vm: Arc>) -> HypervisorResult { + let mut vcpu = XhyveCpu { + id, + parent_vm: parent_vm.clone(), + vcpu: xhypervisor::VirtualCpu::new().unwrap(), + }; + vcpu.init(parent_vm.get_entry_point(), parent_vm.stack_address(), id)?; - fn args(&self) -> &[OsString] { - self.args.as_slice() + Ok(vcpu) } fn r#continue(&mut self) -> HypervisorResult { @@ -168,18 +161,18 @@ impl VirtualCPU for XhyveCpu { let addr: u16 = exception.physical_address.try_into().unwrap(); let pc = self.vcpu.read_register(Register::PC)?; - let data_addr = self.vcpu.read_register(Register::X8)?; - if let Some(hypercall) = - unsafe { self.address_to_hypercall(addr, data_addr as usize) } - { + let data_addr = GuestPhysAddr::new(self.vcpu.read_register(Register::X8)?); + if let Some(hypercall) = unsafe { + hypercall::address_to_hypercall(&self.parent_vm.mem, addr, data_addr) + } { match hypercall { Hypercall::SerialWriteByte(_char) => { let x8 = (self.vcpu.read_register(Register::X8)? & 0xFF) as u8; - self.uart(&[x8]).unwrap(); + hypercall::uart(&[x8]).unwrap(); } Hypercall::Exit(sysexit) => { - return Ok(VcpuStopReason::Exit(self.exit(sysexit))); + return Ok(VcpuStopReason::Exit(sysexit.arg)); } _ => { panic! {"Hypercall {hypercall:?} not implemented on macos-aarch64"} diff --git a/src/macos/mod.rs b/src/macos/mod.rs index 7397eab9..1c58ce18 100644 --- a/src/macos/mod.rs +++ b/src/macos/mod.rs @@ -12,9 +12,9 @@ use std::{ use core_affinity::CoreId; #[cfg(target_arch = "aarch64")] -pub use crate::macos::aarch64::{uhyve, vcpu}; +pub use crate::macos::aarch64::vcpu::XhyveCpu; #[cfg(target_arch = "x86_64")] -use crate::macos::x86_64::vcpu::XhyveCpu; +pub use crate::macos::x86_64::vcpu::XhyveCpu; use crate::{vcpu::VirtualCPU, vm::UhyveVm}; pub type HypervisorError = xhypervisor::Error; diff --git a/src/vm.rs b/src/vm.rs index a01010bf..8f0ba51a 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -68,7 +68,7 @@ fn detect_cpu_freq() -> u32 { #[cfg(target_os = "linux")] pub type VcpuDefault = crate::linux::x86_64::kvm_cpu::KvmCpu; #[cfg(target_os = "macos")] -pub type VcpuDefault = crate::macos::x86_64::vcpu::XhyveCpu; +pub type VcpuDefault = crate::macos::XhyveCpu; pub struct UhyveVm { /// The starting position of the image in physical memory From 422e16eb8dc39332946687c93a3085b4c468cf06 Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Tue, 12 Mar 2024 15:28:06 +0100 Subject: [PATCH 27/29] Added virt_to_phys fn for aarch64 --- src/arch/aarch64/mod.rs | 91 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 3 deletions(-) diff --git a/src/arch/aarch64/mod.rs b/src/arch/aarch64/mod.rs index cc203067..d92e30b1 100644 --- a/src/arch/aarch64/mod.rs +++ b/src/arch/aarch64/mod.rs @@ -1,7 +1,13 @@ -use crate::consts::{BOOT_INFO_ADDR, BOOT_PGT, PAGE_SIZE}; -use bitflags::bitflags; use std::mem::size_of; -use uhyve_interface::GuestPhysAddr; + +use bitflags::bitflags; +use uhyve_interface::{GuestPhysAddr, GuestVirtAddr}; + +use crate::{ + consts::{BOOT_INFO_ADDR, BOOT_PGT}, + mem::MmapMemory, + paging::PagetableError, +}; pub const RAM_START: GuestPhysAddr = GuestPhysAddr::new(0x00); @@ -22,6 +28,16 @@ pub const MT_DEVICE_GRE: u64 = 2; pub const MT_NORMAL_NC: u64 = 3; pub const MT_NORMAL: u64 = 4; +/// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). +const PAGE_BITS: usize = 12; +const PAGE_SIZE: usize = 1 << PAGE_BITS; + +/// Number of bits of the index in each table (L0Table, L1Table, L2Table, L3Table). +const PAGE_MAP_BITS: usize = 9; + +/// A mask where PAGE_MAP_BITS are set to calculate a table index. +const PAGE_MAP_MASK: u64 = 0x1FF; + #[inline(always)] pub const fn mair(attr: u64, mt: u64) -> u64 { attr << (mt * 8) @@ -62,6 +78,75 @@ bitflags! { } } +/// An entry in a L0 page table (coarses). Adapted from hermit-os/kernel. +#[derive(Clone, Copy, Debug)] +struct PageTableEntry { + /// Physical memory address this entry refers, combined with flags from PageTableEntryFlags. + physical_address_and_flags: GuestPhysAddr, +} + +impl PageTableEntry { + /// Return the stored physical address. + pub fn address(&self) -> GuestPhysAddr { + // For other granules than 4KiB or hugepages we should check the DESCRIPTOR_TYPE bit and modify the address translation accordingly. + GuestPhysAddr( + self.physical_address_and_flags.as_u64() & !(PAGE_SIZE as u64 - 1) & !(u64::MAX << 48), + ) + } +} +impl From for PageTableEntry { + fn from(i: u64) -> Self { + Self { + physical_address_and_flags: GuestPhysAddr::new(i), + } + } +} + +/// Returns whether the given virtual address is a valid one in the AArch64 memory model. +/// +/// Current AArch64 supports only 48-bit for virtual memory addresses. +/// The upper bits must always be 0 or 1 and indicate whether TBBR0 or TBBR1 contains the +/// base address. So always enforce 0 here. +fn is_valid_address(virtual_address: GuestVirtAddr) -> bool { + virtual_address < GuestVirtAddr(0x1_0000_0000_0000) +} + +/// Converts a virtual address in the guest to a physical address in the guest +pub fn virt_to_phys( + addr: GuestVirtAddr, + mem: &MmapMemory, +) -> Result { + if !is_valid_address(addr) { + return Err(PagetableError::InvalidAddress); + } + + // Assumptions: + // - We use 4KiB granule + // - We use maximum VA length + // => We have 4 level paging + + // Safety: + // - We are only working in the vm's memory + // - the memory location of the pagetable is not altered by hermit. + // - Our indices can't be larger than 512, so we stay in the borders of the page. + // - We are page_aligned, and thus also PageTableEntry aligned. + let mut pagetable: &[PageTableEntry] = + unsafe { std::mem::transmute(mem.slice_at(BOOT_PGT, PAGE_SIZE).unwrap()) }; + // TODO: Depending on the virtual address length and granule (defined in TCR register by TG and TxSZ), we could reduce the number of pagetable walks. Hermit doesn't do this at the moment. + for level in 0..3 { + let table_index = + (addr.as_u64() >> PAGE_BITS >> ((3 - level) * PAGE_MAP_BITS) & PAGE_MAP_MASK) as usize; + let pte = PageTableEntry::from(pagetable[table_index]); + // TODO: We could stop here if we have a "Block Entry" (ARM equivalent to huge page). Currently not supported. + + pagetable = unsafe { std::mem::transmute(mem.slice_at(pte.address(), PAGE_SIZE).unwrap()) }; + } + let table_index = (addr.as_u64() >> PAGE_BITS & PAGE_MAP_MASK) as usize; + let pte = PageTableEntry::from(pagetable[table_index]); + + Ok(pte.address()) +} + pub fn init_guest_mem(mem: &mut [u8]) { let mem_addr = std::ptr::addr_of_mut!(mem[0]); From f4426fdf85a4b26f787ed4aa302babeb50d85bee Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Tue, 12 Mar 2024 15:28:06 +0100 Subject: [PATCH 28/29] changed the pagetable location to be a parameter of fn virt_to_phys --- src/arch/aarch64/mod.rs | 3 ++- src/arch/x86_64/mod.rs | 11 ++++++----- src/hypercall.rs | 23 +++++++++++++---------- src/linux/gdb/breakpoints.rs | 12 ++++++++---- src/linux/gdb/mod.rs | 6 ++++-- 5 files changed, 33 insertions(+), 22 deletions(-) diff --git a/src/arch/aarch64/mod.rs b/src/arch/aarch64/mod.rs index d92e30b1..8c91caa6 100644 --- a/src/arch/aarch64/mod.rs +++ b/src/arch/aarch64/mod.rs @@ -115,6 +115,7 @@ fn is_valid_address(virtual_address: GuestVirtAddr) -> bool { pub fn virt_to_phys( addr: GuestVirtAddr, mem: &MmapMemory, + pagetable_l0: GuestPhysAddr, ) -> Result { if !is_valid_address(addr) { return Err(PagetableError::InvalidAddress); @@ -131,7 +132,7 @@ pub fn virt_to_phys( // - Our indices can't be larger than 512, so we stay in the borders of the page. // - We are page_aligned, and thus also PageTableEntry aligned. let mut pagetable: &[PageTableEntry] = - unsafe { std::mem::transmute(mem.slice_at(BOOT_PGT, PAGE_SIZE).unwrap()) }; + unsafe { std::mem::transmute(mem.slice_at(pagetable_l0, PAGE_SIZE).unwrap()) }; // TODO: Depending on the virtual address length and granule (defined in TCR register by TG and TxSZ), we could reduce the number of pagetable walks. Hermit doesn't do this at the moment. for level in 0..3 { let table_index = diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 13d20c5d..4944d577 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -196,6 +196,7 @@ pub fn initialize_pagetables(mem: &mut [u8]) { pub fn virt_to_phys( addr: GuestVirtAddr, mem: &MmapMemory, + pagetable_l0: GuestPhysAddr, ) -> Result { /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). pub const PAGE_BITS: u64 = 12; @@ -204,7 +205,7 @@ pub fn virt_to_phys( pub const PAGE_MAP_BITS: usize = 9; let mut page_table = - unsafe { (mem.host_address(BOOT_PML4).unwrap() as *mut PageTable).as_mut() }.unwrap(); + unsafe { (mem.host_address(pagetable_l0).unwrap() as *mut PageTable).as_mut() }.unwrap(); let mut page_bits = 39; let mut entry = PageTableEntry::new(); @@ -373,12 +374,12 @@ mod tests { // Get the address of the first entry in PML4 (the address of the PML4 itself) let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000); - let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); assert_eq!(p_addr, BOOT_PML4); // The last entry on the PML4 is the address of the PML4 with flags let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000 | (4096 - 8)); - let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); assert_eq!( mem.read::(p_addr).unwrap(), BOOT_PML4.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() @@ -386,12 +387,12 @@ mod tests { // the first entry on the 3rd level entry in the pagetables is the address of the boot pdpte let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFE00000); - let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); assert_eq!(p_addr, BOOT_PDPTE); // the first entry on the 2rd level entry in the pagetables is the address of the boot pde let virt_addr = GuestVirtAddr::new(0xFFFFFFFFC0000000); - let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); assert_eq!(p_addr, BOOT_PDE); // That address points to a huge page assert!( diff --git a/src/hypercall.rs b/src/hypercall.rs index 47921ec6..0205419c 100644 --- a/src/hypercall.rs +++ b/src/hypercall.rs @@ -7,6 +7,7 @@ use std::{ use uhyve_interface::{parameters::*, GuestPhysAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; use crate::{ + consts::BOOT_PML4, mem::{MemoryError, MmapMemory}, virt_to_phys, }; @@ -101,7 +102,7 @@ pub fn read(mem: &MmapMemory, sysread: &mut ReadPrams) { unsafe { let bytes_read = libc::read( sysread.fd, - mem.host_address(virt_to_phys(sysread.buf, mem).unwrap()) + mem.host_address(virt_to_phys(sysread.buf, mem, BOOT_PML4).unwrap()) .unwrap() as *mut libc::c_void, sysread.len, ); @@ -120,15 +121,17 @@ pub fn write(mem: &MmapMemory, syswrite: &WriteParams) -> io::Result<()> { unsafe { let step = libc::write( syswrite.fd, - mem.host_address(virt_to_phys(syswrite.buf + bytes_written as u64, mem).unwrap()) - .map_err(|e| match e { - MemoryError::BoundsViolation => { - unreachable!("Bounds violation after host_address function") - } - MemoryError::WrongMemoryError => { - Error::new(ErrorKind::AddrNotAvailable, e.to_string()) - } - })? as *const libc::c_void, + mem.host_address( + virt_to_phys(syswrite.buf + bytes_written as u64, mem, BOOT_PML4).unwrap(), + ) + .map_err(|e| match e { + MemoryError::BoundsViolation => { + unreachable!("Bounds violation after host_address function") + } + MemoryError::WrongMemoryError => { + Error::new(ErrorKind::AddrNotAvailable, e.to_string()) + } + })? as *const libc::c_void, syswrite.len - bytes_written, ); if step >= 0 { diff --git a/src/linux/gdb/breakpoints.rs b/src/linux/gdb/breakpoints.rs index 8de01a70..454f9cdf 100644 --- a/src/linux/gdb/breakpoints.rs +++ b/src/linux/gdb/breakpoints.rs @@ -4,8 +4,10 @@ use gdbstub::target::{self, ext::breakpoints::WatchKind, TargetResult}; use uhyve_interface::GuestVirtAddr; use super::GdbUhyve; -use crate::arch::x86_64::{registers, virt_to_phys}; - +use crate::{ + arch::x86_64::{registers, virt_to_phys}, + consts::BOOT_PML4, +}; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct SwBreakpoint { addr: u64, @@ -53,7 +55,8 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { // Safety: mem is not altered during the lifetime of `instructions` let instructions = unsafe { self.vm.mem.slice_at_mut( - virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem).map_err(|_err| ())?, + virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem, BOOT_PML4) + .map_err(|_err| ())?, kind, ) } @@ -73,7 +76,8 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { // Safety: mem is not altered during the lifetime of `instructions` let instructions = unsafe { self.vm.mem.slice_at_mut( - virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem).map_err(|_err| ())?, + virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem, BOOT_PML4) + .map_err(|_err| ())?, kind, ) } diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index 29f08e3a..5dd525f0 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -30,6 +30,7 @@ use self::breakpoints::SwBreakpoints; use super::HypervisorError; use crate::{ arch::x86_64::{registers::debug::HwBreakpoints, virt_to_phys}, + consts::BOOT_PML4, linux::{x86_64::kvm_cpu::KvmCpu, KickSignal}, vcpu::{VcpuStopReason, VirtualCPU}, vm::UhyveVm, @@ -130,7 +131,7 @@ impl SingleThreadBase for GdbUhyve { // Safety: mem is copied to data before mem can be modified. let src = unsafe { self.vm.mem.slice_at( - virt_to_phys(guest_addr, &self.vm.mem).map_err(|_err| ())?, + virt_to_phys(guest_addr, &self.vm.mem, BOOT_PML4).map_err(|_err| ())?, data.len(), ) } @@ -143,7 +144,8 @@ impl SingleThreadBase for GdbUhyve { // Safety: self.vm.mem is not altered during the lifetime of mem. let mem = unsafe { self.vm.mem.slice_at_mut( - virt_to_phys(GuestVirtAddr::new(start_addr), &self.vm.mem).map_err(|_err| ())?, + virt_to_phys(GuestVirtAddr::new(start_addr), &self.vm.mem, BOOT_PML4) + .map_err(|_err| ())?, data.len(), ) } From 3768b348af20357a4d73ed202be69dfc573969bf Mon Sep 17 00:00:00 2001 From: Jonathan Klimt Date: Tue, 12 Mar 2024 15:28:06 +0100 Subject: [PATCH 29/29] Untested add of the file hypercall functions --- src/macos/aarch64/vcpu.rs | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/macos/aarch64/vcpu.rs b/src/macos/aarch64/vcpu.rs index 0d7910bc..35d86869 100644 --- a/src/macos/aarch64/vcpu.rs +++ b/src/macos/aarch64/vcpu.rs @@ -13,7 +13,7 @@ use crate::{ PSR, TCR_FLAGS, TCR_TG1_4K, VA_BITS, }, consts::*, - hypercall, + hypercall::{self, copy_argv, copy_env}, vcpu::{VcpuStopReason, VirtualCPU}, vm::UhyveVm, HypervisorResult, @@ -174,6 +174,31 @@ impl VirtualCPU for XhyveCpu { Hypercall::Exit(sysexit) => { return Ok(VcpuStopReason::Exit(sysexit.arg)); } + Hypercall::Cmdsize(syssize) => syssize + .update(self.parent_vm.kernel_path(), self.parent_vm.args()), + Hypercall::Cmdval(syscmdval) => { + copy_argv( + self.parent_vm.kernel_path().as_os_str(), + self.parent_vm.args(), + syscmdval, + &self.parent_vm.mem, + ); + copy_env(syscmdval, &self.parent_vm.mem); + } + Hypercall::FileClose(sysclose) => hypercall::close(sysclose), + Hypercall::FileLseek(syslseek) => hypercall::lseek(syslseek), + Hypercall::FileOpen(sysopen) => { + hypercall::open(&self.parent_vm.mem, sysopen) + } + Hypercall::FileRead(sysread) => { + hypercall::read(&self.parent_vm.mem, sysread) + } + Hypercall::FileWrite(syswrite) => { + hypercall::write(&self.parent_vm.mem, syswrite).unwrap() + } + Hypercall::FileUnlink(sysunlink) => { + hypercall::unlink(&self.parent_vm.mem, sysunlink) + } _ => { panic! {"Hypercall {hypercall:?} not implemented on macos-aarch64"} }