From 3476bb73caf4a07dd30850bbfbba9b1025d51aea Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Fri, 16 Feb 2024 14:11:45 +0100 Subject: [PATCH] prepare kernel to use it as common monolithic kernel - currently, only x86 is supported - add system call interface and system call table, which includes function pointers to the implementations - save GS register during a context switch - swap GS register if user space is interrupted - introduce privilege level for the user space - create a separate page table for every process --- src/arch/mod.rs | 69 +++++++++------- src/arch/x86_64/kernel/apic.rs | 11 ++- src/arch/x86_64/kernel/gdt.rs | 19 ++++- src/arch/x86_64/kernel/interrupts.rs | 58 +++++++------ src/arch/x86_64/kernel/mod.rs | 119 ++++++++++++++++++++++++++- src/arch/x86_64/kernel/pic.rs | 10 ++- src/arch/x86_64/kernel/processor.rs | 32 +++++++ src/arch/x86_64/kernel/scheduler.rs | 14 +++- src/arch/x86_64/kernel/switch.rs | 70 +++++++++++++++- src/arch/x86_64/kernel/syscall.rs | 49 +++++++++++ src/arch/x86_64/mm/mod.rs | 61 +++++++++++++- src/arch/x86_64/mm/paging.rs | 79 +++++++++++++++--- src/arch/x86_64/mm/virtualmem.rs | 10 ++- src/arch/x86_64/mod.rs | 19 +++++ src/config.rs | 2 +- src/drivers/net/mod.rs | 4 +- src/env.rs | 2 +- src/lib.rs | 13 +-- src/mm/mod.rs | 57 ++++++++++++- src/scheduler/mod.rs | 47 +++++++++-- src/scheduler/task.rs | 29 +++++-- src/syscalls/entropy.rs | 1 + src/syscalls/mod.rs | 10 ++- src/syscalls/table.rs | 88 ++++++++++++++++++++ src/syscalls/tasks.rs | 16 +--- 25 files changed, 761 insertions(+), 128 deletions(-) create mode 100644 src/arch/x86_64/kernel/syscall.rs create mode 100644 src/syscalls/table.rs diff --git a/src/arch/mod.rs b/src/arch/mod.rs index c093434eda..81b75b3c33 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -2,64 +2,70 @@ cfg_if::cfg_if! 
{ if #[cfg(target_arch = "aarch64")] { - pub mod aarch64; - pub use self::aarch64::*; + pub(crate) mod aarch64; + pub(crate) use self::aarch64::*; #[cfg(target_os = "none")] - pub use self::aarch64::kernel::boot_processor_init; - pub use self::aarch64::kernel::core_local; - pub use self::aarch64::kernel::interrupts; - pub use self::aarch64::kernel::interrupts::wakeup_core; + pub(crate) use self::aarch64::kernel::boot_processor_init; + pub(crate) use self::aarch64::kernel::core_local; + pub(crate) use self::aarch64::kernel::interrupts; + pub(crate) use self::aarch64::kernel::interrupts::wakeup_core; #[cfg(feature = "pci")] - pub use self::aarch64::kernel::pci; - pub use self::aarch64::kernel::processor; - pub use self::aarch64::kernel::processor::set_oneshot_timer; - pub use self::aarch64::kernel::scheduler; - pub use self::aarch64::kernel::switch; + pub(crate) use self::aarch64::kernel::pci; + pub(crate) use self::aarch64::kernel::processor; + pub(crate) use self::aarch64::kernel::processor::set_oneshot_timer; + pub(crate) use self::aarch64::kernel::scheduler; + pub(crate) use self::aarch64::kernel::switch; #[cfg(feature = "smp")] - pub use self::aarch64::kernel::application_processor_init; - pub use self::aarch64::kernel::{ + pub(crate) use self::aarch64::kernel::application_processor_init; + pub(crate) use self::aarch64::kernel::{ boot_application_processors, get_processor_count, message_output_init, output_message_buf, }; + pub use self::aarch64::mm::paging::{BasePageSize, PageSize}; } else if #[cfg(target_arch = "x86_64")] { - pub mod x86_64; - pub use self::x86_64::*; + pub(crate) mod x86_64; + pub(crate) use self::x86_64::*; - pub use self::x86_64::kernel::apic::{ + pub(crate) use self::x86_64::kernel::apic::{ set_oneshot_timer, wakeup_core, }; #[cfg(all(target_os = "none", feature = "smp"))] - pub use self::x86_64::kernel::application_processor_init; - pub use self::x86_64::kernel::core_local; - pub use self::x86_64::kernel::gdt::set_current_kernel_stack; - 
pub use self::x86_64::kernel::interrupts; + pub(crate) use self::x86_64::kernel::application_processor_init; + pub(crate) use self::x86_64::kernel::core_local; + pub(crate) use self::x86_64::kernel::gdt::set_current_kernel_stack; + pub(crate) use self::x86_64::kernel::interrupts; #[cfg(feature = "pci")] - pub use self::x86_64::kernel::pci; - pub use self::x86_64::kernel::processor; - pub use self::x86_64::kernel::scheduler; - pub use self::x86_64::kernel::switch; + pub(crate) use self::x86_64::kernel::pci; + pub(crate) use self::x86_64::kernel::processor; + pub(crate) use self::x86_64::kernel::scheduler; + pub(crate) use self::x86_64::kernel::switch; #[cfg(target_os = "none")] - pub use self::x86_64::kernel::{ + pub(crate) use self::x86_64::kernel::{ boot_application_processors, boot_processor_init, }; - pub use self::x86_64::kernel::{ + pub(crate) use self::x86_64::kernel::{ get_processor_count, message_output_init, output_message_buf, }; + pub use self::x86_64::mm::paging::{BasePageSize, PageSize}; + #[cfg(feature = "common-os")] + pub use self::x86_64::mm::create_new_root_page_table; + #[cfg(feature = "common-os")] + pub use self::x86_64::kernel::{load_application, jump_to_user_land}; } else if #[cfg(target_arch = "riscv64")] { - pub mod riscv64; - pub use self::riscv64::*; + pub(crate) mod riscv64; + pub(crate) use self::riscv64::*; #[cfg(feature = "smp")] - pub use self::riscv64::kernel::application_processor_init; - pub use self::riscv64::kernel::processor::{self, set_oneshot_timer, wakeup_core}; - pub use self::riscv64::kernel::{ + pub(crate) use self::riscv64::kernel::application_processor_init; + pub(crate) use self::riscv64::kernel::processor::{self, set_oneshot_timer, wakeup_core}; + pub(crate) use self::riscv64::kernel::{ boot_application_processors, boot_processor_init, core_local, @@ -70,5 +76,6 @@ cfg_if::cfg_if! 
{ scheduler, switch, }; + pub use self::riscv64::mm::paging::{BasePageSize, PageSize}; } } diff --git a/src/arch/x86_64/kernel/apic.rs b/src/arch/x86_64/kernel/apic.rs index adc3ee1498..e16cfe16ec 100644 --- a/src/arch/x86_64/kernel/apic.rs +++ b/src/arch/x86_64/kernel/apic.rs @@ -22,6 +22,7 @@ use crate::arch::x86_64::mm::paging::{ BasePageSize, PageSize, PageTableEntryFlags, PageTableEntryFlagsExt, }; use crate::arch::x86_64::mm::{paging, virtualmem, PhysAddr, VirtAddr}; +use crate::arch::x86_64::swapgs; use crate::config::*; use crate::scheduler::CoreId; use crate::{arch, env, mm, scheduler}; @@ -199,16 +200,19 @@ impl fmt::Display for IoApicRecord { } #[cfg(feature = "smp")] -extern "x86-interrupt" fn tlb_flush_handler(_stack_frame: interrupts::ExceptionStackFrame) { +extern "x86-interrupt" fn tlb_flush_handler(stack_frame: interrupts::ExceptionStackFrame) { + swapgs(&stack_frame); debug!("Received TLB Flush Interrupt"); increment_irq_counter(TLB_FLUSH_INTERRUPT_NUMBER); unsafe { cr3_write(cr3()); } eoi(); + swapgs(&stack_frame); } extern "x86-interrupt" fn error_interrupt_handler(stack_frame: interrupts::ExceptionStackFrame) { + swapgs(&stack_frame); error!("APIC LVT Error Interrupt"); error!("ESR: {:#X}", local_apic_read(IA32_X2APIC_ESR)); error!("{:#?}", stack_frame); @@ -217,12 +221,14 @@ extern "x86-interrupt" fn error_interrupt_handler(stack_frame: interrupts::Excep } extern "x86-interrupt" fn spurious_interrupt_handler(stack_frame: interrupts::ExceptionStackFrame) { + swapgs(&stack_frame); error!("Spurious Interrupt: {:#?}", stack_frame); scheduler::abort(); } #[cfg(feature = "smp")] -extern "x86-interrupt" fn wakeup_handler(_stack_frame: interrupts::ExceptionStackFrame) { +extern "x86-interrupt" fn wakeup_handler(stack_frame: interrupts::ExceptionStackFrame) { + swapgs(&stack_frame); use crate::scheduler::PerCoreSchedulerExt; debug!("Received Wakeup Interrupt"); @@ -233,6 +239,7 @@ extern "x86-interrupt" fn wakeup_handler(_stack_frame: 
interrupts::ExceptionStac if core_scheduler.is_scheduling() { core_scheduler.reschedule(); } + swapgs(&stack_frame); } #[inline] diff --git a/src/arch/x86_64/kernel/gdt.rs b/src/arch/x86_64/kernel/gdt.rs index e81908d6cf..bd0a6dfdfe 100644 --- a/src/arch/x86_64/kernel/gdt.rs +++ b/src/arch/x86_64/kernel/gdt.rs @@ -3,6 +3,8 @@ use core::sync::atomic::Ordering; use x86_64::instructions::tables; use x86_64::registers::segmentation::{Segment, CS, DS, ES, SS}; +#[cfg(feature = "common-os")] +use x86_64::structures::gdt::DescriptorFlags; use x86_64::structures::gdt::{Descriptor, GlobalDescriptorTable}; use x86_64::structures::tss::TaskStateSegment; use x86_64::VirtAddr; @@ -15,9 +17,16 @@ use crate::arch::x86_64::mm::paging::{BasePageSize, PageSize}; use crate::config::KERNEL_STACK_SIZE; pub fn add_current_core() { - let gdt = Box::leak(Box::new(GlobalDescriptorTable::new())); + let gdt: &mut GlobalDescriptorTable = Box::leak(Box::new(GlobalDescriptorTable::new())); let kernel_code_selector = gdt.add_entry(Descriptor::kernel_code_segment()); let kernel_data_selector = gdt.add_entry(Descriptor::kernel_data_segment()); + #[cfg(feature = "common-os")] + { + let _user_code32_selector = + gdt.add_entry(Descriptor::UserSegment(DescriptorFlags::USER_CODE32.bits())); + let _user_data64_selector = gdt.add_entry(Descriptor::user_data_segment()); + let _user_code64_selector = gdt.add_entry(Descriptor::user_code_segment()); + } // Dynamically allocate memory for a Task-State Segment (TSS) for this core. 
let tss = Box::leak(Box::new(TaskStateSegment::new())); @@ -60,5 +69,13 @@ pub fn add_current_core() { } pub extern "C" fn set_current_kernel_stack() { + #[cfg(feature = "common-os")] + unsafe { + let root = crate::scheduler::get_root_page_table(); + if root != x86::controlregs::cr3().try_into().unwrap() { + x86::controlregs::cr3_write(root.try_into().unwrap()); + } + } + core_scheduler().set_current_kernel_stack(); } diff --git a/src/arch/x86_64/kernel/interrupts.rs b/src/arch/x86_64/kernel/interrupts.rs index d3381a2535..7bd3b15de6 100644 --- a/src/arch/x86_64/kernel/interrupts.rs +++ b/src/arch/x86_64/kernel/interrupts.rs @@ -6,23 +6,23 @@ use ahash::RandomState; use hashbrown::HashMap; use hermit_sync::{InterruptSpinMutex, InterruptTicketMutex}; pub use x86_64::instructions::interrupts::{disable, enable, enable_and_hlt as enable_and_wait}; -use x86_64::registers::control::Cr2; use x86_64::set_general_handler; pub use x86_64::structures::idt::InterruptStackFrame as ExceptionStackFrame; -use x86_64::structures::idt::{InterruptDescriptorTable, InterruptStackFrame, PageFaultErrorCode}; +use x86_64::structures::idt::{InterruptDescriptorTable, InterruptStackFrame}; use crate::arch::x86_64::kernel::core_local::{core_scheduler, increment_irq_counter}; use crate::arch::x86_64::kernel::{apic, processor}; -use crate::arch::x86_64::mm::paging::{BasePageSize, PageSize}; +use crate::arch::x86_64::mm::paging::{page_fault_handler, BasePageSize, PageSize}; +use crate::arch::x86_64::swapgs; use crate::scheduler::{self, CoreId}; -pub const IST_ENTRIES: usize = 4; -pub const IST_SIZE: usize = 8 * BasePageSize::SIZE as usize; +pub(crate) const IST_ENTRIES: usize = 4; +pub(crate) const IST_SIZE: usize = 8 * BasePageSize::SIZE as usize; -pub static IDT: InterruptSpinMutex = +pub(crate) static IDT: InterruptSpinMutex = InterruptSpinMutex::new(InterruptDescriptorTable::new()); -pub fn load_idt() { +pub(crate) fn load_idt() { // FIXME: This is not sound! 
For this to be sound, the table must never be // modified or destroyed while in use. This is _not_ the case here. Instead, we // disable interrupts on the current core when modifying the table and hope for @@ -32,7 +32,7 @@ pub fn load_idt() { } } -pub fn install() { +pub(crate) fn install() { let mut idt = IDT.lock(); set_general_handler!(&mut *idt, abort, 0..32); @@ -144,41 +144,49 @@ fn unknown(_stack_frame: ExceptionStackFrame, index: u8, _error_code: Option ! { + swapgs(&stack_frame); error!( "Double Fault (#DF) Exception: {:#?}, error {:#X}", stack_frame, error_code @@ -245,39 +259,31 @@ extern "x86-interrupt" fn double_fault_exception( } extern "x86-interrupt" fn floating_point_exception(stack_frame: ExceptionStackFrame) { + swapgs(&stack_frame); error!("Floating-Point Error (#MF) Exception: {:#?}", stack_frame); scheduler::abort(); } extern "x86-interrupt" fn alignment_check_exception(stack_frame: ExceptionStackFrame, _code: u64) { + swapgs(&stack_frame); error!("Alignment Check (#AC) Exception: {:#?}", stack_frame); scheduler::abort(); } -pub extern "x86-interrupt" fn page_fault_handler( - stack_frame: ExceptionStackFrame, - error_code: PageFaultErrorCode, -) { - error!("Page fault (#PF)!"); - error!("page_fault_linear_address = {:p}", Cr2::read()); - error!("error_code = {error_code:?}"); - error!("fs = {:#X}", processor::readfs()); - error!("gs = {:#X}", processor::readgs()); - error!("stack_frame = {stack_frame:#?}"); - scheduler::abort(); -} - extern "x86-interrupt" fn machine_check_exception(stack_frame: ExceptionStackFrame) -> ! 
{ + swapgs(&stack_frame); error!("Machine Check (#MC) Exception: {:#?}", stack_frame); scheduler::abort() } extern "x86-interrupt" fn simd_floating_point_exception(stack_frame: ExceptionStackFrame) { + swapgs(&stack_frame); error!("SIMD Floating-Point (#XM) Exception: {:#?}", stack_frame); scheduler::abort(); } extern "x86-interrupt" fn virtualization_exception(stack_frame: ExceptionStackFrame) { + swapgs(&stack_frame); error!("Virtualization (#VE) Exception: {:#?}", stack_frame); scheduler::abort(); } @@ -285,7 +291,7 @@ extern "x86-interrupt" fn virtualization_exception(stack_frame: ExceptionStackFr static IRQ_NAMES: InterruptTicketMutex> = InterruptTicketMutex::new(HashMap::with_hasher(RandomState::with_seeds(0, 0, 0, 0))); -pub fn add_irq_name(irq_number: u8, name: &'static str) { +pub(crate) fn add_irq_name(irq_number: u8, name: &'static str) { debug!("Register name \"{}\" for interrupt {}", name, irq_number); IRQ_NAMES.lock().insert(32 + irq_number, name); } @@ -294,10 +300,10 @@ fn get_irq_name(irq_number: u8) -> Option<&'static str> { IRQ_NAMES.lock().get(&irq_number).copied() } -pub static IRQ_COUNTERS: InterruptSpinMutex> = +pub(crate) static IRQ_COUNTERS: InterruptSpinMutex> = InterruptSpinMutex::new(BTreeMap::new()); -pub struct IrqStatistics { +pub(crate) struct IrqStatistics { pub counters: [AtomicU64; 256], } @@ -315,7 +321,7 @@ impl IrqStatistics { } } -pub fn print_statistics() { +pub(crate) fn print_statistics() { info!("Number of interrupts"); for (core_id, irg_statistics) in IRQ_COUNTERS.lock().iter() { for (i, counter) in irg_statistics.counters.iter().enumerate() { diff --git a/src/arch/x86_64/kernel/mod.rs b/src/arch/x86_64/kernel/mod.rs index a5a34c68b1..23f1a58a11 100644 --- a/src/arch/x86_64/kernel/mod.rs +++ b/src/arch/x86_64/kernel/mod.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "common-os")] +use core::arch::asm; #[cfg(feature = "newlib")] use core::slice; use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; @@ -29,7 +31,9 @@ pub mod 
serial; #[cfg(target_os = "none")] mod start; pub mod switch; -pub mod systemtime; +#[cfg(feature = "common-os")] +mod syscall; +pub(crate) mod systemtime; #[cfg(feature = "vga")] mod vga; @@ -269,3 +273,116 @@ unsafe extern "C" fn pre_init(boot_info: &'static RawBootInfo, cpu_id: u32) -> ! crate::application_processor_main(); } } + +#[cfg(feature = "common-os")] +const LOADER_START: usize = 0x10000000000; +#[cfg(feature = "common-os")] +const LOADER_STACK_SIZE: usize = 0x8000; + +#[cfg(feature = "common-os")] +pub fn load_application(code_size: u64, tls_size: u64, func: F) -> Result<(), ()> +where + F: FnOnce(&'static mut [u8], Option<&'static mut [u8]>) -> Result<(), ()>, +{ + use core::ptr::slice_from_raw_parts_mut; + + use align_address::Align; + use x86_64::structures::paging::{PageSize, Size4KiB as BasePageSize}; + + use crate::arch::x86_64::mm::paging::{self, PageTableEntryFlags, PageTableEntryFlagsExt}; + use crate::arch::x86_64::mm::physicalmem; + + let code_size = (code_size as usize + LOADER_STACK_SIZE).align_up(BasePageSize::SIZE as usize); + let physaddr = + physicalmem::allocate_aligned(code_size as usize, BasePageSize::SIZE as usize).unwrap(); + + let mut flags = PageTableEntryFlags::empty(); + flags.normal().writable().user().execute_enable(); + paging::map::( + VirtAddr::from(LOADER_START), + physaddr, + code_size / BasePageSize::SIZE as usize, + flags, + ); + + let code_slice = unsafe { &mut *slice_from_raw_parts_mut(LOADER_START as *mut u8, code_size) }; + + if tls_size > 0 { + // To access TLS blocks on x86-64, TLS offsets are *subtracted* from the thread register value. + // So the thread pointer needs to be `block_ptr + tls_offset`. + // GNU style TLS requires `gs:0` to represent the same address as the thread pointer. + // Since the thread pointer points to the end of the TLS blocks, we need to store it there. 
+ let tcb_size = core::mem::size_of::<*mut ()>(); + let tls_offset = tls_size as usize; + + let tls_memsz = (tls_offset + tcb_size).align_up(BasePageSize::SIZE as usize); + let physaddr = + physicalmem::allocate_aligned(tls_memsz, BasePageSize::SIZE as usize).unwrap(); + + let mut flags = PageTableEntryFlags::empty(); + flags.normal().writable().user().execute_disable(); + let tls_virt = VirtAddr::from(LOADER_START + code_size + BasePageSize::SIZE as usize); + paging::map::( + tls_virt, + physaddr, + tls_memsz / BasePageSize::SIZE as usize, + flags, + ); + let block = unsafe { + &mut *slice_from_raw_parts_mut(tls_virt.as_mut_ptr() as *mut u8, tls_offset + tcb_size) + }; + for elem in block.iter_mut() { + *elem = 0; + } + + // thread_ptr = block_ptr + tls_offset + let thread_ptr = block[tls_offset..].as_mut_ptr().cast::<()>(); + unsafe { + thread_ptr.cast::<*mut ()>().write(thread_ptr); + } + crate::arch::x86_64::kernel::processor::writefs(thread_ptr as usize); + + func(code_slice, Some(block)) + } else { + func(code_slice, None) + } +} + +#[cfg(feature = "common-os")] +pub unsafe fn jump_to_user_land(entry_point: u64, code_size: u64) -> ! 
{ + use align_address::Align; + use x86_64::structures::paging::{PageSize, Size4KiB as BasePageSize}; + + use crate::arch::x86_64::kernel::scheduler::TaskStacks; + + let ds = 0x23u64; + let cs = 0x2bu64; + let entry_point: u64 = (LOADER_START as u64) | entry_point; + let stack_pointer: u64 = LOADER_START as u64 + + (code_size + LOADER_STACK_SIZE as u64).align_up(BasePageSize::SIZE) + - 128 /* red zone */ - 8; + + debug!( + "Jump to user space at 0x{:x}, stack pointer 0x{:x}", + entry_point, stack_pointer + ); + unsafe { + asm!( + "and rsp, {0}", + "swapgs", + "push {1}", + "push {2}", + "push {3}", + "push {4}", + "push {5}", + "iretq", + const u64::MAX - (TaskStacks::MARKER_SIZE as u64 - 1), + in(reg) ds, + in(reg) stack_pointer, + const 0x1202u64, + in(reg) cs, + in(reg) entry_point, + options(nostack, noreturn) + ); + } +} diff --git a/src/arch/x86_64/kernel/pic.rs b/src/arch/x86_64/kernel/pic.rs index 70c79ed744..96c9503e84 100644 --- a/src/arch/x86_64/kernel/pic.rs +++ b/src/arch/x86_64/kernel/pic.rs @@ -2,6 +2,8 @@ use x86::io::*; use super::interrupts::IDT; use crate::arch::x86_64::kernel::interrupts::ExceptionStackFrame; +use crate::arch::x86_64::swapgs; +use crate::scheduler; const PIC1_COMMAND_PORT: u16 = 0x20; const PIC1_DATA_PORT: u16 = 0x21; @@ -72,11 +74,14 @@ pub fn init() { } } -extern "x86-interrupt" fn spurious_interrupt_on_master(_stack_frame: ExceptionStackFrame) { +extern "x86-interrupt" fn spurious_interrupt_on_master(stack_frame: ExceptionStackFrame) { + swapgs(&stack_frame); debug!("Spurious Interrupt on Master PIC (IRQ7)"); + scheduler::abort(); } -extern "x86-interrupt" fn spurious_interrupt_on_slave(_stack_frame: ExceptionStackFrame) { +extern "x86-interrupt" fn spurious_interrupt_on_slave(stack_frame: ExceptionStackFrame) { + swapgs(&stack_frame); debug!("Spurious Interrupt on Slave PIC (IRQ15)"); // As this is an interrupt forwarded by the master, we have to acknowledge it on the master @@ -84,6 +89,7 @@ extern "x86-interrupt" fn 
spurious_interrupt_on_slave(_stack_frame: ExceptionSta unsafe { outb(PIC1_COMMAND_PORT, PIC_EOI_COMMAND); } + scheduler::abort(); } fn edit_mask(int_no: u8, insert: bool) { diff --git a/src/arch/x86_64/kernel/processor.rs b/src/arch/x86_64/kernel/processor.rs index 711bedd537..32d2605385 100644 --- a/src/arch/x86_64/kernel/processor.rs +++ b/src/arch/x86_64/kernel/processor.rs @@ -778,6 +778,8 @@ pub fn detect_features() { } pub fn configure() { + let cpuid = CpuId::new(); + // setup MSR EFER unsafe { wrmsr(IA32_EFER, rdmsr(IA32_EFER) | EFER_LMA | EFER_SCE | EFER_NXE); @@ -808,6 +810,15 @@ pub fn configure() { // let mut cr4 = unsafe { cr4() }; + let has_pge = match cpuid.get_feature_info() { + Some(finfo) => finfo.has_pge(), + None => false, + }; + + if has_pge { + cr4 |= Cr4::CR4_ENABLE_GLOBAL_PAGES; + } + // Enable Machine Check Exceptions. // No need to check for support here, all x86-64 CPUs support it. cr4.insert(Cr4::CR4_ENABLE_MACHINE_CHECK); @@ -862,6 +873,27 @@ pub fn configure() { } } + // enable support of syscall and sysret + #[cfg(feature = "common-os")] + unsafe { + let has_syscall = match cpuid.get_extended_processor_and_feature_identifiers() { + Some(finfo) => finfo.has_syscall_sysret(), + None => false, + }; + + if has_syscall { + info!("Enable SYSCALL support"); + } else { + panic!("Syscall support is missing"); + } + wrmsr(IA32_STAR, (0x1Bu64 << 48) | (0x08u64 << 32)); + wrmsr( + IA32_LSTAR, + crate::arch::x86_64::kernel::syscall::syscall_handler as u64, + ); + wrmsr(IA32_FMASK, 1 << 9); // clear IF flag during system call + } + // Initialize the FS register, which is later used for Thread-Local Storage. writefs(0); diff --git a/src/arch/x86_64/kernel/scheduler.rs b/src/arch/x86_64/kernel/scheduler.rs index 9f4e96098e..369b7512f3 100644 --- a/src/arch/x86_64/kernel/scheduler.rs +++ b/src/arch/x86_64/kernel/scheduler.rs @@ -1,9 +1,13 @@ //! 
Architecture dependent interface to initialize a task +#[cfg(not(feature = "common-os"))] use alloc::boxed::Box; use core::arch::asm; +#[cfg(not(feature = "common-os"))] use core::mem::MaybeUninit; -use core::{mem, ptr, slice}; +#[cfg(not(feature = "common-os"))] +use core::slice; +use core::{mem, ptr}; use align_address::Align; @@ -15,12 +19,16 @@ use crate::arch::x86_64::mm::paging::{ }; use crate::arch::x86_64::mm::{PhysAddr, VirtAddr}; use crate::config::*; +#[cfg(not(feature = "common-os"))] use crate::kernel; use crate::scheduler::task::{Task, TaskFrame}; use crate::scheduler::PerCoreSchedulerExt; #[repr(C, packed)] struct State { + #[cfg(feature = "common-os")] + /// GS register + gs: u64, /// FS register for TLS support fs: u64, /// R15 register @@ -231,11 +239,13 @@ impl Drop for TaskStacks { } } +#[cfg(not(feature = "common-os"))] pub struct TaskTLS { _block: Box<[MaybeUninit]>, thread_ptr: *mut (), } +#[cfg(not(feature = "common-os"))] impl TaskTLS { // For details on thread-local storage data structures see // @@ -328,6 +338,7 @@ extern "C" fn task_entry(func: extern "C" fn(usize), arg: usize) -> ! { impl TaskFrame for Task { fn create_stack_frame(&mut self, func: extern "C" fn(usize), arg: usize) { // Check if TLS is allocated already and if the task uses thread-local storage. 
+ #[cfg(not(feature = "common-os"))] if self.tls.is_none() { self.tls = TaskTLS::from_environment(); } @@ -344,6 +355,7 @@ impl TaskFrame for Task { let state = stack.as_mut_ptr::(); ptr::write_bytes(stack.as_mut_ptr::(), 0, mem::size_of::()); + #[cfg(not(feature = "common-os"))] if let Some(tls) = &self.tls { (*state).fs = tls.thread_ptr().addr() as u64; } diff --git a/src/arch/x86_64/kernel/switch.rs b/src/arch/x86_64/kernel/switch.rs index eb9b19f7f5..71c276de48 100644 --- a/src/arch/x86_64/kernel/switch.rs +++ b/src/arch/x86_64/kernel/switch.rs @@ -4,6 +4,68 @@ use core::{mem, ptr}; use crate::core_local::CoreLocal; use crate::set_current_kernel_stack; +#[cfg(not(feature = "common-os"))] +macro_rules! push_gs { + () => { + r#" + "# + }; +} + +#[cfg(not(feature = "common-os"))] +macro_rules! pop_gs { + () => { + r#" + "# + }; +} + +#[cfg(all(feature = "fsgsbase", feature = "common-os"))] +macro_rules! push_gs { + () => { + r#" + rdgsbase rax + push rax + "# + }; +} + +#[cfg(all(feature = "fsgsbase", feature = "common-os"))] +macro_rules! pop_gs { + () => { + r#" + pop rax + wrgsbase rax + "# + }; +} + +#[cfg(all(not(feature = "fsgsbase"), feature = "common-os"))] +macro_rules! push_gs { + () => { + r#" + mov ecx, 0xc0000101 // Kernel GS.Base Model Specific Register + rdmsr + sub rsp, 8 + mov [rsp+4], edx + mov [rsp], eax + "# + }; +} + +#[cfg(all(not(feature = "fsgsbase"), feature = "common-os"))] +macro_rules! pop_gs { + () => { + r#" + mov ecx, 0xc0000101 // Kernel GS.Base Model Specific Register + mov edx, [rsp+4] + mov eax, [rsp] + add rsp, 8 + wrmsr + "# + }; +} + #[cfg(feature = "fsgsbase")] macro_rules! push_fs { () => { @@ -71,7 +133,8 @@ macro_rules! save_context { push r14 push r15 "#, - push_fs!() + push_fs!(), + push_gs!() ) }; } @@ -79,6 +142,7 @@ macro_rules! save_context { macro_rules! restore_context { () => { concat!( + pop_gs!(), pop_fs!(), r#" pop r15 @@ -104,7 +168,7 @@ macro_rules! 
restore_context { } #[naked] -pub unsafe extern "C" fn switch_to_task(_old_stack: *mut usize, _new_stack: usize) { +pub(crate) unsafe extern "C" fn switch_to_task(_old_stack: *mut usize, _new_stack: usize) { // `old_stack` is in `rdi` register // `new_stack` is in `rsi` register @@ -131,7 +195,7 @@ pub unsafe extern "C" fn switch_to_task(_old_stack: *mut usize, _new_stack: usiz /// Performa a context switch to an idle task or a task, which already is owner /// of the FPU. #[naked] -pub unsafe extern "C" fn switch_to_fpu_owner(_old_stack: *mut usize, _new_stack: usize) { +pub(crate) unsafe extern "C" fn switch_to_fpu_owner(_old_stack: *mut usize, _new_stack: usize) { // `old_stack` is in `rdi` register // `new_stack` is in `rsi` register diff --git a/src/arch/x86_64/kernel/syscall.rs b/src/arch/x86_64/kernel/syscall.rs new file mode 100644 index 0000000000..2c99d0c27c --- /dev/null +++ b/src/arch/x86_64/kernel/syscall.rs @@ -0,0 +1,49 @@ +use core::arch::asm; + +use crate::syscalls::table::SYSHANDLER_TABLE; + +#[no_mangle] +#[naked] +pub(crate) unsafe extern "C" fn syscall_handler() -> ! 
{ + unsafe { + asm!( + // save context, see x86_64 ABI + "push rcx", + "push rdx", + "push rsi", + "push rdi", + "push r8", + "push r9", + "push r10", + "push r11", + // switch to kernel stack + "swapgs", + "mov rcx, rsp", + "mov rsp, gs:32", + // save user stack pointer + "push rcx", + // copy 4th argument to rcx to adhere x86_64 ABI + "mov rcx, r10", + "sti", + "mov r10, qword ptr [rip + {table}@GOTPCREL]", + "call [r10 + 8*rax]", + "cli", + // restore user stack pointer + "pop rcx", + "mov rsp, rcx", + "swapgs", + // restore context, see x86_64 ABI + "pop r11", + "pop r10", + "pop r9", + "pop r8", + "pop rdi", + "pop rsi", + "pop rdx", + "pop rcx", + "sysretq", + table = sym SYSHANDLER_TABLE, + options(noreturn) + ); + } +} diff --git a/src/arch/x86_64/mm/mod.rs b/src/arch/x86_64/mm/mod.rs index 1a8b356438..b4e606ee09 100644 --- a/src/arch/x86_64/mm/mod.rs +++ b/src/arch/x86_64/mm/mod.rs @@ -1,12 +1,18 @@ -pub mod paging; -pub mod physicalmem; -pub mod virtualmem; +pub(crate) mod paging; +pub(crate) mod physicalmem; +pub(crate) mod virtualmem; use core::slice; +#[cfg(feature = "common-os")] +use align_address::Align; pub use x86::bits64::paging::{PAddr as PhysAddr, VAddr as VirtAddr}; +#[cfg(feature = "common-os")] +use x86_64::structures::paging::{PageSize, Size4KiB as BasePageSize}; pub use self::paging::init_page_tables; +#[cfg(feature = "common-os")] +use crate::arch::mm::paging::{PageTableEntryFlags, PageTableEntryFlagsExt}; /// Memory translation, allocation and deallocation for MultibootInformation struct MultibootMemory; @@ -34,8 +40,57 @@ impl multiboot::information::MemoryManagement for MultibootMemory { } } +#[cfg(feature = "common-os")] +pub fn create_new_root_page_table() -> usize { + let physaddr = + physicalmem::allocate_aligned(BasePageSize::SIZE as usize, BasePageSize::SIZE as usize) + .unwrap(); + let virtaddr = + virtualmem::allocate_aligned(2 * BasePageSize::SIZE as usize, BasePageSize::SIZE as usize) + .unwrap(); + let mut flags = 
PageTableEntryFlags::empty(); + flags.normal().writable(); + + let entry: u64 = unsafe { + let cr3 = x86::controlregs::cr3().align_down(BasePageSize::SIZE); + paging::map::(virtaddr, PhysAddr(cr3), 1, flags); + let entry: &u64 = &*virtaddr.as_ptr(); + + *entry + }; + + let slice_addr = virtaddr + BasePageSize::SIZE; + paging::map::(slice_addr, physaddr, 1, flags); + + unsafe { + let pml4 = core::slice::from_raw_parts_mut(slice_addr.as_mut_ptr() as *mut u64, 512); + + // clear PML4 + for elem in pml4.iter_mut() { + *elem = 0; + } + + // copy first element and the self reference + pml4[0] = entry; + // create self reference + pml4[511] = physaddr.as_u64() + 0x3; // PG_PRESENT | PG_RW + }; + + paging::unmap::(virtaddr, 2); + virtualmem::deallocate(virtaddr, 2 * BasePageSize::SIZE as usize); + + physaddr.as_usize() +} + pub fn init() { paging::init(); physicalmem::init(); virtualmem::init(); + + #[cfg(feature = "common-os")] + unsafe { + crate::scheduler::BOOT_ROOT_PAGE_TABLE + .set(x86::controlregs::cr3().try_into().unwrap()) + .unwrap(); + } } diff --git a/src/arch/x86_64/mm/paging.rs b/src/arch/x86_64/mm/paging.rs index 1f44845a81..fcff993c71 100644 --- a/src/arch/x86_64/mm/paging.rs +++ b/src/arch/x86_64/mm/paging.rs @@ -2,15 +2,18 @@ use core::fmt::Debug; use core::ptr; use x86_64::instructions::tlb; -use x86_64::registers::control::Cr3; +use x86_64::registers::control::Cr2; +pub use x86_64::structures::idt::InterruptStackFrame as ExceptionStackFrame; +use x86_64::structures::idt::PageFaultErrorCode; use x86_64::structures::paging::mapper::{TranslateResult, UnmapError}; pub use x86_64::structures::paging::PageTableFlags as PageTableEntryFlags; use x86_64::structures::paging::{ - Mapper, Page, PageTable, PageTableIndex, PhysFrame, RecursivePageTable, Size2MiB, Translate, + Mapper, Page, PageTableIndex, PhysFrame, RecursivePageTable, Size2MiB, Translate, }; +use crate::arch::x86_64::kernel::processor; use crate::arch::x86_64::mm::{physicalmem, PhysAddr, VirtAddr}; 
-use crate::{env, mm}; +use crate::{env, mm, scheduler}; pub trait PageTableEntryFlagsExt { fn device(&mut self) -> &mut Self; @@ -22,6 +25,12 @@ pub trait PageTableEntryFlagsExt { fn writable(&mut self) -> &mut Self; fn execute_disable(&mut self) -> &mut Self; + + fn execute_enable(&mut self) -> &mut Self; + + fn user(&mut self) -> &mut Self; + + fn kernel(&mut self) -> &mut Self; } impl PageTableEntryFlagsExt for PageTableEntryFlags { @@ -49,6 +58,21 @@ impl PageTableEntryFlagsExt for PageTableEntryFlags { self.insert(PageTableEntryFlags::NO_EXECUTE); self } + + fn execute_enable(&mut self) -> &mut Self { + self.remove(PageTableEntryFlags::NO_EXECUTE); + self + } + + fn user(&mut self) -> &mut Self { + self.insert(PageTableEntryFlags::USER_ACCESSIBLE); + self + } + + fn kernel(&mut self) -> &mut Self { + self.remove(PageTableEntryFlags::USER_ACCESSIBLE); + self + } } pub use x86_64::structures::paging::{ @@ -218,7 +242,7 @@ where // FIXME: Some sentinel pages around stacks are supposed to be unmapped. // We should handle this case there instead of here. 
Err(UnmapError::PageNotMapped) => { - debug!("Tried to unmap {page:?}, which was not mapped.") + info!("Tried to unmap {page:?}, which was not mapped.") } Err(err) => panic!("{err:?}"), } @@ -230,6 +254,39 @@ pub fn get_application_page_size() -> usize { LargePageSize::SIZE as usize } +#[cfg(not(feature = "common-os"))] +pub(crate) extern "x86-interrupt" fn page_fault_handler( + stack_frame: ExceptionStackFrame, + error_code: PageFaultErrorCode, +) { + error!("Page fault (#PF)!"); + error!("page_fault_linear_address = {:p}", Cr2::read()); + error!("error_code = {error_code:?}"); + error!("fs = {:#X}", processor::readfs()); + error!("gs = {:#X}", processor::readgs()); + error!("stack_frame = {stack_frame:#?}"); + scheduler::abort(); +} + +#[cfg(feature = "common-os")] +pub(crate) extern "x86-interrupt" fn page_fault_handler( + mut stack_frame: ExceptionStackFrame, + error_code: PageFaultErrorCode, +) { + unsafe { + if stack_frame.as_mut().read().code_segment != 0x08 { + core::arch::asm!("swapgs", options(nostack)); + } + } + error!("Page fault (#PF)!"); + error!("page_fault_linear_address = {:p}", Cr2::read()); + error!("error_code = {error_code:?}"); + error!("fs = {:#X}", processor::readfs()); + error!("gs = {:#X}", processor::readgs()); + error!("stack_frame = {stack_frame:#?}"); + scheduler::abort(); +} + pub fn init() {} pub fn init_page_tables() { @@ -306,7 +363,7 @@ unsafe fn disect(pt: PT, virt_addr: x86_64::VirtAddr) { } #[allow(dead_code)] -unsafe fn print_page_tables(levels: usize) { +pub(crate) unsafe fn print_page_tables(levels: usize) { assert!((1..=4).contains(&levels)); fn print(table: &x86_64::structures::paging::PageTable, level: usize, min_level: usize) { @@ -332,14 +389,14 @@ unsafe fn print_page_tables(levels: usize) { } // Recursive - // let mut recursive_page_table = unsafe { recursive_page_table() }; - // let pt = recursive_page_table.level_4_table(); + let mut recursive_page_table = unsafe { recursive_page_table() }; + let pt = 
recursive_page_table.level_4_table(); // Identity mapped - let level_4_table_addr = Cr3::read().0.start_address().as_u64(); - let level_4_table_ptr = - ptr::from_exposed_addr::(level_4_table_addr.try_into().unwrap()); - let pt = unsafe { &*level_4_table_ptr }; + //let level_4_table_addr = Cr3::read().0.start_address().as_u64(); + //let level_4_table_ptr = + // ptr::from_exposed_addr::(level_4_table_addr.try_into().unwrap()); + //let pt = unsafe { &*level_4_table_ptr }; print(pt, 4, 5 - levels); } diff --git a/src/arch/x86_64/mm/virtualmem.rs b/src/arch/x86_64/mm/virtualmem.rs index 2645d7bd43..fd849b4c59 100644 --- a/src/arch/x86_64/mm/virtualmem.rs +++ b/src/arch/x86_64/mm/virtualmem.rs @@ -140,13 +140,19 @@ pub fn print_information() { /// End of the virtual memory address space reserved for kernel memory. /// This also marks the start of the virtual memory address space reserved for the task heap. /// In case of pure rust applications, we don't have a task heap. -#[cfg(not(feature = "newlib"))] +#[cfg(all(not(feature = "common-os"), not(feature = "newlib")))] #[inline] pub const fn kernel_heap_end() -> VirtAddr { VirtAddr(0x8000_0000_0000u64) } -#[cfg(feature = "newlib")] +#[cfg(all(feature = "common-os", not(feature = "newlib")))] +#[inline] +pub const fn kernel_heap_end() -> VirtAddr { + VirtAddr(0x200_0000_0000u64) +} + +#[cfg(all(not(feature = "common-os"), feature = "newlib"))] #[inline] pub const fn kernel_heap_end() -> VirtAddr { VirtAddr(0x1_0000_0000u64) diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 7809b7212c..e4ccd0eb34 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,6 +1,25 @@ pub mod kernel; pub mod mm; +use crate::arch::mm::paging::ExceptionStackFrame; + +/// Helper function to swap the GS register, if the user-space is +/// interrupted.
+#[cfg(feature = "common-os")] +#[inline(always)] +pub(crate) fn swapgs(stack_frame: &ExceptionStackFrame) { + use core::arch::asm; + if stack_frame.code_segment != 8 { + unsafe { + asm!("swapgs", options(nomem, nostack, preserves_flags)); + } + } +} + +#[cfg(not(feature = "common-os"))] +#[inline(always)] +pub(crate) fn swapgs(_stack_frame: &ExceptionStackFrame) {} + /// Force strict CPU ordering, serializes load and store operations. #[allow(dead_code)] #[inline(always)] diff --git a/src/config.rs b/src/config.rs index 9acbcd848f..8b32b25ca8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,6 +1,6 @@ pub(crate) const KERNEL_STACK_SIZE: usize = 32_768; -pub(crate) const DEFAULT_STACK_SIZE: usize = 65_536; +pub const DEFAULT_STACK_SIZE: usize = 65_536; pub(crate) const USER_STACK_SIZE: usize = 1_048_576; diff --git a/src/drivers/net/mod.rs b/src/drivers/net/mod.rs index d56e73e5b6..66ab04aa93 100644 --- a/src/drivers/net/mod.rs +++ b/src/drivers/net/mod.rs @@ -71,7 +71,8 @@ pub(crate) fn network_irqhandler(_state: &State) -> bool { } #[cfg(target_arch = "x86_64")] -pub(crate) extern "x86-interrupt" fn network_irqhandler(_stack_frame: ExceptionStackFrame) { +pub(crate) extern "x86-interrupt" fn network_irqhandler(stack_frame: ExceptionStackFrame) { + crate::arch::x86_64::swapgs(&stack_frame); use crate::scheduler::PerCoreSchedulerExt; debug!("Receive network interrupt"); @@ -79,6 +80,7 @@ pub(crate) extern "x86-interrupt" fn network_irqhandler(_stack_frame: ExceptionS let _ = _irqhandler(); core_scheduler().reschedule(); + crate::arch::x86_64::swapgs(&stack_frame); } #[cfg(target_arch = "riscv64")] diff --git a/src/env.rs b/src/env.rs index 09d6c0a5ba..ccbc37892b 100644 --- a/src/env.rs +++ b/src/env.rs @@ -10,7 +10,7 @@ use hashbrown::HashMap; use hermit_entry::boot_info::PlatformInfo; use hermit_sync::OnceCell; -pub use crate::arch::kernel::{self, get_base_address, get_image_size, get_ram_address}; +pub(crate) use crate::arch::kernel::{self, get_base_address, 
get_image_size, get_ram_address}; use crate::kernel::boot_info; static CLI: OnceCell = OnceCell::new(); diff --git a/src/lib.rs b/src/lib.rs index 2626d9d374..cb128aed56 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -50,7 +50,9 @@ extern crate std; #[macro_use] extern crate num_derive; +#[cfg(not(feature = "common-os"))] use alloc::alloc::Layout; +#[cfg(not(feature = "common-os"))] use core::alloc::GlobalAlloc; #[cfg(feature = "smp")] use core::hint::spin_loop; @@ -64,6 +66,7 @@ pub use env::is_uhyve as _is_uhyve; use mm::allocator::LockedAllocator; pub(crate) use crate::arch::*; +pub use crate::config::DEFAULT_STACK_SIZE; pub(crate) use crate::config::*; pub use crate::fs::create_file; use crate::kernel::is_uhyve_with_pci; @@ -76,7 +79,7 @@ mod macros; #[macro_use] mod logging; -mod arch; +pub mod arch; mod config; pub mod console; mod drivers; @@ -88,7 +91,7 @@ pub mod fd; pub mod fs; pub mod io; mod mm; -mod scheduler; +pub mod scheduler; mod synch; pub mod syscalls; pub mod time; @@ -132,7 +135,7 @@ static ALLOCATOR: LockedAllocator = LockedAllocator::new(); /// Returning a null pointer indicates that either memory is exhausted or /// `size` and `align` do not meet this allocator's size or alignment constraints. /// -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] pub(crate) extern "C" fn __sys_malloc(size: usize, align: usize) -> *mut u8 { let layout_res = Layout::from_size_align(size, align); if layout_res.is_err() || size == 0 { @@ -174,7 +177,7 @@ pub(crate) extern "C" fn __sys_malloc(size: usize, align: usize) -> *mut u8 { /// # Errors /// Returns null if the new layout does not meet the size and alignment constraints of the /// allocator, or if reallocation otherwise fails. 
-#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] pub(crate) extern "C" fn __sys_realloc( ptr: *mut u8, size: usize, @@ -219,7 +222,7 @@ pub(crate) extern "C" fn __sys_realloc( /// /// # Errors /// May panic if debug assertions are enabled and invalid parameters `size` or `align` where passed. -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] pub(crate) extern "C" fn __sys_free(ptr: *mut u8, size: usize, align: usize) { unsafe { let layout_res = Layout::from_size_align(size, align); diff --git a/src/mm/mod.rs b/src/mm/mod.rs index ca6bdfc385..815960c350 100644 --- a/src/mm/mod.rs +++ b/src/mm/mod.rs @@ -104,12 +104,13 @@ pub(crate) fn init() { - reserved_space) .align_down(LargePageSize::SIZE as usize); - // we reserve 10% of the memory for stack allocations - let stack_reserve: usize = (available_memory * 10) / 100; let heap_start_addr; - #[cfg(feature = "newlib")] + #[cfg(all(feature = "newlib", not(feature = "common-os")))] { + // we reserve 10% of the memory for stack allocations + let stack_reserve: usize = (available_memory * 10) / 100; + info!("An application with a C-based runtime is running on top of Hermit!"); let kernel_heap_size = 10 * LargePageSize::SIZE as usize; @@ -131,8 +132,56 @@ pub(crate) fn init() { heap_start_addr = map_addr; } - #[cfg(not(feature = "newlib"))] + #[cfg(all(not(feature = "newlib"), feature = "common-os"))] + { + info!("Using HermitOS as common OS!"); + + // we reserve at least 75% of the memory for the user space + let reserve: usize = (available_memory * 75) / 100; + // 64 MB is enough as kernel heap + let reserve = core::cmp::min(reserve, 0x4000000); + + let virt_size: usize = reserve.align_down(LargePageSize::SIZE as usize); + let virt_addr = + arch::mm::virtualmem::allocate_aligned(virt_size, LargePageSize::SIZE as usize) + .unwrap(); + heap_start_addr = virt_addr; + + info!( + "Heap: size {} MB, start address {:p}", + virt_size >> 20, + virt_addr 
+ ); + + #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))] + if has_1gib_pages && virt_size > HugePageSize::SIZE as usize { + // Mount large pages to the next huge page boundary + let npages = (virt_addr.align_up_to_huge_page().as_usize() - virt_addr.as_usize()) + / LargePageSize::SIZE as usize; + if let Err(n) = paging::map_heap::<LargePageSize>(virt_addr, npages) { + map_addr = virt_addr + n * LargePageSize::SIZE as usize; + map_size = virt_size - (map_addr - virt_addr).as_usize(); + } else { + map_addr = virt_addr.align_up_to_huge_page(); + map_size = virt_size - (map_addr - virt_addr).as_usize(); + } + } else { + map_addr = virt_addr; + map_size = virt_size; + } + + #[cfg(not(any(target_arch = "x86_64", target_arch = "riscv64")))] + { + map_addr = virt_addr; + map_size = virt_size; + } + } + + #[cfg(all(not(feature = "newlib"), not(feature = "common-os")))] { + // we reserve 10% of the memory for stack allocations + let stack_reserve: usize = (available_memory * 10) / 100; + info!("A pure Rust application is running on top of Hermit!"); // At first, we map only a small part into the heap.
diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 6637c31e20..22a5d8648f 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -14,15 +14,15 @@ use riscv::register::sstatus; use crate::arch; use crate::arch::core_local::*; -use crate::arch::interrupts; #[cfg(target_arch = "riscv64")] use crate::arch::switch::switch_to_task; #[cfg(target_arch = "x86_64")] use crate::arch::switch::{switch_to_fpu_owner, switch_to_task}; +use crate::arch::{get_processor_count, interrupts}; use crate::kernel::scheduler::TaskStacks; use crate::scheduler::task::*; -pub(crate) mod task; +pub mod task; static NO_TASKS: AtomicU32 = AtomicU32::new(0); /// Map between Core ID and per-core scheduler @@ -40,7 +40,7 @@ static TASKS: InterruptTicketMutex> = pub type CoreId = u32; #[cfg(feature = "smp")] -pub struct SchedulerInput { +pub(crate) struct SchedulerInput { /// Queue of new tasks new_tasks: VecDeque, /// Queue of task, which are wakeup by another core @@ -62,7 +62,7 @@ impl SchedulerInput { not(any(target_arch = "x86_64", target_arch = "aarch64")), repr(align(64)) )] -pub struct PerCoreScheduler { +pub(crate) struct PerCoreScheduler { /// Core ID of this per-core scheduler #[cfg(feature = "smp")] core_id: CoreId, @@ -81,7 +81,7 @@ pub struct PerCoreScheduler { blocked_tasks: BlockedTaskQueue, } -pub trait PerCoreSchedulerExt { +pub(crate) trait PerCoreSchedulerExt { /// Triggers the scheduler to reschedule the tasks. /// Interrupt flag will be cleared during the reschedule fn reschedule(self); @@ -708,12 +708,12 @@ fn get_tid() -> TaskId { } #[inline] -pub fn abort() -> ! { +pub(crate) fn abort() -> ! { core_scheduler().exit(-1) } /// Add a per-core scheduler for the current core. -pub fn add_current_core() { +pub(crate) fn add_current_core() { // Create an idle task for this core. 
let core_id = core_id(); let tid = get_tid(); @@ -764,6 +764,30 @@ fn get_scheduler_input(core_id: CoreId) -> &'static InterruptTicketMutex TaskId { + static CORE_COUNTER: AtomicU32 = AtomicU32::new(1); + + let core_id = if selector < 0 { + // use Round Robin to schedule the cores + CORE_COUNTER.fetch_add(1, Ordering::SeqCst) % get_processor_count() + } else { + selector as u32 + }; + + PerCoreScheduler::spawn(func, arg, prio, core_id, stack_size) +} + +pub fn getpid() -> TaskId { + core_scheduler().get_current_task_id() +} + +#[allow(clippy::result_unit_err)] pub fn join(id: TaskId) -> Result<(), ()> { let core_scheduler = core_scheduler(); @@ -792,3 +816,12 @@ pub fn join(id: TaskId) -> Result<(), ()> { fn get_task_handle(id: TaskId) -> Option { TASKS.lock().get(&id).copied() } + +#[cfg(all(target_arch = "x86_64", feature = "common-os"))] +pub(crate) static BOOT_ROOT_PAGE_TABLE: OnceCell<usize> = OnceCell::new(); + +#[cfg(all(target_arch = "x86_64", feature = "common-os"))] +pub(crate) fn get_root_page_table() -> usize { + let current_task_borrowed = core_scheduler().current_task.borrow_mut(); + current_task_borrowed.root_page_table +} diff --git a/src/scheduler/task.rs b/src/scheduler/task.rs index 735169ac0d..681b435313 100644 --- a/src/scheduler/task.rs +++ b/src/scheduler/task.rs @@ -1,3 +1,4 @@ +#[cfg(not(feature = "common-os"))] use alloc::boxed::Box; use alloc::collections::{LinkedList, VecDeque}; use alloc::rc::Rc; @@ -12,7 +13,9 @@ use core::ops::DerefMut; use crate::arch; use crate::arch::core_local::*; use crate::arch::mm::VirtAddr; -use crate::arch::scheduler::{TaskStacks, TaskTLS}; +use crate::arch::scheduler::TaskStacks; +#[cfg(not(feature = "common-os"))] +use crate::arch::scheduler::TaskTLS; use crate::scheduler::CoreId; /// Returns the most significant bit.
@@ -31,7 +34,7 @@ fn msb(n: u64) -> Option { /// The status of the task - used for scheduling #[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum TaskStatus { +pub(crate) enum TaskStatus { Invalid, Ready, Running, @@ -91,7 +94,7 @@ pub const IDLE_PRIO: Priority = Priority::from(0); pub const NO_PRIORITIES: usize = 31; #[derive(Copy, Clone, Debug)] -pub struct TaskHandle { +pub(crate) struct TaskHandle { id: TaskId, priority: Priority, #[cfg(feature = "smp")] @@ -144,7 +147,7 @@ impl Eq for TaskHandle {} /// Realize a priority queue for task handles #[derive(Default)] -pub struct TaskHandlePriorityQueue { +pub(crate) struct TaskHandlePriorityQueue { queues: [Option>; NO_PRIORITIES], prio_bitmap: u64, } @@ -240,7 +243,7 @@ impl TaskHandlePriorityQueue { } /// Realize a priority queue for tasks -pub struct PriorityTaskQueue { +pub(crate) struct PriorityTaskQueue { queues: [LinkedList>>; NO_PRIORITIES], prio_bitmap: u64, } @@ -358,7 +361,7 @@ impl PriorityTaskQueue { not(any(target_arch = "x86_64", target_arch = "aarch64")), repr(align(64)) )] -pub struct Task { +pub(crate) struct Task { /// The ID of this context pub id: TaskId, /// Status of a task, e.g. 
if the task is ready or blocked @@ -376,13 +379,17 @@ pub struct Task { /// Stack of the task pub stacks: TaskStacks, /// Task Thread-Local-Storage (TLS) + #[cfg(not(feature = "common-os"))] pub tls: Option>, + // Physical address of the 1st level page table + #[cfg(all(target_arch = "x86_64", feature = "common-os"))] + pub root_page_table: usize, /// lwIP error code for this task #[cfg(feature = "newlib")] pub lwip_errno: i32, } -pub trait TaskFrame { +pub(crate) trait TaskFrame { /// Create the initial stack frame for a new task fn create_stack_frame(&mut self, func: extern "C" fn(usize), arg: usize); } @@ -406,7 +413,10 @@ impl Task { last_fpu_state: arch::processor::FPUState::new(), core_id, stacks, + #[cfg(not(feature = "common-os"))] tls: None, + #[cfg(all(target_arch = "x86_64", feature = "common-os"))] + root_page_table: arch::create_new_root_page_table(), #[cfg(feature = "newlib")] lwip_errno: 0, } @@ -424,7 +434,10 @@ impl Task { last_fpu_state: arch::processor::FPUState::new(), core_id, stacks: TaskStacks::from_boot_stacks(), + #[cfg(not(feature = "common-os"))] tls: None, + #[cfg(all(target_arch = "x86_64", feature = "common-os"))] + root_page_table: *crate::scheduler::BOOT_ROOT_PAGE_TABLE.get().unwrap(), #[cfg(feature = "newlib")] lwip_errno: 0, } @@ -448,7 +461,7 @@ impl BlockedTask { } } -pub struct BlockedTaskQueue { +pub(crate) struct BlockedTaskQueue { list: LinkedList, #[cfg(any(feature = "tcp", feature = "udp"))] network_wakeup_time: Option, diff --git a/src/syscalls/entropy.rs b/src/syscalls/entropy.rs index 32db7fc73e..e93ae6e564 100644 --- a/src/syscalls/entropy.rs +++ b/src/syscalls/entropy.rs @@ -49,6 +49,7 @@ unsafe extern "C" fn __sys_read_entropy(buf: *mut u8, len: usize, flags: u32) -> /// Returns either the number of bytes written to buf (a positive value) or /// * `-EINVAL` if `flags` contains unknown flags. /// * `-ENOSYS` if the system does not support random data generation. 
+#[allow(unsafe_op_in_unsafe_fn)] #[no_mangle] #[cfg_attr(target_arch = "riscv64", allow(unsafe_op_in_unsafe_fn))] // FIXME pub unsafe extern "C" fn sys_read_entropy(buf: *mut u8, len: usize, flags: u32) -> isize { diff --git a/src/syscalls/mod.rs b/src/syscalls/mod.rs index 04f7fba4e1..b8ca9e79ad 100644 --- a/src/syscalls/mod.rs +++ b/src/syscalls/mod.rs @@ -25,7 +25,7 @@ use crate::fd::{ }; use crate::fs::{self, FileAttr}; use crate::syscalls::interfaces::SyscallInterface; -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] use crate::{__sys_free, __sys_malloc, __sys_realloc}; mod condvar; @@ -42,6 +42,8 @@ mod recmutex; mod semaphore; mod spinlock; mod system; +#[cfg(feature = "common-os")] +pub(crate) mod table; mod tasks; mod timer; @@ -70,19 +72,19 @@ pub(crate) fn init() { sbrk_init(); } -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] #[no_mangle] pub extern "C" fn sys_malloc(size: usize, align: usize) -> *mut u8 { kernel_function!(__sys_malloc(size, align)) } -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] #[no_mangle] pub extern "C" fn sys_realloc(ptr: *mut u8, size: usize, align: usize, new_size: usize) -> *mut u8 { kernel_function!(__sys_realloc(ptr, size, align, new_size)) } -#[cfg(target_os = "none")] +#[cfg(all(target_os = "none", not(feature = "common-os")))] #[no_mangle] pub extern "C" fn sys_free(ptr: *mut u8, size: usize, align: usize) { kernel_function!(__sys_free(ptr, size, align)) diff --git a/src/syscalls/table.rs b/src/syscalls/table.rs new file mode 100644 index 0000000000..c0b1152598 --- /dev/null +++ b/src/syscalls/table.rs @@ -0,0 +1,88 @@ +use core::arch::asm; + +use crate::syscalls::*; + +/// number of the system call `exit` +const SYSNO_EXIT: usize = 0; +/// number of the system call `write` +const SYSNO_WRITE: usize = 1; +/// number of the system call `read` +const SYSNO_READ: usize = 2; +/// number of the system call 
`abort` +const SYSNO_ABORT: usize = 3; +/// number of the system call `usleep` +const SYSNO_USLEEP: usize = 4; +/// number of the system call `getpid` +const SYSNO_GETPID: usize = 5; +/// number of the system call `yield` +const SYSNO_YIELD: usize = 6; +/// number of the system call `read_entropy` +const SYSNO_READ_ENTROPY: usize = 7; +/// number of the system call `get_processor_count` +const SYSNO_GET_PROCESSOR_COUNT: usize = 8; +/// number of the system call `close` +const SYSNO_CLOSE: usize = 9; +/// number of the system call `futex_wait` +const SYSNO_FUTEX_WAIT: usize = 10; +/// number of the system call `futex_wake` +const SYSNO_FUTEX_WAKE: usize = 11; +/// number of the system call `open` +const SYSNO_OPEN: usize = 12; + +/// total number of system calls +const NO_SYSCALLS: usize = 32; + +extern "C" fn invalid_syscall(sys_no: u64) -> ! { + error!("Invalid syscall {}", sys_no); + sys_exit(1); +} + +#[allow(unused_assignments)] +#[no_mangle] +#[naked] +pub(crate) unsafe extern "C" fn sys_invalid() { + unsafe { + asm!( + "mov rdi, rax", + "call {}", + sym invalid_syscall, + options(noreturn) + ); + } +} + +#[repr(align(64))] +#[repr(C)] +pub(crate) struct SyscallTable { + handle: [*const usize; NO_SYSCALLS], +} + +impl SyscallTable { + pub const fn new() -> Self { + let mut table = SyscallTable { + handle: [sys_invalid as *const _; NO_SYSCALLS], + }; + + table.handle[SYSNO_EXIT] = sys_exit as *const _; + table.handle[SYSNO_WRITE] = sys_write as *const _; + table.handle[SYSNO_READ] = sys_read as *const _; + table.handle[SYSNO_ABORT] = sys_abort as *const _; + table.handle[SYSNO_USLEEP] = sys_usleep as *const _; + table.handle[SYSNO_GETPID] = sys_getpid as *const _; + table.handle[SYSNO_YIELD] = sys_yield as *const _; + table.handle[SYSNO_READ_ENTROPY] = sys_read_entropy as *const _; + table.handle[SYSNO_GET_PROCESSOR_COUNT] = sys_get_processor_count as *const _; + table.handle[SYSNO_CLOSE] = sys_close as *const _; + table.handle[SYSNO_FUTEX_WAIT] = 
sys_futex_wait as *const _; + table.handle[SYSNO_FUTEX_WAKE] = sys_futex_wake as *const _; + table.handle[SYSNO_OPEN] = sys_open as *const _; + + table + } +} + +unsafe impl Send for SyscallTable {} +unsafe impl Sync for SyscallTable {} + +#[no_mangle] +pub(crate) static SYSHANDLER_TABLE: SyscallTable = SyscallTable::new(); diff --git a/src/syscalls/tasks.rs b/src/syscalls/tasks.rs index 62ec673715..dfe07097d7 100644 --- a/src/syscalls/tasks.rs +++ b/src/syscalls/tasks.rs @@ -1,13 +1,11 @@ use alloc::collections::BTreeMap; use core::isize; #[cfg(feature = "newlib")] -use core::sync::atomic::AtomicUsize; -use core::sync::atomic::{AtomicU32, Ordering}; +use core::sync::atomic::{AtomicUsize, Ordering}; use hermit_sync::InterruptTicketMutex; use crate::arch::core_local::*; -use crate::arch::get_processor_count; use crate::arch::processor::{get_frequency, get_timestamp}; use crate::config::USER_STACK_SIZE; use crate::errno::*; @@ -229,17 +227,7 @@ extern "C" fn __sys_spawn2( stack_size: usize, selector: isize, ) -> Tid { - static CORE_COUNTER: AtomicU32 = AtomicU32::new(1); - - let core_id = if selector < 0 { - // use Round Robin to schedule the cores - CORE_COUNTER.fetch_add(1, Ordering::SeqCst) % get_processor_count() - } else { - selector as u32 - }; - - scheduler::PerCoreScheduler::spawn(func, arg, Priority::from(prio), core_id, stack_size).into() - as Tid + scheduler::spawn(func, arg, Priority::from(prio), stack_size, selector).into() } #[no_mangle]