From 50bbf2d53e16c41cd722d5237a9e2e080a0dc81a Mon Sep 17 00:00:00 2001 From: tsudzuki <93422095+LaoLittle@users.noreply.github.com> Date: Tue, 17 Oct 2023 09:33:02 +0800 Subject: [PATCH] stack overflow handler (#51) * use global_asm * initfn abi fix * add armv7, riscv64 support * remove target * add more platforms * update ci * filename update * cargo fmt && filename update * fix align * overflow check * update deps * update * make buffer bigger * overflow check * windows fix * windows fix * windows fix * windows * cargo fmt * remove explicit align * process sig mask * refactor * windows impl * improve test --- Cargo.toml | 3 +- src/detail/aarch64_unix.rs | 4 +- src/detail/asm/asm_aarch64_aapcs_elf.S | 6 +- src/detail/asm/asm_aarch64_aapcs_macho.S | 6 +- src/detail/asm/asm_arm_aapcs_elf.S | 6 +- src/detail/asm/asm_loongarch64_sysv_elf.S | 6 +- src/detail/asm/asm_riscv64_c_elf.S | 6 +- src/detail/gen.rs | 4 +- src/detail/mod.rs | 2 + src/detail/x86_64_windows.rs | 3 +- src/gen_impl.rs | 10 +-- src/reg_context.rs | 2 +- src/rt.rs | 49 +++++++++++++- src/stack/mod.rs | 11 ++-- src/stack/overflow_unix.rs | 79 +++++++++++++++++++++++ src/stack/overflow_windows.rs | 79 +++++++++++++++++++++++ src/stack/unix.rs | 3 + src/stack/windows.rs | 9 ++- 18 files changed, 252 insertions(+), 36 deletions(-) create mode 100644 src/stack/overflow_unix.rs create mode 100644 src/stack/overflow_windows.rs diff --git a/Cargo.toml b/Cargo.toml index bf8abba..b9de221 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,9 +20,10 @@ exclude = [ [target.'cfg(windows)'.dependencies.windows] -version = "0.48" +version = "0.51.1" features = [ "Win32_System_Memory", + "Win32_System_Kernel", "Win32_Foundation", "Win32_System_SystemInformation", "Win32_System_Diagnostics_Debug" diff --git a/src/detail/aarch64_unix.rs b/src/detail/aarch64_unix.rs index 8e96f90..8eea0c1 100644 --- a/src/detail/aarch64_unix.rs +++ b/src/detail/aarch64_unix.rs @@ -23,13 +23,13 @@ extern "C" { pub fn swap_registers(out_regs: *mut Registers, in_regs: *const Registers); } -#[repr(C, align(16))] +#[repr(C)] #[derive(Debug)] pub struct Registers { // We save the 13 callee-saved registers: // x19--x28, fp (x29), lr (x30), sp // and the 8 callee-saved floating point registers: - // v8--v15 + // d8--d15 gpr: [usize; 32], } diff --git a/src/detail/asm/asm_aarch64_aapcs_elf.S b/src/detail/asm/asm_aarch64_aapcs_elf.S index 8e95c56..337d18a 100644 --- a/src/detail/asm/asm_aarch64_aapcs_elf.S +++ b/src/detail/asm/asm_aarch64_aapcs_elf.S @@ -1,7 +1,7 @@ .text .globl prefetch .type prefetch,@function -.align 16 +.align 2 prefetch: prfm pldl1keep, [x0] ret @@ -10,7 +10,7 @@ prefetch: .text .globl bootstrap_green_task .type bootstrap_green_task,@function -.align 16 +.align 2 bootstrap_green_task: mov x0, x19 // arg0 mov x1, x20 // arg1 @@ -21,7 +21,7 @@ bootstrap_green_task: .text .globl swap_registers .type swap_registers,@function -.align 16 +.align 2 swap_registers: stp x19, x20, [x0, #0] stp x21, x22, [x0, #16] diff --git a/src/detail/asm/asm_aarch64_aapcs_macho.S b/src/detail/asm/asm_aarch64_aapcs_macho.S index 769c972..85f9d38 100644 --- a/src/detail/asm/asm_aarch64_aapcs_macho.S +++ b/src/detail/asm/asm_aarch64_aapcs_macho.S @@ -1,13 +1,13 @@ .text .globl _prefetch -.align 8 +.align 2 _prefetch: prfm pldl1keep, [x0] ret .text .globl _bootstrap_green_task -.align 8 +.align 2 _bootstrap_green_task: mov x0, x19 // arg0 mov x1, x20 // arg1 @@ -16,7 +16,7 @@ _bootstrap_green_task: .text .globl _swap_registers -.align 8 +.align 2 _swap_registers: stp x19, x20, [x0, #0] stp x21, x22, [x0, #16] diff --git a/src/detail/asm/asm_arm_aapcs_elf.S b/src/detail/asm/asm_arm_aapcs_elf.S index dc44234..7b42ea2 100644 --- a/src/detail/asm/asm_arm_aapcs_elf.S +++ b/src/detail/asm/asm_arm_aapcs_elf.S @@ -1,7 +1,7 @@ .text .globl prefetch .type prefetch, %function -.align 16 +.align 2 prefetch: pld [r0] bx lr @@ -10,7 +10,7 @@ prefetch: .text .globl bootstrap_green_task .type bootstrap_green_task, %function -.align 16 +.align 2 bootstrap_green_task: mov r0, r4 // arg0 mov r1, r5 // arg1 @@ -21,7 +21,7 @@ bootstrap_green_task: .text .globl swap_registers .type swap_registers, %function -.align 16 +.align 2 swap_registers: // Android doesn't like to use sp directly stmia r0!, {{v1-v7, fp}} diff --git a/src/detail/asm/asm_loongarch64_sysv_elf.S b/src/detail/asm/asm_loongarch64_sysv_elf.S index 366af4b..ce17b45 100644 --- a/src/detail/asm/asm_loongarch64_sysv_elf.S +++ b/src/detail/asm/asm_loongarch64_sysv_elf.S @@ -1,7 +1,7 @@ .text .globl prefetch .type prefetch,@function -.align 16 +.align 2 prefetch: preld 0, $a0, 0 ret @@ -10,7 +10,7 @@ prefetch: .text .globl bootstrap_green_task .type bootstrap_green_task,@function -.align 16 +.align 2 bootstrap_green_task: move $a0, $s0 // arg0 move $a1, $s1 // arg1 @@ -21,7 +21,7 @@ bootstrap_green_task: .text .globl swap_registers .type swap_registers,@function -.align 16 +.align 2 swap_registers: st.d $ra, $a0, 0 st.d $sp, $a0, 8 diff --git a/src/detail/asm/asm_riscv64_c_elf.S b/src/detail/asm/asm_riscv64_c_elf.S index ced837a..1620433 100644 --- a/src/detail/asm/asm_riscv64_c_elf.S +++ b/src/detail/asm/asm_riscv64_c_elf.S @@ -3,7 +3,7 @@ .text .globl prefetch .type prefetch,@function -.align 16 +.align 1 prefetch: ret .size prefetch,.-prefetch @@ -11,7 +11,7 @@ prefetch: .text .globl bootstrap_green_task .type bootstrap_green_task,@function -.align 16 +.align 1 bootstrap_green_task: mv a0, s2 // arg0 mv a1, s3 // arg1 @@ -22,7 +22,7 @@ bootstrap_green_task: .text .globl swap_registers .type swap_registers,@function -.align 16 +.align 1 swap_registers: sd s2, 0*8(a0) sd s3, 1*8(a0) diff --git a/src/detail/gen.rs b/src/detail/gen.rs index eb57b1e..8c9ae64 100644 --- a/src/detail/gen.rs +++ b/src/detail/gen.rs @@ -1,5 +1,5 @@ use crate::rt::ContextStack; -use crate::stack::Func; +use crate::stack::{overflow, Func}; use crate::yield_::yield_now; use crate::Error; use std::any::Any; @@ -29,6 +29,8 @@ fn catch_unwind_filter R + panic::UnwindSafe, R>(f: F) -> std::th /// the init function passed to reg_context #[inline] pub fn gen_init_impl(_: usize, f: *mut usize) -> ! { + overflow::init_once(); + let clo = move || { // consume self.f let f: &mut Option = unsafe { &mut *(f as *mut _) }; diff --git a/src/detail/mod.rs b/src/detail/mod.rs index 59890c9..c0428a6 100644 --- a/src/detail/mod.rs +++ b/src/detail/mod.rs @@ -1,3 +1,5 @@ +#![allow(clippy::eq_op)] + // Register contexts used in various architectures // // These structures all represent a context of one task throughout its diff --git a/src/detail/x86_64_windows.rs b/src/detail/x86_64_windows.rs index af476a2..b5fcd09 100644 --- a/src/detail/x86_64_windows.rs +++ b/src/detail/x86_64_windows.rs @@ -174,10 +174,11 @@ pub use self::asm_impl::*; */ // windows need to restore xmm6~xmm15, for most cases only use two xmm registers +// so we use sysv64 #[repr(C)] #[derive(Debug)] pub struct Registers { - gpr: [usize; 16], + pub(crate) gpr: [usize; 16], } impl Registers { diff --git a/src/gen_impl.rs b/src/gen_impl.rs index 6aa0a82..e1ffbeb 100644 --- a/src/gen_impl.rs +++ b/src/gen_impl.rs @@ -342,6 +342,8 @@ impl<'a, A, T> GeneratorImpl<'a, A, T> { self.f = Some(func); + let guard = (self.stack.begin() as usize, self.stack.end() as usize); + self.context.stack_guard = guard; self.context.regs.init_with( gen_init, 0, @@ -460,10 +462,10 @@ impl<'a, A, T> GeneratorImpl<'a, A, T> { // so that we can stop the inner func self.context._ref = 2; // save the old panic hook, we don't want to print anything for the Cancel - let old = ::std::panic::take_hook(); - ::std::panic::set_hook(Box::new(|_| {})); + let old = panic::take_hook(); + panic::set_hook(Box::new(|_| {})); self.resume_gen(); - ::std::panic::set_hook(old); + panic::set_hook(old); } /// cancel the generator @@ -520,7 +522,7 @@ impl<'a, A, T> Drop for GeneratorImpl<'a, A, T> { // set_stack_size::(used_stack); } else { error!("stack overflow detected!"); - std::panic::panic_any(Error::StackErr); + panic::panic_any(Error::StackErr); } } } diff --git a/src/reg_context.rs b/src/reg_context.rs index d1b759a..36e9ec5 100644 --- a/src/reg_context.rs +++ b/src/reg_context.rs @@ -4,7 +4,7 @@ use crate::stack::Stack; #[derive(Debug)] pub struct RegContext { /// Hold the registers while the task or scheduler is suspended - regs: Registers, + pub(crate) regs: Registers, } impl RegContext { diff --git a/src/rt.rs b/src/rt.rs index 0c55d42..abd6d2d 100644 --- a/src/rt.rs +++ b/src/rt.rs @@ -62,6 +62,8 @@ pub struct Context { pub local_data: *mut u8, /// propagate panic pub err: Option>, + /// cached stack guard for fast path + pub stack_guard: (usize, usize), } impl Context { @@ -76,6 +78,7 @@ impl Context { child: ptr::null_mut(), parent: ptr::null_mut(), local_data: ptr::null_mut(), + stack_guard: (0, 0), } } @@ -170,7 +173,7 @@ impl Context { /// Coroutine managing environment pub struct ContextStack { - root: *mut Context, + pub(crate) root: *mut Context, } #[cfg(nightly)] @@ -292,6 +295,22 @@ pub fn get_local_data() -> *mut u8 { ptr::null_mut() } +pub mod guard { + use crate::is_generator; + use crate::rt::ContextStack; + use crate::stack::sys::page_size; + use std::ops::Range; + + pub type Guard = Range; + + pub fn current() -> Guard { + assert!(is_generator()); + let guard = unsafe { (*(*ContextStack::current().root).child).stack_guard }; + + guard.0 - page_size()..guard.1 + } +} + #[cfg(test)] mod test { use super::is_generator; @@ -301,4 +320,32 @@ mod test { // this is the root context assert!(!is_generator()); } + + #[test] + fn test_overflow() { + use crate::*; + use std::panic::catch_unwind; + + // test signal mask + for _ in 0..2 { + let result = catch_unwind(|| { + let mut g = Gn::new_scoped(move |_s: Scope<(), ()>| { + let guard = super::guard::current(); + + // make sure the compiler does not apply any optimization on it + std::hint::black_box(unsafe { *(guard.start as *const usize) }); + + eprintln!("entered unreachable code"); + std::process::abort(); + }); + + g.next(); + }); + + assert!(matches!( + result.map_err(|err| *err.downcast::().unwrap()), + Err(Error::StackErr) + )); + } + } } diff --git a/src/stack/mod.rs b/src/stack/mod.rs index 8c6ece7..68a08e9 100644 --- a/src/stack/mod.rs +++ b/src/stack/mod.rs @@ -13,6 +13,8 @@ use std::ptr; #[cfg_attr(windows, path = "windows.rs")] pub mod sys; +pub use sys::overflow; + // must align with StackBoxHeader const ALIGN: usize = std::mem::size_of::(); const HEADER_SIZE: usize = std::mem::size_of::() / std::mem::size_of::(); @@ -58,7 +60,7 @@ impl StackBox { header.data_size = data_size; header.need_drop = need_drop; header.stack = stack.shadow_clone(); - std::mem::MaybeUninit::new(StackBox { ptr }) + MaybeUninit::new(StackBox { ptr }) } } @@ -312,13 +314,8 @@ impl Stack { /// Allocate a new stack of `size`. If size = 0, this is a `dummy_stack` pub fn new(size: usize) -> Stack { let track = (size & 1) != 0; - let mut bytes = size * std::mem::size_of::(); - // the minimal size - let min_size = SysStack::min_size(); - if bytes < min_size { - bytes = min_size; - } + let bytes = usize::max(size * std::mem::size_of::(), SysStack::min_size()); let buf = SysStack::allocate(bytes, true).expect("failed to alloc sys stack"); diff --git a/src/stack/overflow_unix.rs b/src/stack/overflow_unix.rs new file mode 100644 index 0000000..5684a77 --- /dev/null +++ b/src/stack/overflow_unix.rs @@ -0,0 +1,79 @@ +use crate::rt::{guard, ContextStack}; + +use crate::yield_::yield_now; +use libc::{sigaction, sighandler_t, SA_ONSTACK, SA_SIGINFO, SIGBUS, SIGSEGV}; +use std::mem; +use std::mem::MaybeUninit; +use std::ptr::null_mut; +use std::sync::Once; + +static mut SIG_ACTION: MaybeUninit = MaybeUninit::uninit(); + +// Signal handler for the SIGSEGV and SIGBUS handlers. We've got guard pages +// (unmapped pages) at the end of every thread's stack, so if a thread ends +// up running into the guard page it'll trigger this handler. We want to +// detect these cases and print out a helpful error saying that the stack +// has overflowed. All other signals, however, should go back to what they +// were originally supposed to do. +// +// If this is not a stack overflow, the handler un-registers itself and +// then returns (to allow the original signal to be delivered again). +// Returning from this kind of signal handler is technically not defined +// to work when reading the POSIX spec strictly, but in practice it turns +// out many large systems and all implementations allow returning from a +// signal handler to work. For a more detailed explanation see the +// comments on https://github.com/rust-lang/rust/issues/26458. +unsafe extern "C" fn signal_handler( + signum: libc::c_int, + info: *mut libc::siginfo_t, + ctx: *mut libc::ucontext_t, +) { + let _ctx = &mut *ctx; + let addr = (*info).si_addr() as usize; + let stack_guard = guard::current(); + + if !stack_guard.contains(&addr) { + println!("{}", std::backtrace::Backtrace::force_capture()); + // SIG_ACTION is available after we registered our handler + sigaction(signum, SIG_ACTION.assume_init_ref(), null_mut()); + + // we are unable to handle this + return; + } + + eprintln!( + "\ncoroutine in thread '{}' has overflowed its stack\n", + std::thread::current().name().unwrap_or("") + ); + + ContextStack::current().top().err = Some(Box::new(crate::Error::StackErr)); + + let mut sigset: libc::sigset_t = mem::zeroed(); + libc::sigemptyset(&mut sigset); + libc::sigaddset(&mut sigset, signum); + libc::sigprocmask(libc::SIG_UNBLOCK, &sigset, null_mut()); + + yield_now(); + + std::process::abort(); +} + +#[cold] +unsafe fn init() { + let mut action: sigaction = mem::zeroed(); + + action.sa_flags = SA_SIGINFO | SA_ONSTACK; + action.sa_sigaction = signal_handler as sighandler_t; + + for signal in [SIGSEGV, SIGBUS] { + sigaction(signal, &action, SIG_ACTION.as_mut_ptr()); + } +} + +pub fn init_once() { + static INIT_ONCE: Once = Once::new(); + + INIT_ONCE.call_once(|| unsafe { + init(); + }) +} diff --git a/src/stack/overflow_windows.rs b/src/stack/overflow_windows.rs new file mode 100644 index 0000000..ace56ee --- /dev/null +++ b/src/stack/overflow_windows.rs @@ -0,0 +1,79 @@ +use crate::rt::{guard, Context, ContextStack}; +use std::sync::Once; +use windows::Win32::Foundation::EXCEPTION_STACK_OVERFLOW; +use windows::Win32::System::Diagnostics::Debug::{ + AddVectoredExceptionHandler, CONTEXT, EXCEPTION_POINTERS, +}; + +unsafe extern "system" fn vectored_handler(exception_info: *mut EXCEPTION_POINTERS) -> i32 { + const EXCEPTION_CONTINUE_SEARCH: i32 = 0; + const EXCEPTION_CONTINUE_EXECUTION: i32 = -1; + + let info = &*exception_info; + let rec = &(*info.ExceptionRecord); + let context = &mut (*info.ContextRecord); + + if rec.ExceptionCode == EXCEPTION_STACK_OVERFLOW + && guard::current().contains(&(context.Rsp as usize)) + { + eprintln!( + "\ncoroutine in thread '{}' has overflowed its stack\n", + std::thread::current().name().unwrap_or("") + ); + + let env = ContextStack::current(); + let cur = env.top(); + cur.err = Some(Box::new(crate::Error::StackErr)); + + context_init(env.pop_context(cur as *mut _), context); + + //yield_now(); + + EXCEPTION_CONTINUE_EXECUTION + } else { + EXCEPTION_CONTINUE_SEARCH + } +} + +unsafe fn init() { + AddVectoredExceptionHandler(1, Some(vectored_handler)); +} + +pub fn init_once() { + static INIT_ONCE: Once = Once::new(); + + INIT_ONCE.call_once(|| unsafe { + init(); + }) +} + +#[cfg(target_arch = "x86_64")] +unsafe fn context_init(parent: &mut Context, context: &mut CONTEXT) { + let [rbx, rsp, rbp, _, r12, r13, r14, r15, _, _, _, stack_base, stack_limit, dealloc_stack, ..] = + parent.regs.regs.gpr; + + let rip = *(rsp as *const usize); + let rsp = rsp + std::mem::size_of::(); + + context.Rbx = rbx as u64; + context.Rsp = rsp as u64; + context.Rbp = rbp as u64; + context.R12 = r12 as u64; + context.R13 = r13 as u64; + context.R14 = r14 as u64; + context.R15 = r15 as u64; + context.Rip = rip as u64; + + let teb: usize; + + unsafe { + std::arch::asm!( + "mov {0}, gs:[0x30]", + out(reg) teb + ); + } + + *((teb + 0x08) as *mut usize) = stack_base; + *((teb + 0x10) as *mut usize) = stack_limit; + *((teb + 0x1478) as *mut usize) = dealloc_stack; +} diff --git a/src/stack/unix.rs b/src/stack/unix.rs index 910f713..a118abc 100644 --- a/src/stack/unix.rs +++ b/src/stack/unix.rs @@ -6,6 +6,9 @@ use std::usize; use super::SysStack; +#[path = "overflow_unix.rs"] +pub mod overflow; + #[cfg(any( target_os = "openbsd", target_os = "macos", diff --git a/src/stack/windows.rs b/src/stack/windows.rs index 2504e5c..a0435e2 100644 --- a/src/stack/windows.rs +++ b/src/stack/windows.rs @@ -10,6 +10,9 @@ use windows::Win32::System::SystemInformation::*; use super::SysStack; +#[path = "overflow_windows.rs"] +pub mod overflow; + pub unsafe fn allocate_stack(size: usize) -> io::Result { let ptr = VirtualAlloc( Some(ptr::null()), @@ -30,7 +33,7 @@ pub unsafe fn allocate_stack(size: usize) -> io::Result { pub unsafe fn protect_stack(stack: &SysStack) -> io::Result { let page_size = page_size(); - let mut old_prot = PAGE_PROTECTION_FLAGS(0); + let mut old_prot = mem::zeroed(); debug_assert!(stack.len() % page_size == 0 && stack.len() != 0); @@ -41,7 +44,7 @@ pub unsafe fn protect_stack(stack: &SysStack) -> io::Result { &mut old_prot, ); - if !ret.as_bool() { + if ret.is_err() { Err(io::Error::last_os_error()) } else { let bottom = (stack.bottom() as usize + page_size) as *mut c_void; @@ -50,7 +53,7 @@ pub unsafe fn protect_stack(stack: &SysStack) -> io::Result { } pub unsafe fn deallocate_stack(ptr: *mut c_void, _: usize) { - VirtualFree(ptr, 0, MEM_RELEASE); + let _ = VirtualFree(ptr, 0, MEM_RELEASE); } pub fn page_size() -> usize {