From 93f4212eec975835bd8193ca3dcd2446b1a723c8 Mon Sep 17 00:00:00 2001 From: Ivan Maidanski Date: Mon, 27 Nov 2023 08:44:49 +0300 Subject: [PATCH] Support saving procedure stack at an offset on E2K (multi-threaded only) In case of nested GC_do_blocking and GC_call_with_gc_active calls, there is no need to save the entire procedure stack, only the stack part from the topmost GC_call_with_gc_active is to be saved. * include/private/gc_priv.h [E2K && THREADS] (GC_register_stackbottom): Declare; add comment. * include/private/gc_priv.h [E2K] (PS_SYSCALL_TAIL_BYTES, GET_PROCEDURE_STACK_SIZE_INNER): Define macro. * include/private/gc_priv.h [E2K] (GET_PROCEDURE_STACK_LOCAL): Update comment; add ps_ofs argument; call GET_PROCEDURE_STACK_SIZE_INNER(); rename ofs_sz_ll local variable to ofs_sz_ull; define adj_ps_ofs local variable; pass adj_ps_ofs as argument to E2K_READ_PROCEDURE_STACK_EX syscall. * include/private/pthread_support.h [E2K] (GC_StackContext_Rep.ps_ofs): New field. * mark_rts.c [E2K] (GC_push_current_stack): Add TODO item about ps_ofs. * misc.c [E2K && THREADS] (GC_register_stackbottom): Define variable. * misc.c [E2K] (GC_call_with_stack_base): Call GET_PROCEDURE_STACK_SIZE_INNER() and store result to base.reg_base (instead of NULL). * pthread_stop_world.c [E2K] (GC_suspend_handler_inner, GC_push_all_stacks): Pass crtn->ps_ofs to GET_PROCEDURE_STACK_LOCAL(). * pthread_support.c [E2K] (do_blocking_enter): Likewise. * pthread_support.c [E2K] (GC_record_stack_base, GC_set_stackbottom): Store sb->reg_base value to crtn->ps_ofs. * pthread_support.c [E2K] (GC_thr_init): Store GC_register_stackbottom value to sb.reg_base (instead of NULL). * pthread_support.c [E2K] (GC_set_stackbottom): Store sb->reg_base value to GC_register_stackbottom. * pthread_support.c [E2K] (GC_get_my_stackbottom): Store crtn->ps_ofs value to sb->reg_base. 
* pthread_support.c [E2K] (GC_call_with_gc_active): Define saved_ps_ofs and sz_ull local variables; save and restore crtn->ps_ofs value to saved_ps_ofs; call GET_PROCEDURE_STACK_SIZE_INNER() and store result to crtn->ps_ofs. --- include/private/gc_priv.h | 53 +++++++++++++++++++++++-------- include/private/pthread_support.h | 4 +++ mark_rts.c | 3 +- misc.c | 14 +++++--- pthread_stop_world.c | 5 +-- pthread_support.c | 37 ++++++++++++++------- 6 files changed, 83 insertions(+), 33 deletions(-) diff --git a/include/private/gc_priv.h b/include/private/gc_priv.h index efa763730..eee4312bb 100644 --- a/include/private/gc_priv.h +++ b/include/private/gc_priv.h @@ -1851,9 +1851,13 @@ struct GC_traced_stack_sect_s { /* NULL if no such "frame" active. */ #endif /* !THREADS */ -#ifdef IA64 +#if defined(E2K) && defined(THREADS) || defined(IA64) + /* The bottom of the register stack of the primordial thread. */ + /* E2K: holds the offset (ps_ofs) instead of a pointer. */ GC_EXTERN ptr_t GC_register_stackbottom; +#endif +#ifdef IA64 /* Similar to GC_push_all_stack_sections() but for IA-64 registers store. */ GC_INNER void GC_push_all_register_sections(ptr_t bs_lo, ptr_t bs_hi, int eager, struct GC_traced_stack_sect_s *traced_stack_sect); @@ -2026,27 +2030,48 @@ GC_INNER void GC_with_callee_saves_pushed(void (*volatile fn)(ptr_t, void *), # define PS_ALLOCA_BUF(sz) alloca(sz) # endif - /* Copy procedure (register) stack to a stack-allocated buffer. */ - /* Usable from a signal handler. The buffer is valid only within */ - /* the current function. */ -# define GET_PROCEDURE_STACK_LOCAL(pbuf, psz) \ + /* Approximate size (in bytes) of the obtained procedure stack part */ + /* belonging the syscall() itself. */ +# define PS_SYSCALL_TAIL_BYTES 0x100 + + /* Determine the current size of the whole procedure stack. The size */ + /* is valid only within the current function. 
*/ +# define GET_PROCEDURE_STACK_SIZE_INNER(psz_ull) \ do { \ - unsigned long long ofs_sz_ll = 0; \ - \ - /* Determine buffer size to store the procedure stack. */ \ + *(psz_ull) = 0; /* might be redundant */ \ if (syscall(__NR_access_hw_stacks, E2K_GET_PROCEDURE_STACK_SIZE, \ - NULL, NULL, 0, &ofs_sz_ll) == -1) \ + NULL, NULL, 0, psz_ull) == -1) \ ABORT_ARG1("Cannot get size of procedure stack", \ ": errno= %d", errno); \ - GC_ASSERT(ofs_sz_ll > 0 && ofs_sz_ll % sizeof(word) == 0); \ - *(psz) = (size_t)ofs_sz_ll; \ + GC_ASSERT(*(psz_ull) > 0 && *(psz_ull) % sizeof(word) == 0); \ + } while (0) + + /* Copy procedure (register) stack to a stack-allocated buffer. */ + /* Usable from a signal handler. The buffer is valid only within */ + /* the current function. ps_ofs designates the offset in the */ + /* procedure stack to copy the contents from. Note: this macro */ + /* cannot be changed to a function because alloca() and both */ + /* syscall() should be called in the context of the caller. */ +# define GET_PROCEDURE_STACK_LOCAL(ps_ofs, pbuf, psz) \ + do { \ + unsigned long long ofs_sz_ull; \ + size_t adj_ps_ofs; \ + \ + GET_PROCEDURE_STACK_SIZE_INNER(&ofs_sz_ull); \ + if (ofs_sz_ull <= (ps_ofs)) \ + ABORT_ARG2("Incorrect size of procedure stack", \ + ": ofs= %lu, size= %lu", (unsigned long)(ps_ofs), \ + (unsigned long)ofs_sz_ull); \ + adj_ps_ofs = (ps_ofs) > PS_SYSCALL_TAIL_BYTES ? \ + (ps_ofs) - PS_SYSCALL_TAIL_BYTES : 0; \ + *(psz) = (size_t)ofs_sz_ull - adj_ps_ofs; \ /* Allocate buffer on the stack; cannot return NULL. */ \ *(pbuf) = PS_ALLOCA_BUF(*(psz)); \ - /* Read the procedure stack to the buffer. */ \ + /* Copy the procedure stack at the given offset to the buffer. 
*/ \ for (;;) { \ - ofs_sz_ll = 0; \ + ofs_sz_ull = adj_ps_ofs; \ if (syscall(__NR_access_hw_stacks, E2K_READ_PROCEDURE_STACK_EX, \ - &ofs_sz_ll, *(pbuf), *(psz), NULL) != -1) \ + &ofs_sz_ull, *(pbuf), *(psz), NULL) != -1) \ break; \ if (errno != EAGAIN) \ ABORT_ARG2("Cannot read procedure stack", \ diff --git a/include/private/pthread_support.h b/include/private/pthread_support.h index 670338076..4fde5dcde 100644 --- a/include/private/pthread_support.h +++ b/include/private/pthread_support.h @@ -95,6 +95,10 @@ typedef struct GC_StackContext_Rep { word normstack_size; # endif +# ifdef E2K + size_t ps_ofs; /* the current offset of the procedure stack */ +# endif + # ifndef GC_NO_FINALIZATION unsigned char finalizer_nested; char fnlz_pad[1]; /* Explicit alignment (for some rare */ diff --git a/mark_rts.c b/mark_rts.c index 809e1f3b8..faec11385 100644 --- a/mark_rts.c +++ b/mark_rts.c @@ -874,7 +874,8 @@ STATIC void GC_push_current_stack(ptr_t cold_gc_frame, void *context) ptr_t bs_lo; size_t stack_size; - GET_PROCEDURE_STACK_LOCAL(&bs_lo, &stack_size); + /* TODO: support ps_ofs here and in GC_do_blocking_inner */ + GET_PROCEDURE_STACK_LOCAL(0, &bs_lo, &stack_size); GC_push_all_eager(bs_lo, bs_lo + stack_size); } # endif diff --git a/misc.c b/misc.c index 1fd88140c..0bd4d3c44 100644 --- a/misc.c +++ b/misc.c @@ -83,7 +83,7 @@ GC_INNER GC_bool GC_debugging_started = FALSE; ptr_t GC_stackbottom = 0; -#ifdef IA64 +#if defined(E2K) && defined(THREADS) || defined(IA64) GC_INNER ptr_t GC_register_stackbottom = NULL; #endif @@ -2229,7 +2229,12 @@ GC_API void * GC_CALL GC_call_with_stack_base(GC_stack_base_func volatile fn, /* TODO: Unnecessarily flushes register stack, */ /* but that probably doesn't hurt. 
*/ # elif defined(E2K) - base.reg_base = NULL; /* not used by GC currently */ + { + unsigned long long sz_ull; + + GET_PROCEDURE_STACK_SIZE_INNER(&sz_ull); + base.reg_base = (void *)(word)sz_ull; + } # endif result = (*fn)(&base, arg); /* Strongly discourage the compiler from treating the above */ @@ -2338,9 +2343,9 @@ STATIC void GC_do_blocking_inner(ptr_t data, void *context) && NULL == GC_traced_stack_sect); /* for now */ UNUSED_ARG(gc_thread_handle); - GC_stackbottom = (char *)sb->mem_base; + GC_stackbottom = (char *)(sb -> mem_base); # ifdef IA64 - GC_register_stackbottom = (ptr_t)sb->reg_base; + GC_register_stackbottom = (ptr_t)(sb -> reg_base); # endif } @@ -2355,6 +2360,7 @@ STATIC void GC_do_blocking_inner(ptr_t data, void *context) # endif return &GC_stackbottom; /* gc_thread_handle */ } + #endif /* !THREADS */ GC_API void * GC_CALL GC_do_blocking(GC_fn_type fn, void * client_data) diff --git a/pthread_stop_world.c b/pthread_stop_world.c index 2360face1..b3a6745be 100644 --- a/pthread_stop_world.c +++ b/pthread_stop_world.c @@ -355,7 +355,7 @@ STATIC void GC_suspend_handler_inner(ptr_t dummy, void *context) GC_store_stack_ptr(crtn); # ifdef E2K GC_ASSERT(NULL == crtn -> backing_store_end); - GET_PROCEDURE_STACK_LOCAL(&bs_lo, &stack_size); + GET_PROCEDURE_STACK_LOCAL(crtn -> ps_ofs, &bs_lo, &stack_size); crtn -> backing_store_end = bs_lo; crtn -> backing_store_ptr = bs_lo + stack_size; # endif @@ -802,7 +802,8 @@ GC_INNER void GC_push_all_stacks(void) size_t stack_size; GC_ASSERT(NULL == crtn -> backing_store_end); - GET_PROCEDURE_STACK_LOCAL(&bs_lo, &stack_size); + GET_PROCEDURE_STACK_LOCAL(crtn -> ps_ofs, + &bs_lo, &stack_size); bs_hi = bs_lo + stack_size; } # endif diff --git a/pthread_support.c b/pthread_support.c index 2cd630aca..4f4a25229 100644 --- a/pthread_support.c +++ b/pthread_support.c @@ -1549,14 +1549,16 @@ GC_INNER_WIN32THREAD void GC_record_stack_base(GC_stack_context_t crtn, const struct GC_stack_base *sb) { # if 
!defined(GC_DARWIN_THREADS) && !defined(GC_WIN32_THREADS) - crtn -> stack_ptr = (ptr_t)sb->mem_base; + crtn -> stack_ptr = (ptr_t)(sb -> mem_base); # endif - if ((crtn -> stack_end = (ptr_t)sb->mem_base) == NULL) + if ((crtn -> stack_end = (ptr_t)(sb -> mem_base)) == NULL) ABORT("Bad stack base in GC_register_my_thread"); -# ifdef IA64 - crtn -> backing_store_end = (ptr_t)sb->reg_base; +# ifdef E2K + crtn -> ps_ofs = (size_t)(word)(sb -> reg_base); +# elif defined(IA64) + crtn -> backing_store_end = (ptr_t)(sb -> reg_base); # elif defined(I386) && defined(GC_WIN32_THREADS) - crtn -> initial_stack_base = (ptr_t)sb->mem_base; + crtn -> initial_stack_base = (ptr_t)(sb -> mem_base); # endif } @@ -1745,10 +1747,8 @@ GC_INNER void GC_thr_init(void) sb.mem_base = GC_stackbottom; GC_ASSERT(sb.mem_base != NULL); -# ifdef IA64 +# if defined(E2K) || defined(IA64) sb.reg_base = GC_register_stackbottom; -# elif defined(E2K) - sb.reg_base = NULL; # endif GC_ASSERT(NULL == GC_self_thread_inner()); me = GC_register_my_thread_inner(&sb, self_id); @@ -1828,7 +1828,8 @@ GC_INNER void GC_init_parallel(void) *(pTopOfStackUnset) = FALSE; \ crtn -> stack_ptr = GC_approx_sp(); \ GC_ASSERT(NULL == crtn -> backing_store_end); \ - GET_PROCEDURE_STACK_LOCAL(&bs_lo, &stack_size); \ + GET_PROCEDURE_STACK_LOCAL(crtn -> ps_ofs, \ + &bs_lo, &stack_size); \ crtn -> backing_store_end = bs_lo; \ crtn -> backing_store_ptr = bs_lo + stack_size; \ (me) -> flags |= DO_BLOCKING; \ @@ -1972,7 +1973,7 @@ GC_API void GC_CALL GC_set_stackbottom(void *gc_thread_handle, GC_ASSERT(NULL == t); /* Alter the stack bottom of the primordial thread. 
*/ GC_stackbottom = (char*)(sb -> mem_base); -# ifdef IA64 +# if defined(E2K) || defined(IA64) GC_register_stackbottom = (ptr_t)(sb -> reg_base); # endif return; @@ -1987,7 +1988,9 @@ GC_API void GC_CALL GC_set_stackbottom(void *gc_thread_handle, && NULL == crtn -> traced_stack_sect); /* for now */ crtn -> stack_end = (ptr_t)(sb -> mem_base); -# ifdef IA64 +# ifdef E2K + crtn -> ps_ofs = (size_t)(word)(sb -> reg_base); +# elif defined(IA64) crtn -> backing_store_end = (ptr_t)(sb -> reg_base); # endif # ifdef GC_WIN32_THREADS @@ -2007,7 +2010,7 @@ GC_API void * GC_CALL GC_get_my_stackbottom(struct GC_stack_base *sb) crtn = me -> crtn; sb -> mem_base = crtn -> stack_end; # ifdef E2K - sb -> reg_base = NULL; + sb -> reg_base = (void *)(word)(crtn -> ps_ofs); # elif defined(IA64) sb -> reg_base = crtn -> backing_store_end; # endif @@ -2029,6 +2032,7 @@ GC_API void * GC_CALL GC_call_with_gc_active(GC_fn_type volatile fn, ptr_t stack_end; # ifdef E2K ptr_t saved_bs_ptr, saved_bs_end; + size_t saved_ps_ofs; # endif READER_LOCK(); /* This will block if the world is stopped. */ @@ -2075,6 +2079,14 @@ GC_API void * GC_CALL GC_call_with_gc_active(GC_fn_type volatile fn, stacksect.saved_backing_store_ptr = crtn -> backing_store_ptr; # elif defined(E2K) GC_ASSERT(crtn -> backing_store_end != NULL); + { + unsigned long long sz_ull; + + GET_PROCEDURE_STACK_SIZE_INNER(&sz_ull); + saved_ps_ofs = crtn -> ps_ofs; + GC_ASSERT(saved_ps_ofs <= (size_t)sz_ull); + crtn -> ps_ofs = (size_t)sz_ull; + } saved_bs_end = crtn -> backing_store_end; saved_bs_ptr = crtn -> backing_store_ptr; crtn -> backing_store_ptr = NULL; @@ -2102,6 +2114,7 @@ GC_API void * GC_CALL GC_call_with_gc_active(GC_fn_type volatile fn, GC_ASSERT(NULL == crtn -> backing_store_end); crtn -> backing_store_end = saved_bs_end; crtn -> backing_store_ptr = saved_bs_ptr; + crtn -> ps_ofs = saved_ps_ofs; # endif me -> flags |= DO_BLOCKING; crtn -> stack_ptr = stacksect.saved_stack_ptr;