From f09abfa9b5bfbba6e87ce0f4390a45e66a7df8a5 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 16 Aug 2024 20:37:18 +0000 Subject: [PATCH] Reduce size of Task object Move the registers onto the stack, so that they only are present when the Task is actually switched out, saving memory when the Task is not running yet or already finished. On Linux x86_64 this reduces it from 376 bytes to 184 bytes. Has some additional advantages too, such as copy_stack tasks (e.g. with always_copy_stacks) can migrate to other threads before starting if they are not stick. Also fixes a variable that got mixed up by #54639 and caused always_copy_stacks to abort, since the stack limits were wrong. --- src/gc-stacks.c | 22 +-- src/gc-stock.c | 10 +- src/init.c | 17 +- src/julia.h | 6 +- src/julia_internal.h | 3 +- src/julia_threads.h | 8 +- src/signals-unix.c | 6 +- src/stackwalk.c | 63 ++++--- src/task.c | 420 +++++++++++++++++++++++-------------------- 9 files changed, 288 insertions(+), 267 deletions(-) diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 5706f4ce67c1dd..65173d3a8df379 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -186,14 +186,14 @@ JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz) void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task) { // avoid adding an original thread stack to the free list - if (task == ptls->root_task && !task->copy_stack) + if (task == ptls->root_task && !task->ctx.copy_stack) return; - void *stkbuf = task->stkbuf; - size_t bufsz = task->bufsz; + void *stkbuf = task->ctx.stkbuf; + size_t bufsz = task->ctx.bufsz; if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(bufsz); if (pool_sizes[pool_id] == bufsz) { - task->stkbuf = NULL; + task->ctx.stkbuf = NULL; #ifdef _COMPILER_ASAN_ENABLED_ __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); #endif @@ -296,17 +296,17 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT jl_task_t *t = (jl_task_t*)lst[n]; assert(jl_is_task(t)); if (gc_marked(jl_astaggedvalue(t)->bits.gc)) { - if (t->stkbuf == NULL) + if (t->ctx.stkbuf == NULL) ndel++; // jl_release_task_stack called else n++; } else { ndel++; - void *stkbuf = t->stkbuf; - size_t bufsz = t->bufsz; + void *stkbuf = t->ctx.stkbuf; + size_t bufsz = t->ctx.bufsz; if (stkbuf) { - t->stkbuf = NULL; + t->ctx.stkbuf = NULL; _jl_free_stack(ptls2, stkbuf, bufsz); } #ifdef _COMPILER_TSAN_ENABLED_ @@ -338,7 +338,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) continue; small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); - l += n + (ptls2->root_task->stkbuf != NULL); + l += n + (ptls2->root_task->ctx.stkbuf != NULL); } l += l / 20; // add 5% for margin of estimation error jl_array_t *a = jl_alloc_vec_any(l); // may gc, changing the number of tasks and forcing us to reload everything @@ -350,7 +350,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) if (ptls2 == NULL) continue; jl_task_t *t = ptls2->root_task; - if (t->stkbuf != NULL) { + if (t->ctx.stkbuf != NULL) { if (j == l) goto restart; jl_array_data(a,void*)[j++] = t; @@ -359,7 +359,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) size_t n = mtarraylist_length(live_tasks); for (size_t i = 0; i < n; i++) { jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i); - if (t->stkbuf != NULL) { + if (t->ctx.stkbuf != NULL) { if (j == l) goto restart; jl_array_data(a,void*)[j++] = t; diff --git a/src/gc-stock.c b/src/gc-stock.c index 1fc2087da6503f..3ae14f378a2e7b 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2144,9 +2144,9 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ (ta, tid != -1 && ta == gc_all_tls_states[tid]->root_task)); } #ifdef COPY_STACKS - void *stkbuf = ta->stkbuf; - if (stkbuf && ta->copy_stack) { - gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz); + void *stkbuf = ta->ctx.stkbuf; + if (stkbuf && ta->ctx.copy_stack) { + gc_setmark_buf_(ptls, stkbuf, bits, ta->ctx.bufsz); // For gc_heap_snapshot_record: // TODO: attribute size of stack // TODO: edge to stack data @@ -2159,12 +2159,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ uintptr_t lb = 0; uintptr_t ub = (uintptr_t)-1; #ifdef COPY_STACKS - if (stkbuf && ta->copy_stack && !ta->ptls) { + if (stkbuf && ta->ctx.copy_stack && !ta->ptls) { int16_t tid = jl_atomic_load_relaxed(&ta->tid); assert(tid >= 0); jl_ptls_t ptls2 = gc_all_tls_states[tid]; ub = (uintptr_t)ptls2->stackbase; - lb = ub - ta->copy_stack; + lb = ub - ta->ctx.copy_stack; offset = (uintptr_t)stkbuf - lb; } #endif diff --git a/src/init.c b/src/init.c index eff786b564e545..8ff5dae4a0fc2e 100644 --- a/src/init.c +++ b/src/init.c @@ -64,15 +64,15 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi) // threads since it seems to return bogus values for master thread on Linux // and possibly OSX. if (!ismaster) { -# if defined(_OS_LINUX_) +# if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) pthread_attr_t attr; pthread_getattr_np(pthread_self(), &attr); void *stackaddr; size_t stacksize; pthread_attr_getstack(&attr, &stackaddr, &stacksize); pthread_attr_destroy(&attr); - *stack_hi = stackaddr; - *stack_lo = (char*)stackaddr - stacksize; + *stack_lo = stackaddr; + *stack_hi = (char*)stackaddr + stacksize; return; # elif defined(_OS_DARWIN_) extern void *pthread_get_stackaddr_np(pthread_t thread); @@ -80,19 +80,8 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi) pthread_t thread = pthread_self(); void *stackaddr = pthread_get_stackaddr_np(thread); size_t stacksize = pthread_get_stacksize_np(thread); - *stack_hi = stackaddr; *stack_lo = (char*)stackaddr - stacksize; - return; -# elif defined(_OS_FREEBSD_) - pthread_attr_t attr; - pthread_attr_init(&attr); - pthread_attr_get_np(pthread_self(), &attr); - void *stackaddr; - size_t stacksize; - pthread_attr_getstack(&attr, &stackaddr, &stacksize); - pthread_attr_destroy(&attr); *stack_hi = stackaddr; - *stack_lo = (char*)stackaddr - stacksize; return; # else # warning "Getting precise stack size for thread is not supported." diff --git a/src/julia.h b/src/julia.h index e211f31c6512c7..074c50fd0aa218 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2231,11 +2231,7 @@ typedef struct _jl_task_t { // current exception handler jl_handler_t *eh; // saved thread state - jl_ucontext_t ctx; - void *stkbuf; // malloc'd memory (either copybuf or stack) - size_t bufsz; // actual sizeof stkbuf - unsigned int copy_stack:31; // sizeof stack for copybuf - unsigned int started:1; + jl_ucontext_t ctx; // pointer into stkbuf, if suspended } jl_task_t; #define JL_TASK_STATE_RUNNABLE 0 diff --git a/src/julia_internal.h b/src/julia_internal.h index d4d1a3239785ca..23a9c90edf8aa7 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -65,7 +65,8 @@ static inline void asan_unpoison_task_stack(jl_task_t *ct, jl_jmp_buf *buf) that we're resetting to. The idea is to remove the poison from the frames that we're skipping over, since they won't be unwound. */ uintptr_t top = jmpbuf_sp(buf); - uintptr_t bottom = (uintptr_t)ct->stkbuf; + uintptr_t bottom = (uintptr_t)(ct->ctx.copy_stack ? (char*)ct->ptls->stackbase - ct->ptls->stacksize : (char*)ct->ctx.stkbuf); + //uintptr_t bottom = (uintptr_t)⊤ __asan_unpoison_stack_memory(bottom, top - bottom); } static inline void asan_unpoison_stack_memory(uintptr_t addr, size_t size) { diff --git a/src/julia_threads.h b/src/julia_threads.h index 3486c5b969383f..bf451b6687ee59 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -86,9 +86,13 @@ typedef ucontext_t _jl_ucontext_t; typedef struct { union { - _jl_ucontext_t ctx; - jl_stack_context_t copy_ctx; + _jl_ucontext_t *ctx; + jl_stack_context_t *copy_ctx; }; + void *stkbuf; // malloc'd memory (either copybuf or stack) + size_t bufsz; // actual sizeof stkbuf + unsigned int copy_stack:31; // sizeof stack for copybuf + unsigned int started:1; #if defined(_COMPILER_TSAN_ENABLED_) void *tsan_state; #endif diff --git a/src/signals-unix.c b/src/signals-unix.c index 5e79fd5a2e29ee..f090c6f743d2cd 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -230,13 +230,13 @@ static pthread_t signals_thread; static int is_addr_on_stack(jl_task_t *ct, void *addr) { - if (ct->copy_stack) { + if (ct->ctx.copy_stack) { jl_ptls_t ptls = ct->ptls; return ((char*)addr > (char*)ptls->stackbase - ptls->stacksize && (char*)addr < (char*)ptls->stackbase); } - return ((char*)addr > (char*)ct->stkbuf && - (char*)addr < (char*)ct->stkbuf + ct->bufsz); + return ((char*)addr > (char*)ct->ctx.stkbuf && + (char*)addr < (char*)ct->ctx.stkbuf + ct->ctx.bufsz); } static void sigdie_handler(int sig, siginfo_t *info, void *context) diff --git a/src/stackwalk.c b/src/stackwalk.c index a63694e7c3b0c9..9c3fec44cb2419 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -921,14 +921,12 @@ _os_ptr_munge(uintptr_t ptr) JL_NOTSAFEPOINT extern bt_context_t *jl_to_bt_context(void *sigctx); -static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT +JL_DLLEXPORT size_t jl_record_backtrace(jl_task_t *t, jl_bt_element_t *bt_data, size_t max_bt_size) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - ptls->bt_size = 0; if (t == ct) { - ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0); - return; + return rec_backtrace(bt_data, max_bt_size, 0); } bt_context_t *context = NULL; bt_context_t c; @@ -936,9 +934,11 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT while (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid) { int lockret = jl_lock_stackwalk(); // if this task is already running somewhere, we need to stop the thread it is running on and query its state - if (!jl_thread_suspend_and_get_state(old, 0, &c)) { + if (!jl_thread_suspend_and_get_state(old, 1, &c)) { jl_unlock_stackwalk(lockret); - return; + if (jl_atomic_load_relaxed(&t->tid) != old) + continue; + return 0; } jl_unlock_stackwalk(lockret); if (jl_atomic_load_relaxed(&t->tid) == old) { @@ -953,11 +953,11 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT // got the wrong thread stopped, try again jl_thread_resume(old); } - if (context == NULL && (!t->copy_stack && t->started && t->stkbuf != NULL)) { + if (context == NULL && (!t->ctx.copy_stack && t->ctx.started && t->ctx.ctx != NULL)) { // need to read the context from the task stored state #if defined(_OS_WINDOWS_) memset(&c, 0, sizeof(c)); - _JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx.uc_mcontext; + _JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx->uc_mcontext; #if defined(_CPU_X86_64_) c.Rbx = mctx->Rbx; c.Rsp = mctx->Rsp; @@ -979,13 +979,13 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT #endif context = &c; #elif defined(JL_HAVE_UNW_CONTEXT) - context = &t->ctx.ctx; + context = t->ctx.ctx; #elif defined(JL_HAVE_UCONTEXT) - context = jl_to_bt_context(&t->ctx.ctx); + context = jl_to_bt_context(t->ctx.ctx); #elif defined(JL_HAVE_ASM) memset(&c, 0, sizeof(c)); #if defined(_OS_LINUX_) && defined(__GLIBC__) - __jmp_buf *mctx = &t->ctx.ctx.uc_mcontext->__jmpbuf; + __jmp_buf *mctx = &t->ctx.ctx->uc_mcontext->__jmpbuf; mcontext_t *mc = &c.uc_mcontext; #if defined(_CPU_X86_) // https://github.com/bminor/glibc/blame/master/sysdeps/i386/__longjmp.S @@ -1071,13 +1071,13 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT mc->pc = mc->regs[30]; context = &c; #else - #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown linux") + #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown linux") (void)mc; (void)c; (void)mctx; #endif #elif defined(_OS_DARWIN_) - sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext; + sigjmp_buf *mctx = &t->ctx.ctx->uc_mcontext; #if defined(_CPU_X86_64_) // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/x86_64/_setjmp.s x86_thread_state64_t *mc = (x86_thread_state64_t*)&c; @@ -1133,12 +1133,12 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT mc->__pad = 0; // aka __ra_sign_state = not signed context = &c; #else - #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown darwin") + #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown darwin") (void)mctx; (void)c; #endif #elif defined(_OS_FREEBSD_) - sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext; + sigjmp_buf *mctx = &t->ctx.ctx->uc_mcontext; mcontext_t *mc = &c.uc_mcontext; #if defined(_CPU_X86_64_) // https://github.com/freebsd/freebsd-src/blob/releng/13.1/lib/libc/amd64/gen/_setjmp.S @@ -1175,24 +1175,26 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT mc->mc_fpregs.fp_q[14] = ((long*)mctx)[20]; context = &c; #else - #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown freebsd") + #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown freebsd") (void)mctx; (void)c; #endif #else - #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown system") + #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown system") (void)c; #endif #else - #pragma message("jl_rec_backtrace not defined for unknown task system") + #pragma message("jl_record_backtrace not defined for unknown task system") #endif } + size_t bt_size = 0; if (context) - ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context, t->gcstack); + bt_size = rec_backtrace_ctx(bt_data, max_bt_size, context, t->gcstack); if (old == -1) jl_atomic_store_relaxed(&t->tid, old); else if (old != ptls->tid) jl_thread_resume(old); + return bt_size; } //-------------------------------------------------- @@ -1224,12 +1226,15 @@ JL_DLLEXPORT void jlbacktracet(jl_task_t *t) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - jl_rec_backtrace(t); - size_t i, bt_size = ptls->bt_size; + ptls->bt_size = 0; jl_bt_element_t *bt_data = ptls->bt_data; + size_t bt_size = jl_record_backtrace(t, bt_data, JL_MAX_BT_SIZE); + size_t i; for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { jl_print_bt_entry_codeloc(bt_data + i); } + if (bt_size == 0) + jl_safe_printf(" no backtrace recorded\n"); } JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT @@ -1269,14 +1274,9 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT jl_safe_printf(" ---- Root task (%p)\n", ptls2->root_task); if (t != NULL) { jl_safe_printf(" (sticky: %d, started: %d, state: %d, tid: %d)\n", - t->sticky, t->started, t_state, + t->sticky, t->ctx.started, t_state, jl_atomic_load_relaxed(&t->tid) + 1); - if (t->stkbuf != NULL) { - jlbacktracet(t); - } - else { - jl_safe_printf(" no stack\n"); - } + jlbacktracet(t); } jl_safe_printf(" ---- End root task\n"); } @@ -1291,12 +1291,9 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT jl_safe_printf(" ---- Task %zu (%p)\n", j + 1, t); // n.b. this information might not be consistent with the stack printing after it, since it could start running or change tid, etc. jl_safe_printf(" (sticky: %d, started: %d, state: %d, tid: %d)\n", - t->sticky, t->started, t_state, + t->sticky, t->ctx.started, t_state, jl_atomic_load_relaxed(&t->tid) + 1); - if (t->stkbuf != NULL) - jlbacktracet(t); - else - jl_safe_printf(" no stack\n"); + jlbacktracet(t); jl_safe_printf(" ---- End task %zu\n", j + 1); } jl_safe_printf("==== End thread %d\n", ptls2->tid + 1); diff --git a/src/task.c b/src/task.c index 1f41ebd8cd2f8e..64ea66f5f783ff 100644 --- a/src/task.c +++ b/src/task.c @@ -49,27 +49,27 @@ extern "C" { // c.f. interceptor in jl_dlopen as well void (*real_siglongjmp)(jmp_buf _Buf, int _Value) = NULL; #endif -static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) { +static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_ucontext_t *from, jl_ucontext_t *to) { if (to->copy_stack) - __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize); + __sanitizer_start_switch_fiber(&from->asan_fake_stack, (char*)ptls->stackbase - ptls->stacksize, ptls->stacksize); else - __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, to->stkbuf, to->bufsz); + __sanitizer_start_switch_fiber(&from->asan_fake_stack, to->stkbuf, to->bufsz); } -static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) { +static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_ucontext_t *to) { if (to->copy_stack) - __sanitizer_start_switch_fiber(NULL, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize); + __sanitizer_start_switch_fiber(NULL, (char*)ptls->stackbase - ptls->stacksize, ptls->stacksize); else __sanitizer_start_switch_fiber(NULL, to->stkbuf, to->bufsz); } -static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) { - __sanitizer_finish_switch_fiber(current->ctx.asan_fake_stack, NULL, NULL); +static inline void sanitizer_finish_switch_fiber(jl_ucontext_t *last, jl_ucontext_t *current) { + __sanitizer_finish_switch_fiber(current->asan_fake_stack, NULL, NULL); //(const void**)&last->stkbuf, //&last->bufsz); } #else -static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) JL_NOTSAFEPOINT {} -static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) JL_NOTSAFEPOINT {} -static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) JL_NOTSAFEPOINT {} +static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_ucontext_t *from, jl_ucontext_t *to) JL_NOTSAFEPOINT {} +static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_ucontext_t *to) JL_NOTSAFEPOINT {} +static inline void sanitizer_finish_switch_fiber(jl_ucontext_t *last, jl_ucontext_t *current) JL_NOTSAFEPOINT {} #endif #if defined(_COMPILER_TSAN_ENABLED_) @@ -88,7 +88,7 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur #ifdef COPY_STACKS #define tsan_destroy_copyctx(_ptls, _ctx) do { \ jl_ucontext_t *_tsan_macro_ctx = (_ctx); \ - if (_tsan_macro_ctx != &(_ptls)->root_task->ctx) { \ + if (_tsan_macro_ctx != (_ptls)->root_task->ctx) { \ __tsan_destroy_fiber(_tsan_macro_ctx->tsan_state); \ } \ _tsan_macro_ctx->tsan_state = NULL; \ @@ -134,7 +134,7 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur #define ROOT_TASK_STACK_ADJUSTMENT 3000000 #endif -static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT; +static int jl_alloc_fiber(jl_ucontext_t *t, jl_task_t *owner) JL_NOTSAFEPOINT; static void jl_set_fiber(jl_ucontext_t *t); static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t); static void jl_start_fiber_swap(jl_ucontext_t *savet, jl_ucontext_t *t); @@ -214,17 +214,17 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt assert(stackbase > frame_addr); size_t nb = stackbase - frame_addr; void *buf; - if (lastt->bufsz < nb) { - asan_free_copy_stack(lastt->stkbuf, lastt->bufsz); + if (lastt->ctx.bufsz < nb) { + asan_free_copy_stack(lastt->ctx.stkbuf, lastt->ctx.bufsz); buf = (void*)jl_gc_alloc_buf(ptls, nb); - lastt->stkbuf = buf; - lastt->bufsz = nb; + lastt->ctx.stkbuf = buf; + lastt->ctx.bufsz = nb; } else { - buf = lastt->stkbuf; + buf = lastt->ctx.stkbuf; } *pt = NULL; // clear the gc-root for the target task before copying the stack for saving - lastt->copy_stack = nb; + lastt->ctx.copy_stack = nb; lastt->sticky = 1; memcpy_stack_a16((uint64_t*)buf, (uint64_t*)frame_addr, nb); // this task's stack could have been modified after @@ -235,7 +235,7 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, char *p) { - size_t nb = t->copy_stack; + size_t nb = t->ctx.copy_stack; char *_x = (char*)ptls->stackbase - nb; if (!p) { // switch to a stackframe that's beyond the bounds of the last switch @@ -245,44 +245,44 @@ JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_ } restore_stack(t, ptls, p); // pass p to ensure the compiler can't tailcall this or avoid the alloca } - void *_y = t->stkbuf; + void *_y = t->ctx.stkbuf; assert(_x != NULL && _y != NULL); memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe #if defined(_OS_WINDOWS_) - jl_setcontext(&t->ctx.copy_ctx); + jl_setcontext(t->ctx.copy_ctx); #else - jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1); + jl_longjmp(t->ctx.copy_ctx->uc_mcontext, 1); #endif abort(); // unreachable } JL_NO_ASAN static void restore_stack2(jl_task_t *t, jl_ptls_t ptls, jl_task_t *lastt) { - assert(t->copy_stack && !lastt->copy_stack); - size_t nb = t->copy_stack; + assert(t->ctx.copy_stack && !lastt->ctx.copy_stack); + size_t nb = t->ctx.copy_stack; char *_x = (char*)ptls->stackbase - nb; - void *_y = t->stkbuf; + void *_y = t->ctx.stkbuf; assert(_x != NULL && _y != NULL); memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe #if defined(JL_HAVE_UNW_CONTEXT) volatile int returns = 0; - int r = unw_getcontext(&lastt->ctx.ctx); + int r = unw_getcontext(lastt->ctx.ctx); if (++returns == 2) // r is garbage after the first return return; if (r != 0 || returns != 1) abort(); #elif defined(JL_HAVE_ASM) || defined(_OS_WINDOWS_) - if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0)) + if (jl_setjmp(lastt->ctx.ctx->uc_mcontext, 0)) return; #else #error COPY_STACKS is incompatible with this platform #endif tsan_switch_to_copyctx(&t->ctx); #if defined(_OS_WINDOWS_) - jl_setcontext(&t->ctx.copy_ctx); + jl_setcontext(t->ctx.copy_ctx); #else - jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1); + jl_longjmp(t->ctx.copy_ctx->uc_mcontext, 1); #endif } #endif @@ -298,9 +298,9 @@ void JL_NORETURN jl_finish_task(jl_task_t *ct) jl_atomic_store_release(&ct->_state, JL_TASK_STATE_FAILED); else jl_atomic_store_release(&ct->_state, JL_TASK_STATE_DONE); - if (ct->copy_stack) { // early free of stkbuf - asan_free_copy_stack(ct->stkbuf, ct->bufsz); - ct->stkbuf = NULL; + if (ct->ctx.copy_stack) { // early free of stkbuf + asan_free_copy_stack(ct->ctx.stkbuf, ct->ctx.bufsz); + ct->ctx.stkbuf = NULL; } // ensure that state is cleared ct->ptls->in_finalizer = 0; @@ -344,33 +344,33 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid if (ptls2) { *ptid = jl_atomic_load_relaxed(&task->tid); #ifdef COPY_STACKS - if (task->copy_stack) { + if (task->ctx.copy_stack) { *size = ptls2->stacksize; return (char *)ptls2->stackbase - *size; } #endif } - *size = task->bufsz - off; - return (void *)((char *)task->stkbuf + off); + *size = task->ctx.bufsz - off; + return (void *)((char *)task->ctx.stkbuf + off); } JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, char **active_start, char **active_end, char **total_start, char **total_end) { - if (!task->started) { + if (!task->ctx.started) { *total_start = *active_start = 0; *total_end = *active_end = 0; return; } jl_ptls_t ptls2 = task->ptls; - if (task->copy_stack && ptls2) { + if (task->ctx.copy_stack && ptls2) { *total_start = *active_start = (char*)ptls2->stackbase - ptls2->stacksize; *total_end = *active_end = (char*)ptls2->stackbase; } - else if (task->stkbuf) { - *total_start = *active_start = (char*)task->stkbuf; + else if (task->ctx.stkbuf) { + *total_start = *active_start = (char*)task->ctx.stkbuf; #ifndef _OS_WINDOWS_ jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; if (ptls0->root_task == task) { @@ -383,12 +383,12 @@ JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, } #endif - *total_end = *active_end = (char*)task->stkbuf + task->bufsz; + *total_end = *active_end = (char*)task->ctx.stkbuf + task->ctx.bufsz; #ifdef COPY_STACKS // save_stack stores the stack of an inactive task in stkbuf, and the // actual number of used bytes in copy_stack. - if (task->copy_stack > 1) - *active_end = (char*)task->stkbuf + task->copy_stack; + if (task->ctx.copy_stack > 1) + *active_end = (char*)task->ctx.stkbuf + task->ctx.copy_stack; #endif } else { @@ -449,20 +449,15 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) #endif int killed = jl_atomic_load_relaxed(&lastt->_state) != JL_TASK_STATE_RUNNABLE; - if (!t->started && !t->copy_stack) { + if (!t->ctx.started && !t->ctx.copy_stack) { // may need to allocate the stack - if (t->stkbuf == NULL) { - t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t); - if (t->stkbuf == NULL) { + if (t->ctx.stkbuf == NULL) { + if (!jl_alloc_fiber(&t->ctx, t)) { #ifdef COPY_STACKS // fall back to stack copying if mmap fails - t->copy_stack = 1; + t->ctx.copy_stack = 1; + t->ctx.bufsz = 0; t->sticky = 1; - t->bufsz = 0; - if (always_copy_stacks) - memcpy(&t->ctx.copy_ctx, &ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx)); - else - memcpy(&t->ctx.ctx, &ptls->base_ctx, sizeof(t->ctx.ctx)); #else jl_throw(jl_memory_exception); #endif @@ -470,28 +465,41 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) } } + union { + _jl_ucontext_t ctx; + jl_stack_context_t copy_ctx; + } lasttstate; + if (killed) { *pt = NULL; // can't fail after here: clear the gc-root for the target task now lastt->gcstack = NULL; lastt->eh = NULL; - if (!lastt->copy_stack && lastt->stkbuf) { + if (!lastt->ctx.copy_stack && lastt->ctx.stkbuf) { // early free of stkbuf back to the pool jl_release_task_stack(ptls, lastt); } } else { + if (lastt->ctx.copy_stack) { // save the old copy-stack #ifdef COPY_STACKS - if (lastt->copy_stack) { // save the old copy-stack - save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail) - if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0)) { - sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task)); - // TODO: mutex unlock the thread we just switched from + if (jl_setjmp(lasttstate.copy_ctx.uc_mcontext, 0)) { +#ifdef MIGRATE_TASKS + ptls = lastt->ptls; +#endif + lastt->ctx.copy_ctx = NULL; + sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &lastt->ctx); return; } - } - else + save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail) + lastt->ctx.copy_ctx = &lasttstate.copy_ctx; +#else + abort(); #endif - *pt = NULL; // can't fail after here: clear the gc-root for the target task now + } + else { + *pt = NULL; // can't fail after here: clear the gc-root for the target task now + lastt->ctx.ctx = &lasttstate.ctx; + } } // set up global state for new task and clear global state for old task @@ -506,41 +514,44 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) ptls->previous_task = lastt; #endif - if (t->started) { + if (t->ctx.started) { + if (t->ctx.copy_stack) { #ifdef COPY_STACKS - if (t->copy_stack) { - if (lastt->copy_stack) { + if (lastt->ctx.copy_stack) { // Switching from copystack to copystack. Clear any shadow stack // memory above the saved shadow stack. - uintptr_t stacktop = (uintptr_t)ptls->stackbase - t->copy_stack; + uintptr_t stacktop = (uintptr_t)ptls->stackbase - t->ctx.copy_stack; uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15); if (stackbottom < stacktop) - asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom); + asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom); + } + if (!killed && !lastt->ctx.copy_stack) { + sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx); + restore_stack2(t, ptls, lastt); // half jl_swap_fiber and half restore_stack } - if (!killed && !lastt->copy_stack) { - sanitizer_start_switch_fiber(ptls, lastt, t); - restore_stack2(t, ptls, lastt); - } else { + else { tsan_switch_to_copyctx(&t->ctx); if (killed) { - sanitizer_start_switch_fiber_killed(ptls, t); + sanitizer_start_switch_fiber_killed(ptls, &t->ctx); tsan_destroy_copyctx(ptls, &lastt->ctx); - } else { - sanitizer_start_switch_fiber(ptls, lastt, t); + } + else { + sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx); } - if (lastt->copy_stack) { + if (lastt->ctx.copy_stack) { restore_stack(t, ptls, NULL); // (doesn't return) + abort(); } else { restore_stack(t, ptls, (char*)1); // (doesn't return) + abort(); } } - } - else #endif - { - if (lastt->copy_stack) { + } + else { + if (lastt->ctx.copy_stack) { // Switching away from a copystack to a non-copystack. Clear // the whole shadow stack now, because otherwise we won't know // how much stack memory to clear the next time we switch to @@ -549,22 +560,23 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15); // We're not restoring the stack, but we still need to unpoison the // stack, so it starts with a pristine stack. - asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom); + asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom); } if (killed) { - sanitizer_start_switch_fiber_killed(ptls, t); + sanitizer_start_switch_fiber_killed(ptls, &t->ctx); tsan_switch_to_ctx(&t->ctx); tsan_destroy_ctx(ptls, &lastt->ctx); jl_set_fiber(&t->ctx); // (doesn't return) abort(); // unreachable } else { - sanitizer_start_switch_fiber(ptls, lastt, t); - if (lastt->copy_stack) { + sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx); + if (lastt->ctx.copy_stack) { // Resume at the jl_setjmp earlier in this function, // don't do a full task swap tsan_switch_to_ctx(&t->ctx); jl_set_fiber(&t->ctx); // (doesn't return) + abort(); } else { jl_swap_fiber(&lastt->ctx, &t->ctx); @@ -573,41 +585,64 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) } } else { - if (lastt->copy_stack) { +#ifdef _COMPILER_TSAN_ENABLED_ + t->ctx.tsan_state = __tsan_create_fiber(0); +#endif + if (lastt->ctx.copy_stack) { uintptr_t stacktop = (uintptr_t)ptls->stackbase; uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15); // We're not restoring the stack, but we still need to unpoison the // stack, so it starts with a pristine stack. - asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom); + asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom); } - if (t->copy_stack && always_copy_stacks) { - tsan_switch_to_ctx(&t->ctx); + if (t->ctx.copy_stack) { +#ifdef COPY_STACKS + if (always_copy_stacks) + tsan_switch_to_ctx(&t->ctx); + else + tsan_switch_to_copyctx(&t->ctx); if (killed) { - sanitizer_start_switch_fiber_killed(ptls, t); + sanitizer_start_switch_fiber_killed(ptls, &t->ctx); tsan_destroy_ctx(ptls, &lastt->ctx); - } else { - sanitizer_start_switch_fiber(ptls, lastt, t); } -#ifdef COPY_STACKS -#if defined(_OS_WINDOWS_) - jl_setcontext(&t->ctx.copy_ctx); + else { + sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx); + } + jl_ucontext_t ctx = t->ctx; + ctx.copy_ctx = NULL; + ctx.stkbuf = (char*)ptls->stackbase - ptls->stacksize; + ctx.bufsz = ptls->stacksize; + if (always_copy_stacks) { + #if defined(_OS_WINDOWS_) + jl_setcontext(&ptls->copy_stack_ctx); + #else + jl_longjmp(ptls->copy_stack_ctx.uc_mcontext, 1); + #endif + abort(); // unreachable + } + else if (lastt->ctx.copy_stack || killed) { + // copy_stack resumes at the jl_setjmp earlier in this function, so don't swap here + jl_start_fiber_set(&ctx); // (doesn't return) + abort(); + } + else { + jl_start_fiber_swap(&lastt->ctx, &ctx); + } #else - jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1); -#endif + abort(); #endif - abort(); // unreachable } else { if (killed) { - sanitizer_start_switch_fiber_killed(ptls, t); + sanitizer_start_switch_fiber_killed(ptls, &t->ctx); tsan_switch_to_ctx(&t->ctx); tsan_destroy_ctx(ptls, &lastt->ctx); jl_start_fiber_set(&t->ctx); // (doesn't return) abort(); } - sanitizer_start_switch_fiber(ptls, lastt, t); - if (lastt->copy_stack) { - // Resume at the jl_setjmp earlier in this function + sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx); + if (lastt->ctx.copy_stack) { + // copy_stack resumes at the jl_setjmp earlier in this function, so don't swap here tsan_switch_to_ctx(&t->ctx); jl_start_fiber_set(&t->ctx); // (doesn't return) abort(); @@ -617,7 +652,13 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) } } } - sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task)); + +#ifdef MIGRATE_TASKS + ptls = lastt->ptls; +#endif + assert(lastt == jl_atomic_load_relaxed(&ptls->current_task)); + lastt->ctx.ctx = NULL; + sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &lastt->ctx); } JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER @@ -629,7 +670,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER return; } int8_t gc_state = jl_gc_unsafe_enter(ptls); - if (t->started && t->stkbuf == NULL) + if (t->ctx.started && t->ctx.stkbuf == NULL) jl_error("attempt to switch to exited task"); if (ptls->in_finalizer) jl_error("task switch not allowed from inside gc finalizer"); @@ -654,7 +695,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER ptls->previous_task = NULL; assert(t != ct); assert(jl_atomic_load_relaxed(&t->tid) == ptls->tid); - if (!t->sticky && !t->copy_stack) + if (!t->sticky && !t->ctx.copy_stack) jl_atomic_store_release(&t->tid, -1); #else assert(ptls == ct->ptls); @@ -1071,25 +1112,24 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type); jl_set_typetagof(t, jl_task_tag, 0); JL_PROBE_RT_NEW_TASK(ct, t); - t->copy_stack = 0; + t->ctx.copy_stack = 0; if (ssize == 0) { // stack size unspecified; use default if (always_copy_stacks) { - t->copy_stack = 1; - t->bufsz = 0; + t->ctx.copy_stack = 1; + t->ctx.bufsz = 0; } else { - t->bufsz = JL_STACK_SIZE; + t->ctx.bufsz = JL_STACK_SIZE; } - t->stkbuf = NULL; + t->ctx.stkbuf = NULL; } else { // user requested dedicated stack of a certain size if (ssize < MINSTKSZ) ssize = MINSTKSZ; - t->bufsz = ssize; - t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t); - if (t->stkbuf == NULL) + t->ctx.bufsz = ssize; + if (!jl_alloc_fiber(&t->ctx, t)) jl_throw(jl_memory_exception); } t->next = jl_nothing; @@ -1109,30 +1149,21 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->sticky = 1; t->gcstack = NULL; t->excstack = NULL; - t->started = 0; + t->ctx.started = 0; t->priority = 0; - jl_atomic_store_relaxed(&t->tid, t->copy_stack ? jl_atomic_load_relaxed(&ct->tid) : -1); // copy_stacks are always pinned since they can't be moved + jl_atomic_store_relaxed(&t->tid, -1); t->threadpoolid = ct->threadpoolid; t->ptls = NULL; t->world_age = ct->world_age; t->reentrant_timing = 0; jl_timing_task_init(t); -#ifdef COPY_STACKS - if (!t->copy_stack) { -#if defined(JL_DEBUG_BUILD) - memset(&t->ctx, 0, sizeof(t->ctx)); -#endif - } - else { - if (always_copy_stacks) - memcpy(&t->ctx.copy_ctx, &ct->ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx)); - else - memcpy(&t->ctx.ctx, &ct->ptls->base_ctx, sizeof(t->ctx.ctx)); - } -#endif + if (t->ctx.copy_stack) + t->ctx.copy_ctx = NULL; + else + t->ctx.ctx = NULL; #ifdef _COMPILER_TSAN_ENABLED_ - t->ctx.tsan_state = __tsan_create_fiber(0); + t->ctx.tsan_state = NULL; #endif #ifdef _COMPILER_ASAN_ENABLED_ t->ctx.asan_fake_stack = NULL; @@ -1196,7 +1227,7 @@ CFI_NORETURN jl_task_t *ct = jl_current_task; #endif jl_ptls_t ptls = ct->ptls; - sanitizer_finish_switch_fiber(ptls->previous_task, ct); + sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &ct->ctx); _start_task(); } @@ -1217,11 +1248,11 @@ CFI_NORETURN #ifdef MIGRATE_TASKS jl_task_t *pt = ptls->previous_task; ptls->previous_task = NULL; - if (!pt->sticky && !pt->copy_stack) + if (!pt->sticky && !pt->ctx.copy_stack) jl_atomic_store_release(&pt->tid, -1); #endif - ct->started = 1; + ct->ctx.started = 1; JL_PROBE_RT_START_TASK(ct); jl_timing_block_task_enter(ct, ptls, NULL); if (jl_atomic_load_relaxed(&ct->_isexception)) { @@ -1253,65 +1284,65 @@ skip_pop_exception:; abort(); } +#if defined(JL_HAVE_UNW_CONTEXT) || defined(JL_HAVE_ASM) || defined(JL_HAVE_UCONTEXT) +static int jl_alloc_fiber(jl_ucontext_t *t, jl_task_t *owner) +{ + size_t ssize = t->bufsz; + void *stkbuf = jl_malloc_stack(&ssize, owner); + t->stkbuf = stkbuf; + t->bufsz = ssize; + return stkbuf != NULL; +} +#endif + #if defined(JL_HAVE_UCONTEXT) #ifdef _OS_WINDOWS_ #define setcontext jl_setcontext #define swapcontext jl_swapcontext -#define makecontext jl_makecontext #endif -static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT +static int make_fiber(jl_ucontext_t *t, _jl_ucontext_t *ctx) { #ifndef _OS_WINDOWS_ - int r = getcontext(t); - if (r != 0) - jl_error("getcontext failed"); + int r = getcontext(ctx); + if (r != 0) abort(); #endif - void *stk = jl_malloc_stack(ssize, owner); - if (stk == NULL) - return NULL; - t->uc_stack.ss_sp = stk; - t->uc_stack.ss_size = *ssize; + ctx->uc_stack.ss_sp = (char*)t->stkbuf; + ctx->uc_stack.ss_size = t->bufsz; #ifdef _OS_WINDOWS_ - makecontext(t, &start_task); + jl_makecontext(ctx, &start_task); #else - t->uc_link = NULL; - makecontext(t, &start_task, 0); + ctx->uc_link = NULL; + makecontext(ctx, &start_task, 0); #endif - return (char*)stk; + return 1; } static void jl_start_fiber_set(jl_ucontext_t *t) { - setcontext(&t->ctx); + _jl_ucontext_t ctx; + make_fiber(t, &ctx); + setcontext(&ctx); } static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t) { + _jl_ucontext_t ctx; + make_fiber(t, &ctx); assert(lastt); tsan_switch_to_ctx(t); - swapcontext(&lastt->ctx, &t->ctx); + lastt->ctx->uc_stack.ss_sp = (char*)t->stkbuf; + lastt->ctx->uc_stack.ss_size = t->bufsz; + swapcontext(lastt->ctx, &ctx); } static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { tsan_switch_to_ctx(t); - swapcontext(&lastt->ctx, &t->ctx); + lastt->ctx->uc_stack.ss_sp = (char*)t->stkbuf; + lastt->ctx->uc_stack.ss_size = t->bufsz; + swapcontext(lastt->ctx, t->ctx); } static void jl_set_fiber(jl_ucontext_t *t) { - setcontext(&t->ctx); -} -#endif - -#if defined(JL_HAVE_UNW_CONTEXT) || defined(JL_HAVE_ASM) -static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) -{ - char *stkbuf = (char*)jl_malloc_stack(ssize, owner); - if (stkbuf == NULL) - return NULL; -#ifndef __clang_gcanalyzer__ - ((char**)t)[0] = stkbuf; // stash the stack pointer somewhere for start_fiber - ((size_t*)t)[1] = *ssize; // stash the stack size somewhere for start_fiber -#endif - return stkbuf; + setcontext(t->ctx); } #endif @@ -1329,15 +1360,15 @@ static inline void jl_unw_swapcontext(unw_context_t *old, unw_cursor_t *c) static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { unw_cursor_t c; - int r = unw_init_local(&c, &t->ctx); + int r = unw_init_local(&c, t->ctx); if (r < 0) abort(); - jl_unw_swapcontext(&lastt->ctx, &c); + jl_unw_swapcontext(lastt->ctx, &c); } static void jl_set_fiber(jl_ucontext_t *t) { unw_cursor_t c; - int r = unw_init_local(&c, &t->ctx); + int r = unw_init_local(&c, t->ctx); if (r < 0) abort(); unw_resume(&c); @@ -1345,14 +1376,14 @@ static void jl_set_fiber(jl_ucontext_t *t) #elif defined(JL_HAVE_ASM) static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { - if (jl_setjmp(lastt->ctx.uc_mcontext, 0)) + if (jl_setjmp(lastt->ctx->uc_mcontext, 0)) return; tsan_switch_to_ctx(t); jl_set_fiber(t); // doesn't return } static void jl_set_fiber(jl_ucontext_t *t) { - jl_longjmp(t->ctx.uc_mcontext, 1); + jl_longjmp(t->ctx->uc_mcontext, 1); } #endif @@ -1373,14 +1404,14 @@ static void jl_set_fiber(jl_ucontext_t *t) static void jl_start_fiber_set(jl_ucontext_t *t) { unw_cursor_t c; - char *stk = ((char**)&t->ctx)[0]; - size_t ssize = ((size_t*)&t->ctx)[1]; + char *stk = (char*)t->stkbuf; + size_t ssize = t->bufsz; uintptr_t fn = (uintptr_t)&start_task; stk += ssize; - int r = unw_getcontext(&t->ctx); + int r = unw_getcontext(t->ctx); if (r) abort(); - if (unw_init_local(&c, &t->ctx)) + if (unw_init_local(&c, t->ctx)) abort(); PUSH_RET(&c, stk); #if defined __linux__ @@ -1396,27 +1427,27 @@ static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t) { assert(lastt); unw_cursor_t c; - char *stk = ((char**)&t->ctx)[0]; - size_t ssize = ((size_t*)&t->ctx)[1]; + char *stk = (char*)t->stkbuf; + size_t ssize = t->bufsz; uintptr_t fn = (uintptr_t)&start_task; stk += ssize; volatile int returns = 0; - int r = unw_getcontext(&lastt->ctx); + int r = unw_getcontext(lastt->ctx); if (++returns == 2) // r is garbage after the first return return; if (r != 0 || returns != 1) abort(); - r = unw_getcontext(&t->ctx); + r = unw_getcontext(t->ctx); if (r != 0) abort(); - if (unw_init_local(&c, &t->ctx)) + if (unw_init_local(&c, t->ctx)) abort(); PUSH_RET(&c, stk); if (unw_set_reg(&c, UNW_REG_SP, (uintptr_t)stk)) abort(); if (unw_set_reg(&c, UNW_REG_IP, fn)) abort(); - jl_unw_swapcontext(&lastt->ctx, &c); + jl_unw_swapcontext(lastt->ctx, &c); } #endif @@ -1426,13 +1457,13 @@ JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t * assert(lastt); #ifdef JL_HAVE_UNW_CONTEXT volatile int returns = 0; - int r = unw_getcontext(&lastt->ctx); + int r = unw_getcontext(lastt->ctx); if (++returns == 2) // r is garbage after the first return return; if (r != 0 || returns != 1) abort(); #else - if (jl_setjmp(lastt->ctx.uc_mcontext, 0)) + if (jl_setjmp(lastt->ctx->uc_mcontext, 0)) return; #endif tsan_switch_to_ctx(t); @@ -1440,8 +1471,8 @@ JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t * } JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t) { - char *stk = ((char**)&t->ctx)[0]; - size_t ssize = ((size_t*)&t->ctx)[1]; + char *stk = (char*)t->stkbuf; + size_t ssize = t->bufsz; uintptr_t fn = (uintptr_t)&start_task; stk += ssize; #ifdef _CPU_X86_64_ @@ -1539,14 +1570,14 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) } #endif if (always_copy_stacks) { - ct->copy_stack = 1; - ct->stkbuf = NULL; - ct->bufsz = 0; + ct->ctx.copy_stack = 1; + ct->ctx.stkbuf = NULL; + ct->ctx.bufsz = 0; } else { - ct->copy_stack = 0; - ct->stkbuf = stack; - ct->bufsz = ssize; + ct->ctx.copy_stack = 0; + ct->ctx.stkbuf = stack; + ct->ctx.bufsz = ssize; } #ifdef USE_TRACY @@ -1554,7 +1585,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) strcpy(unique_string, "Root"); ct->name = unique_string; #endif - ct->started = 1; + ct->ctx.started = 1; ct->next = jl_nothing; ct->queue = jl_nothing; ct->tls = jl_nothing; @@ -1604,11 +1635,14 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) start_task(); // sanitizer_finish_switch_fiber is part of start_task } else { - ssize = JL_STACK_SIZE; - char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL); - if (stkbuf != NULL) { - ptls->stackbase = stkbuf + ssize; - ptls->stacksize = ssize; + jl_ucontext_t base_ctx; + base_ctx.bufsz = JL_STACK_SIZE; + if (jl_alloc_fiber(&base_ctx, NULL)) { + ptls->stackbase = (char*)base_ctx.stkbuf + base_ctx.bufsz; + ptls->stacksize = base_ctx.bufsz; + } + else { + ptls->stacksize = 0; } } #endif @@ -1621,7 +1655,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) JL_DLLEXPORT int jl_is_task_started(jl_task_t *t) JL_NOTSAFEPOINT { - return t->started; + return t->ctx.started; } JL_DLLEXPORT int16_t jl_get_task_tid(jl_task_t *t) JL_NOTSAFEPOINT