Skip to content

Commit

Permalink
Reduce size of Task object
Browse files Browse the repository at this point in the history
Move the registers onto the stack, so that they are only present when
the Task is actually switched out, saving memory when the Task is not
running yet or already finished.

On Linux x86_64 this reduces it from 376 bytes to 184 bytes.

This has some additional advantages too, such as allowing copy_stack tasks
(e.g. with always_copy_stacks) to migrate to other threads before starting
if they are not sticky.

Also fixes a variable that got mixed up by #54639 and caused
always_copy_stacks to abort, since the stack limits were wrong.
  • Loading branch information
vtjnash committed Aug 17, 2024
1 parent 5230d27 commit a4db6b8
Show file tree
Hide file tree
Showing 9 changed files with 293 additions and 298 deletions.
22 changes: 11 additions & 11 deletions src/gc-stacks.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,14 +186,14 @@ JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz)
void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
{
// avoid adding an original thread stack to the free list
if (task == ptls->root_task && !task->copy_stack)
if (task == ptls->root_task && !task->ctx.copy_stack)
return;
void *stkbuf = task->stkbuf;
size_t bufsz = task->bufsz;
void *stkbuf = task->ctx.stkbuf;
size_t bufsz = task->ctx.bufsz;
if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
unsigned pool_id = select_pool(bufsz);
if (pool_sizes[pool_id] == bufsz) {
task->stkbuf = NULL;
task->ctx.stkbuf = NULL;
#ifdef _COMPILER_ASAN_ENABLED_
__asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
#endif
Expand Down Expand Up @@ -296,17 +296,17 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT
jl_task_t *t = (jl_task_t*)lst[n];
assert(jl_is_task(t));
if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
if (t->stkbuf == NULL)
if (t->ctx.stkbuf == NULL)
ndel++; // jl_release_task_stack called
else
n++;
}
else {
ndel++;
void *stkbuf = t->stkbuf;
size_t bufsz = t->bufsz;
void *stkbuf = t->ctx.stkbuf;
size_t bufsz = t->ctx.bufsz;
if (stkbuf) {
t->stkbuf = NULL;
t->ctx.stkbuf = NULL;
_jl_free_stack(ptls2, stkbuf, bufsz);
}
#ifdef _COMPILER_TSAN_ENABLED_
Expand Down Expand Up @@ -338,7 +338,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
continue;
small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
size_t n = mtarraylist_length(live_tasks);
l += n + (ptls2->root_task->stkbuf != NULL);
l += n + (ptls2->root_task->ctx.stkbuf != NULL);
}
l += l / 20; // add 5% for margin of estimation error
jl_array_t *a = jl_alloc_vec_any(l); // may gc, changing the number of tasks and forcing us to reload everything
Expand All @@ -350,7 +350,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
if (ptls2 == NULL)
continue;
jl_task_t *t = ptls2->root_task;
if (t->stkbuf != NULL) {
if (t->ctx.stkbuf != NULL) {
if (j == l)
goto restart;
jl_array_data(a,void*)[j++] = t;
Expand All @@ -359,7 +359,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
size_t n = mtarraylist_length(live_tasks);
for (size_t i = 0; i < n; i++) {
jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i);
if (t->stkbuf != NULL) {
if (t->ctx.stkbuf != NULL) {
if (j == l)
goto restart;
jl_array_data(a,void*)[j++] = t;
Expand Down
10 changes: 5 additions & 5 deletions src/gc-stock.c
Original file line number Diff line number Diff line change
Expand Up @@ -2144,9 +2144,9 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
(ta, tid != -1 && ta == gc_all_tls_states[tid]->root_task));
}
#ifdef COPY_STACKS
void *stkbuf = ta->stkbuf;
if (stkbuf && ta->copy_stack) {
gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
void *stkbuf = ta->ctx.stkbuf;
if (stkbuf && ta->ctx.copy_stack) {
gc_setmark_buf_(ptls, stkbuf, bits, ta->ctx.bufsz);
// For gc_heap_snapshot_record:
// TODO: attribute size of stack
// TODO: edge to stack data
Expand All @@ -2159,12 +2159,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
uintptr_t lb = 0;
uintptr_t ub = (uintptr_t)-1;
#ifdef COPY_STACKS
if (stkbuf && ta->copy_stack && !ta->ptls) {
if (stkbuf && ta->ctx.copy_stack && !ta->ptls) {
int16_t tid = jl_atomic_load_relaxed(&ta->tid);
assert(tid >= 0);
jl_ptls_t ptls2 = gc_all_tls_states[tid];
ub = (uintptr_t)ptls2->stackbase;
lb = ub - ta->copy_stack;
lb = ub - ta->ctx.copy_stack;
offset = (uintptr_t)stkbuf - lb;
}
#endif
Expand Down
17 changes: 3 additions & 14 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,35 +64,24 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi)
// threads since it seems to return bogus values for master thread on Linux
// and possibly OSX.
if (!ismaster) {
# if defined(_OS_LINUX_)
# if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
pthread_attr_t attr;
pthread_getattr_np(pthread_self(), &attr);
void *stackaddr;
size_t stacksize;
pthread_attr_getstack(&attr, &stackaddr, &stacksize);
pthread_attr_destroy(&attr);
*stack_hi = stackaddr;
*stack_lo = (char*)stackaddr - stacksize;
*stack_lo = stackaddr;
*stack_hi = (char*)stackaddr + stacksize;
return;
# elif defined(_OS_DARWIN_)
extern void *pthread_get_stackaddr_np(pthread_t thread);
extern size_t pthread_get_stacksize_np(pthread_t thread);
pthread_t thread = pthread_self();
void *stackaddr = pthread_get_stackaddr_np(thread);
size_t stacksize = pthread_get_stacksize_np(thread);
*stack_hi = stackaddr;
*stack_lo = (char*)stackaddr - stacksize;
return;
# elif defined(_OS_FREEBSD_)
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_get_np(pthread_self(), &attr);
void *stackaddr;
size_t stacksize;
pthread_attr_getstack(&attr, &stackaddr, &stacksize);
pthread_attr_destroy(&attr);
*stack_hi = stackaddr;
*stack_lo = (char*)stackaddr - stacksize;
return;
# else
# warning "Getting precise stack size for thread is not supported."
Expand Down
6 changes: 1 addition & 5 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -2231,11 +2231,7 @@ typedef struct _jl_task_t {
// current exception handler
jl_handler_t *eh;
// saved thread state
jl_ucontext_t ctx;
void *stkbuf; // malloc'd memory (either copybuf or stack)
size_t bufsz; // actual sizeof stkbuf
unsigned int copy_stack:31; // sizeof stack for copybuf
unsigned int started:1;
jl_ucontext_t ctx; // pointer into stkbuf, if suspended
} jl_task_t;

#define JL_TASK_STATE_RUNNABLE 0
Expand Down
3 changes: 2 additions & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ static inline void asan_unpoison_task_stack(jl_task_t *ct, jl_jmp_buf *buf)
that we're resetting to. The idea is to remove the poison from the frames
that we're skipping over, since they won't be unwound. */
uintptr_t top = jmpbuf_sp(buf);
uintptr_t bottom = (uintptr_t)ct->stkbuf;
uintptr_t bottom = (uintptr_t)(ct->ctx.copy_stack ? (char*)ct->ptls->stackbase - ct->ptls->stacksize : (char*)ct->ctx.stkbuf);
//uintptr_t bottom = (uintptr_t)&top;
__asan_unpoison_stack_memory(bottom, top - bottom);
}
static inline void asan_unpoison_stack_memory(uintptr_t addr, size_t size) {
Expand Down
15 changes: 8 additions & 7 deletions src/julia_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,13 @@ typedef ucontext_t _jl_ucontext_t;

typedef struct {
union {
_jl_ucontext_t ctx;
jl_stack_context_t copy_ctx;
_jl_ucontext_t *ctx;
jl_stack_context_t *copy_ctx;
};
void *stkbuf; // malloc'd memory (either copybuf or stack)
size_t bufsz; // actual sizeof stkbuf
unsigned int copy_stack:31; // sizeof stack for copybuf
unsigned int started:1;
#if defined(_COMPILER_TSAN_ENABLED_)
void *tsan_state;
#endif
Expand Down Expand Up @@ -157,11 +161,8 @@ typedef struct _jl_tls_states_t {
struct _jl_timing_block_t *timing_stack;
void *stackbase;
size_t stacksize;
union {
_jl_ucontext_t base_ctx; // base context of stack
// This hack is needed to support always_copy_stacks:
jl_stack_context_t copy_stack_ctx;
};
// This is needed to support always_copy_stacks:
jl_stack_context_t copy_stack_ctx;
// Temp storage for exception thrown in signal handler. Not rooted.
struct _jl_value_t *sig_exception;
// Temporary backtrace buffer. Scanned for gc roots when bt_size > 0.
Expand Down
6 changes: 3 additions & 3 deletions src/signals-unix.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,13 +230,13 @@ static pthread_t signals_thread;

static int is_addr_on_stack(jl_task_t *ct, void *addr)
{
if (ct->copy_stack) {
if (ct->ctx.copy_stack) {
jl_ptls_t ptls = ct->ptls;
return ((char*)addr > (char*)ptls->stackbase - ptls->stacksize &&
(char*)addr < (char*)ptls->stackbase);
}
return ((char*)addr > (char*)ct->stkbuf &&
(char*)addr < (char*)ct->stkbuf + ct->bufsz);
return ((char*)addr > (char*)ct->ctx.stkbuf &&
(char*)addr < (char*)ct->ctx.stkbuf + ct->ctx.bufsz);
}

static void sigdie_handler(int sig, siginfo_t *info, void *context)
Expand Down
63 changes: 30 additions & 33 deletions src/stackwalk.c
Original file line number Diff line number Diff line change
Expand Up @@ -921,24 +921,24 @@ _os_ptr_munge(uintptr_t ptr) JL_NOTSAFEPOINT

extern bt_context_t *jl_to_bt_context(void *sigctx);

static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
JL_DLLEXPORT size_t jl_record_backtrace(jl_task_t *t, jl_bt_element_t *bt_data, size_t max_bt_size) JL_NOTSAFEPOINT
{
jl_task_t *ct = jl_current_task;
jl_ptls_t ptls = ct->ptls;
ptls->bt_size = 0;
if (t == ct) {
ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
return;
return rec_backtrace(bt_data, max_bt_size, 0);
}
bt_context_t *context = NULL;
bt_context_t c;
int16_t old = -1;
while (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid) {
int lockret = jl_lock_stackwalk();
// if this task is already running somewhere, we need to stop the thread it is running on and query its state
if (!jl_thread_suspend_and_get_state(old, 0, &c)) {
if (!jl_thread_suspend_and_get_state(old, 1, &c)) {
jl_unlock_stackwalk(lockret);
return;
if (jl_atomic_load_relaxed(&t->tid) != old)
continue;
return 0;
}
jl_unlock_stackwalk(lockret);
if (jl_atomic_load_relaxed(&t->tid) == old) {
Expand All @@ -953,11 +953,11 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
// got the wrong thread stopped, try again
jl_thread_resume(old);
}
if (context == NULL && (!t->copy_stack && t->started && t->stkbuf != NULL)) {
if (context == NULL && (!t->ctx.copy_stack && t->ctx.started && t->ctx.ctx != NULL)) {
// need to read the context from the task stored state
#if defined(_OS_WINDOWS_)
memset(&c, 0, sizeof(c));
_JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx.uc_mcontext;
_JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx->uc_mcontext;
#if defined(_CPU_X86_64_)
c.Rbx = mctx->Rbx;
c.Rsp = mctx->Rsp;
Expand All @@ -979,13 +979,13 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
#endif
context = &c;
#elif defined(JL_HAVE_UNW_CONTEXT)
context = &t->ctx.ctx;
context = t->ctx.ctx;
#elif defined(JL_HAVE_UCONTEXT)
context = jl_to_bt_context(&t->ctx.ctx);
context = jl_to_bt_context(t->ctx.ctx);
#elif defined(JL_HAVE_ASM)
memset(&c, 0, sizeof(c));
#if defined(_OS_LINUX_) && defined(__GLIBC__)
__jmp_buf *mctx = &t->ctx.ctx.uc_mcontext->__jmpbuf;
__jmp_buf *mctx = &t->ctx.ctx->uc_mcontext->__jmpbuf;
mcontext_t *mc = &c.uc_mcontext;
#if defined(_CPU_X86_)
// https://github.com/bminor/glibc/blame/master/sysdeps/i386/__longjmp.S
Expand Down Expand Up @@ -1071,13 +1071,13 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
mc->pc = mc->regs[30];
context = &c;
#else
#pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown linux")
#pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown linux")
(void)mc;
(void)c;
(void)mctx;
#endif
#elif defined(_OS_DARWIN_)
sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext;
sigjmp_buf *mctx = &t->ctx.ctx->uc_mcontext;
#if defined(_CPU_X86_64_)
// from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/x86_64/_setjmp.s
x86_thread_state64_t *mc = (x86_thread_state64_t*)&c;
Expand Down Expand Up @@ -1133,12 +1133,12 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
mc->__pad = 0; // aka __ra_sign_state = not signed
context = &c;
#else
#pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown darwin")
#pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown darwin")
(void)mctx;
(void)c;
#endif
#elif defined(_OS_FREEBSD_)
sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext;
sigjmp_buf *mctx = &t->ctx.ctx->uc_mcontext;
mcontext_t *mc = &c.uc_mcontext;
#if defined(_CPU_X86_64_)
// https://github.com/freebsd/freebsd-src/blob/releng/13.1/lib/libc/amd64/gen/_setjmp.S
Expand Down Expand Up @@ -1175,24 +1175,26 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
mc->mc_fpregs.fp_q[14] = ((long*)mctx)[20];
context = &c;
#else
#pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown freebsd")
#pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown freebsd")
(void)mctx;
(void)c;
#endif
#else
#pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown system")
#pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown system")
(void)c;
#endif
#else
#pragma message("jl_rec_backtrace not defined for unknown task system")
#pragma message("jl_record_backtrace not defined for unknown task system")
#endif
}
size_t bt_size = 0;
if (context)
ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context, t->gcstack);
bt_size = rec_backtrace_ctx(bt_data, max_bt_size, context, t->gcstack);
if (old == -1)
jl_atomic_store_relaxed(&t->tid, old);
else if (old != ptls->tid)
jl_thread_resume(old);
return bt_size;
}

//--------------------------------------------------
Expand Down Expand Up @@ -1224,12 +1226,15 @@ JL_DLLEXPORT void jlbacktracet(jl_task_t *t) JL_NOTSAFEPOINT
{
jl_task_t *ct = jl_current_task;
jl_ptls_t ptls = ct->ptls;
jl_rec_backtrace(t);
size_t i, bt_size = ptls->bt_size;
ptls->bt_size = 0;
jl_bt_element_t *bt_data = ptls->bt_data;
size_t bt_size = jl_record_backtrace(t, bt_data, JL_MAX_BT_SIZE);
size_t i;
for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
jl_print_bt_entry_codeloc(bt_data + i);
}
if (bt_size == 0)
jl_safe_printf(" no backtrace recorded\n");
}

JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
Expand Down Expand Up @@ -1269,14 +1274,9 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
jl_safe_printf(" ---- Root task (%p)\n", ptls2->root_task);
if (t != NULL) {
jl_safe_printf(" (sticky: %d, started: %d, state: %d, tid: %d)\n",
t->sticky, t->started, t_state,
t->sticky, t->ctx.started, t_state,
jl_atomic_load_relaxed(&t->tid) + 1);
if (t->stkbuf != NULL) {
jlbacktracet(t);
}
else {
jl_safe_printf(" no stack\n");
}
jlbacktracet(t);
}
jl_safe_printf(" ---- End root task\n");
}
Expand All @@ -1291,12 +1291,9 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
jl_safe_printf(" ---- Task %zu (%p)\n", j + 1, t);
// n.b. this information might not be consistent with the stack printing after it, since it could start running or change tid, etc.
jl_safe_printf(" (sticky: %d, started: %d, state: %d, tid: %d)\n",
t->sticky, t->started, t_state,
t->sticky, t->ctx.started, t_state,
jl_atomic_load_relaxed(&t->tid) + 1);
if (t->stkbuf != NULL)
jlbacktracet(t);
else
jl_safe_printf(" no stack\n");
jlbacktracet(t);
jl_safe_printf(" ---- End task %zu\n", j + 1);
}
jl_safe_printf("==== End thread %d\n", ptls2->tid + 1);
Expand Down
Loading

0 comments on commit a4db6b8

Please sign in to comment.