diff --git a/src/signals-unix.c b/src/signals-unix.c index b9ff5daa1f0d4..9142da0c03ada 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS) #define MAP_ANONYMOUS MAP_ANON @@ -368,11 +369,25 @@ static pthread_cond_t signal_caught_cond; static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) { + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + ts.tv_sec += 1; pthread_mutex_lock(&in_signal_lock); jl_ptls_t ptls2 = jl_all_tls_states[tid]; jl_atomic_store_release(&ptls2->signal_request, 1); pthread_kill(ptls2->system_id, SIGUSR2); - pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge + // wait for thread to acknowledge + int err = pthread_cond_timedwait(&signal_caught_cond, &in_signal_lock, &ts); + if (err == ETIMEDOUT) { + sig_atomic_t request = 1; + if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) { + *ctx = NULL; + pthread_mutex_unlock(&in_signal_lock); + return; + } + err = pthread_cond_wait(&signal_caught_cond, &in_signal_lock); + } + assert(!err); assert(jl_atomic_load_acquire(&ptls2->signal_request) == 0); *ctx = signal_context; } @@ -758,6 +773,8 @@ static void *signal_listener(void *arg) int i = critical ? 
idx : profile_round_robin_thread_order[idx]; // notify thread to stop jl_thread_suspend_and_get_state(i, &signal_context); + if (signal_context == NULL) + continue; // do backtrace on thread contexts for critical signals // this part must be signal-handler safe diff --git a/src/task.c b/src/task.c index 637f2d453af92..9f55b9b64a833 100644 --- a/src/task.c +++ b/src/task.c @@ -423,18 +423,19 @@ static void ctx_switch(jl_task_t *lastt) else #endif *pt = NULL; // can't fail after here: clear the gc-root for the target task now - lastt->ptls = NULL; } // set up global state for new task and clear global state for old task t->ptls = ptls; jl_atomic_store_relaxed(&ptls->current_task, t); JL_GC_PROMISE_ROOTED(t); + jl_signal_fence(); + jl_set_pgcstack(&t->gcstack); + jl_signal_fence(); lastt->ptls = NULL; #ifdef MIGRATE_TASKS ptls->previous_task = lastt; #endif - jl_set_pgcstack(&t->gcstack); if (t->started) { #ifdef COPY_STACKS diff --git a/test/profile_spawnmany_exec.jl b/test/profile_spawnmany_exec.jl new file mode 100644 index 0000000000000..a061de40d5172 --- /dev/null +++ b/test/profile_spawnmany_exec.jl @@ -0,0 +1,14 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Profile + +function spawnmany(n) + if n > 2 + m = n ÷ 2 + t = Threads.@spawn spawnmany(m) + spawnmany(m) + wait(t) + end +end + +@profile spawnmany(parse(Int, get(ENV, "NTASKS", "2000000"))) diff --git a/test/threads.jl b/test/threads.jl index 736cecada3cd8..1e4c4b4f6a5f3 100644 --- a/test/threads.jl +++ b/test/threads.jl @@ -147,3 +147,39 @@ end # We don't need the watchdog anymore close(proc.in) + +# https://github.com/JuliaLang/julia/pull/42973 +Sys.islinux() && @testset "spawn and wait *a lot* of tasks in @profile" begin + # Not using threads_exec.jl for better isolation, reproducibility, and a + # tighter timeout. 
+ script = "profile_spawnmany_exec.jl" + cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no $script` + @testset for n in [20000, 200000, 2000000] + proc = run(ignorestatus(setenv(cmd, "NTASKS" => n; dir = @__DIR__)); wait = false) + done = Threads.Atomic{Bool}(false) + timeout = false + timer = Timer(100) do _ + timeout = true + for sig in [Base.SIGTERM, Base.SIGHUP, Base.SIGKILL] + for _ in 1:1000 + kill(proc, sig) + if done[] + if sig != Base.SIGTERM + @warn "Terminating `$script` required signal $sig" + end + return + end + sleep(0.001) + end + end + end + try + wait(proc) + finally + done[] = true + close(timer) + end + @test success(proc) + @test !timeout + end +end