From 0495045cc11bf9088ace3e218157df6ffc08c2b8 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Sat, 7 Sep 2024 18:16:22 -0400 Subject: [PATCH] [Profile] fix threading issue (#55704) I forgot about the existence of threads, so had hard-coded this to only support one thread. Clearly that is not sufficient though, so use the semaphore here as it is intended to be used. Fixes #55703 --------- Co-authored-by: Ian Butterworth (cherry picked from commit 4f0a333d9d76df76a6383ed2113e66c789d5ecee) --- src/signals-unix.c | 24 ++++++++++-------------- stdlib/Profile/test/runtests.jl | 3 ++- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/signals-unix.c b/src/signals-unix.c index a2b56952f71bd..2aafd335a68b8 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -410,6 +410,7 @@ pthread_mutex_t in_signal_lock; // shared with jl_delete_thread static bt_context_t *signal_context; // protected by in_signal_lock static int exit_signal_cond = -1; static int signal_caught_cond = -1; +static int signals_inflight = 0; int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) { @@ -422,7 +423,7 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) pthread_mutex_unlock(&in_signal_lock); return 0; } - if (jl_atomic_load(&ptls2->signal_request) != 0) { + while (signals_inflight) { // something is wrong, or there is already a usr2 in flight elsewhere // try to wait for it to finish or wait for timeout struct pollfd event = {signal_caught_cond, POLLIN, 0}; @@ -434,25 +435,16 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) pthread_mutex_unlock(&in_signal_lock); return 0; } - } - // check for any stale signal_caught_cond events - struct pollfd event = {signal_caught_cond, POLLIN, 0}; - do { - err = poll(&event, 1, 0); - } while (err == -1 && errno == EINTR); - if (err == -1) { - pthread_mutex_unlock(&in_signal_lock); - return 0; - } - if ((event.revents & POLLIN) != 0) { // consume it before continuing eventfd_t got; do { err = read(signal_caught_cond, &got, sizeof(eventfd_t)); } while (err == -1 && errno == EINTR); if (err != sizeof(eventfd_t)) abort(); - assert(got == 1); (void) got; + assert(signals_inflight >= got); + signals_inflight -= got; } + signals_inflight++; sig_atomic_t request = jl_atomic_exchange(&ptls2->signal_request, 1); assert(request == 0 || request == -1); request = 1; @@ -469,6 +461,7 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) if (err == -1) { // not ready after timeout: try to cancel this request if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) { + signals_inflight--; pthread_mutex_unlock(&in_signal_lock); return 0; } @@ -478,7 +471,9 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) err = read(signal_caught_cond, &got, sizeof(eventfd_t)); } while (err == -1 && errno == EINTR); if (err != sizeof(eventfd_t)) abort(); - assert(got == 1); (void) got; + assert(signals_inflight >= got); + signals_inflight -= got; + signals_inflight++; // Now the other thread is waiting on exit_signal_cond (verify that here by // checking it is 0, and add an acquire barrier for good measure) request = jl_atomic_load_acquire(&ptls2->signal_request); @@ -505,6 +500,7 @@ static void jl_try_deliver_sigint(void) jl_safepoint_enable_sigint(); jl_wake_libuv(); pthread_mutex_lock(&in_signal_lock); + signals_inflight++; jl_atomic_store_release(&ptls2->signal_request, 2); // This also makes sure `sleep` is aborted. pthread_kill(ptls2->system_id, SIGUSR2); diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index cbfdde61d7054..958f1fefb6981 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -168,7 +168,8 @@ let cmd = Base.julia_cmd() println("done") print(Profile.len_data()) """ - p = open(`$cmd -e $script`) + # use multiple threads here to ensure that profiling works with threading + p = open(`$cmd -t2 -e $script`) t = Timer(120) do t # should be under 10 seconds, so give it 2 minutes then report failure println("KILLING debuginfo registration test BY PROFILE TEST WATCHDOG\n")