From 02e1cbcd002aba091f970cabde656d1f7454b871 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Thu, 25 Jul 2024 22:24:32 -0700 Subject: [PATCH] Revert "Make spin locks go faster" This reverts commit c8e25d811c7171bc32ad59d6e2cf078191886c56. --- libc/calls/getloadavg-nt.c | 20 +----- libc/calls/sig.c | 26 ++------ libc/intrin/cxalock.c | 1 - libc/intrin/maps.c | 20 ++++-- libc/intrin/pthread_atfork_actual.c | 66 ++++++++++++------- libc/intrin/pthread_mutex_lock.c | 26 ++++---- libc/intrin/pthread_mutex_trylock.c | 12 +++- libc/intrin/pthread_mutex_unlock.c | 7 +- .../{thread => intrin}/pthread_spin_destroy.c | 0 libc/{thread => intrin}/pthread_spin_init.c | 0 libc/{thread => intrin}/pthread_spin_lock.c | 8 +-- .../{thread => intrin}/pthread_spin_trylock.c | 0 libc/{thread => intrin}/pthread_spin_unlock.c | 0 libc/runtime/at_quick_exit.c | 32 +++------ libc/runtime/getsymboltable.c | 44 ++++++------- libc/stdio/flockfile.c | 9 +-- 16 files changed, 122 insertions(+), 149 deletions(-) rename libc/{thread => intrin}/pthread_spin_destroy.c (100%) rename libc/{thread => intrin}/pthread_spin_init.c (100%) rename libc/{thread => intrin}/pthread_spin_lock.c (92%) rename libc/{thread => intrin}/pthread_spin_trylock.c (100%) rename libc/{thread => intrin}/pthread_spin_unlock.c (100%) diff --git a/libc/calls/getloadavg-nt.c b/libc/calls/getloadavg-nt.c index 74832e8fa17..4e8d6d847bc 100644 --- a/libc/calls/getloadavg-nt.c +++ b/libc/calls/getloadavg-nt.c @@ -30,27 +30,15 @@ static int cpus; static double load; +static pthread_spinlock_t lock; static struct NtFileTime idle1, kern1, user1; -static pthread_mutex_t getloadavg_lock; - -static void __getloadavg_lock(void) { - pthread_mutex_lock(&getloadavg_lock); -} - -static void __getloadavg_unlock(void) { - pthread_mutex_unlock(&getloadavg_lock); -} - -static void __getloadavg_wipe(void) { - pthread_mutex_init(&getloadavg_lock, 0); -} textwindows int sys_getloadavg_nt(double *a, int n) { int i, rc; uint64_t elapsed, used; struct NtFileTime idle, kern, user; BLOCK_SIGNALS; - __getloadavg_lock(); + pthread_spin_lock(&lock); if (GetSystemTimes(&idle, &kern, &user)) { elapsed = (FT(kern) - FT(kern1)) + (FT(user) - FT(user1)); if (elapsed) { @@ -66,7 +54,7 @@ textwindows int sys_getloadavg_nt(double *a, int n) { } else { rc = __winerr(); } - __getloadavg_unlock(); + pthread_spin_unlock(&lock); ALLOW_SIGNALS; return rc; } @@ -77,7 +65,5 @@ __attribute__((__constructor__(40))) static textstartup void ntinitload(void) { cpus = __get_cpu_count() / 2; cpus = MAX(1, cpus); GetSystemTimes(&idle1, &kern1, &user1); - pthread_atfork(__getloadavg_lock, __getloadavg_unlock, __getloadavg_wipe); - __getloadavg_wipe(); } } diff --git a/libc/calls/sig.c b/libc/calls/sig.c index b34da2a1de2..247c567460c 100644 --- a/libc/calls/sig.c +++ b/libc/calls/sig.c @@ -51,7 +51,6 @@ #include "libc/sysv/consts/sicode.h" #include "libc/sysv/consts/ss.h" #include "libc/thread/posixthread.internal.h" -#include "libc/thread/thread.h" #ifdef __x86_64__ /** @@ -65,20 +64,6 @@ struct SignalFrame { ucontext_t ctx; }; -static pthread_mutex_t __sig_lock_obj; - -static void __sig_wipe(void) { - pthread_mutex_init(&__sig_lock_obj, 0); -} - -static void __sig_lock(void) { - pthread_mutex_lock(&__sig_lock_obj); -} - -static void __sig_unlock(void) { - pthread_mutex_unlock(&__sig_lock_obj); -} - static textwindows bool __sig_ignored_by_default(int sig) { return sig == SIGURG || // sig == SIGCONT || // @@ -333,10 +318,11 @@ static textwindows int __sig_killer(struct PosixThread *pt, int sig, int sic) { // take control of thread // suspending the thread happens asynchronously // however getting the context blocks until it's frozen - __sig_lock(); + static pthread_spinlock_t killer_lock; + pthread_spin_lock(&killer_lock); if (SuspendThread(th) == -1u) { STRACE("SuspendThread failed w/ %d", GetLastError()); - __sig_unlock(); + pthread_spin_unlock(&killer_lock); return ESRCH; } struct NtContext nc; @@ -344,10 +330,10 @@ static textwindows int __sig_killer(struct PosixThread *pt, int sig, int sic) { if (!GetThreadContext(th, &nc)) { STRACE("GetThreadContext failed w/ %d", GetLastError()); ResumeThread(th); - __sig_unlock(); + pthread_spin_unlock(&killer_lock); return ESRCH; } - __sig_unlock(); + pthread_spin_unlock(&killer_lock); // we can't preempt threads that masked sig or are blocked // we can't preempt threads that are running in win32 code @@ -626,8 +612,6 @@ __attribute__((__constructor__(10))) textstartup void __sig_init(void) { return; AddVectoredExceptionHandler(true, (void *)__sig_crash); SetConsoleCtrlHandler((void *)__sig_console, true); - pthread_atfork(__sig_lock, __sig_unlock, __sig_wipe); - __sig_wipe(); } #endif /* __x86_64__ */ diff --git a/libc/intrin/cxalock.c b/libc/intrin/cxalock.c index bd328747ebe..e0d43f53408 100644 --- a/libc/intrin/cxalock.c +++ b/libc/intrin/cxalock.c @@ -35,5 +35,4 @@ void __cxa_unlock(void) { __attribute__((__constructor__(60))) static textstartup void __cxa_init() { pthread_atfork(__cxa_lock, __cxa_unlock, __cxa_wipe); - __cxa_wipe(); } diff --git a/libc/intrin/maps.c b/libc/intrin/maps.c index e28722d7e21..3d042e5d206 100644 --- a/libc/intrin/maps.c +++ b/libc/intrin/maps.c @@ -91,10 +91,22 @@ privileged bool __maps_lock(void) { tib = __get_tls_privileged(); if (atomic_fetch_add_explicit(&tib->tib_relock_maps, 1, memory_order_relaxed)) return true; - while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire)) - for (;;) - if (!atomic_load_explicit(&__maps.lock, memory_order_relaxed)) - break; + int backoff = 0; + while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire)) { + if (backoff < 7) { + volatile int i; + for (i = 0; i != 1 << backoff; i++) { + } + backoff++; + } else { + // STRACE("pthread_delay_np(__maps)"); +#if defined(__GNUC__) && defined(__aarch64__) + __asm__ volatile("yield"); +#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) + __asm__ volatile("pause"); +#endif + } + } return false; } diff --git a/libc/intrin/pthread_atfork_actual.c b/libc/intrin/pthread_atfork_actual.c index f6796e18422..81551720673 100644 --- a/libc/intrin/pthread_atfork_actual.c +++ b/libc/intrin/pthread_atfork_actual.c @@ -16,11 +16,21 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/atomic.h" +#include "libc/calls/state.internal.h" +#include "libc/cosmo.h" +#include "libc/dce.h" #include "libc/errno.h" +#include "libc/intrin/atomic.h" +#include "libc/intrin/dll.h" #include "libc/intrin/strace.h" #include "libc/macros.internal.h" +#include "libc/proc/proc.internal.h" +#include "libc/runtime/runtime.h" +#include "libc/str/str.h" #include "libc/thread/posixthread.internal.h" #include "libc/thread/thread.h" +#include "libc/thread/tls.h" struct AtFork { struct AtFork *p[2]; @@ -28,16 +38,16 @@ struct AtFork { }; static struct AtForks { - pthread_mutex_t lock; + pthread_spinlock_t lock; struct AtFork *list; - struct AtFork pool[256]; - int allocated; -} _atforks = { - PTHREAD_MUTEX_INITIALIZER, -}; + struct AtFork pool[64]; + atomic_int allocated; +} _atforks; static void _pthread_onfork(int i, const char *op) { struct AtFork *a; + if (!i) + pthread_spin_lock(&_atforks.lock); for (a = _atforks.list; a; a = a->p[!i]) { if (a->f[i]) { STRACE("pthread_atfork(%s, %t)", op, a->f[i]); @@ -45,41 +55,47 @@ static void _pthread_onfork(int i, const char *op) { } _atforks.list = a; } + if (i) + pthread_spin_unlock(&_atforks.lock); } void _pthread_onfork_prepare(void) { - pthread_mutex_lock(&_atforks.lock); _pthread_onfork(0, "prepare"); } void _pthread_onfork_parent(void) { _pthread_onfork(1, "parent"); - pthread_mutex_unlock(&_atforks.lock); } void _pthread_onfork_child(void) { - pthread_mutex_init(&_atforks.lock, 0); _pthread_onfork(2, "child"); } +static struct AtFork *_pthread_atfork_alloc(void) { + int i, n = ARRAYLEN(_atforks.pool); + if (atomic_load_explicit(&_atforks.allocated, memory_order_relaxed) < n && + (i = atomic_fetch_add(&_atforks.allocated, 1)) < n) { + return _atforks.pool + i; + } else { + return 0; + } +} + int _pthread_atfork(atfork_f prepare, atfork_f parent, atfork_f child) { int rc; struct AtFork *a; - pthread_mutex_lock(&_atforks.lock); - if (_atforks.allocated < ARRAYLEN(_atforks.pool)) { - a = &_atforks.pool[_atforks.allocated++]; - a->f[0] = prepare; - a->f[1] = parent; - a->f[2] = child; - a->p[0] = 0; - a->p[1] = _atforks.list; - if (_atforks.list) - _atforks.list->p[0] = a; - _atforks.list = a; - rc = 0; - } else { - rc = ENOMEM; - } - pthread_mutex_unlock(&_atforks.lock); + if (!(a = _pthread_atfork_alloc())) + return ENOMEM; + a->f[0] = prepare; + a->f[1] = parent; + a->f[2] = child; + pthread_spin_lock(&_atforks.lock); + a->p[0] = 0; + a->p[1] = _atforks.list; + if (_atforks.list) + _atforks.list->p[0] = a; + _atforks.list = a; + pthread_spin_unlock(&_atforks.lock); + rc = 0; return rc; } diff --git a/libc/intrin/pthread_mutex_lock.c b/libc/intrin/pthread_mutex_lock.c index 1c2b79ecb59..cfde8a623e4 100644 --- a/libc/intrin/pthread_mutex_lock.c +++ b/libc/intrin/pthread_mutex_lock.c @@ -31,16 +31,17 @@ #include "third_party/nsync/futex.internal.h" #include "third_party/nsync/mu.h" -static void pthread_mutex_lock_spin(atomic_int *word) { +static void pthread_mutex_lock_naive(pthread_mutex_t *mutex, uint64_t word) { int backoff = 0; + uint64_t lock; for (;;) { - if (!atomic_exchange_explicit(word, 1, memory_order_acquire)) - break; - for (;;) { - if (!atomic_load_explicit(word, memory_order_relaxed)) - break; - backoff = pthread_delay_np(word, backoff); - } + word = MUTEX_UNLOCK(word); + lock = MUTEX_LOCK(word); + if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock, + memory_order_acquire, + memory_order_relaxed)) + return; + backoff = pthread_delay_np(mutex, backoff); } } @@ -95,12 +96,7 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex, mutex->_pid = __pid; return 0; } - for (;;) { - word = atomic_load_explicit(&mutex->_word, memory_order_relaxed); - if (!MUTEX_LOCKED(word)) - break; - backoff = pthread_delay_np(mutex, backoff); - } + backoff = pthread_delay_np(mutex, backoff); } } @@ -125,7 +121,7 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) { if (_weaken(nsync_futex_wait_)) { pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(word)); } else { - pthread_mutex_lock_spin(&mutex->_futex); + pthread_mutex_lock_naive(mutex, word); } return 0; } diff --git a/libc/intrin/pthread_mutex_trylock.c b/libc/intrin/pthread_mutex_trylock.c index 43ec5598335..5fd06a07860 100644 --- a/libc/intrin/pthread_mutex_trylock.c +++ b/libc/intrin/pthread_mutex_trylock.c @@ -27,8 +27,14 @@ #include "third_party/nsync/futex.internal.h" #include "third_party/nsync/mu.h" -static errno_t pthread_mutex_trylock_spin(atomic_int *word) { - if (!atomic_exchange_explicit(word, 1, memory_order_acquire)) +static errno_t pthread_mutex_trylock_naive(pthread_mutex_t *mutex, + uint64_t word) { + uint64_t lock; + word = MUTEX_UNLOCK(word); + lock = MUTEX_LOCK(word); + if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock, + memory_order_acquire, + memory_order_relaxed)) return 0; return EBUSY; } @@ -110,7 +116,7 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) { if (_weaken(nsync_futex_wait_)) { return pthread_mutex_trylock_drepper(&mutex->_futex); } else { - return pthread_mutex_trylock_spin(&mutex->_futex); + return pthread_mutex_trylock_naive(mutex, word); } } diff --git a/libc/intrin/pthread_mutex_unlock.c b/libc/intrin/pthread_mutex_unlock.c index 5bbfbbd0bd3..fcb549dcbdd 100644 --- a/libc/intrin/pthread_mutex_unlock.c +++ b/libc/intrin/pthread_mutex_unlock.c @@ -28,8 +28,9 @@ #include "third_party/nsync/futex.internal.h" #include "third_party/nsync/mu.h" -static void pthread_mutex_unlock_spin(atomic_int *word) { - atomic_store_explicit(word, 0, memory_order_release); +static void pthread_mutex_unlock_naive(pthread_mutex_t *mutex, uint64_t word) { + uint64_t lock = MUTEX_UNLOCK(word); + atomic_store_explicit(&mutex->_word, lock, memory_order_release); } // see "take 3" algorithm in "futexes are tricky" by ulrich drepper @@ -101,7 +102,7 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) { if (_weaken(nsync_futex_wake_)) { pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word)); } else { - pthread_mutex_unlock_spin(&mutex->_futex); + pthread_mutex_unlock_naive(mutex, word); } return 0; } diff --git a/libc/thread/pthread_spin_destroy.c b/libc/intrin/pthread_spin_destroy.c similarity index 100% rename from libc/thread/pthread_spin_destroy.c rename to libc/intrin/pthread_spin_destroy.c diff --git a/libc/thread/pthread_spin_init.c b/libc/intrin/pthread_spin_init.c similarity index 100% rename from libc/thread/pthread_spin_init.c rename to libc/intrin/pthread_spin_init.c diff --git a/libc/thread/pthread_spin_lock.c b/libc/intrin/pthread_spin_lock.c similarity index 92% rename from libc/thread/pthread_spin_lock.c rename to libc/intrin/pthread_spin_lock.c index d76b26fd861..4ce73139acb 100644 --- a/libc/thread/pthread_spin_lock.c +++ b/libc/intrin/pthread_spin_lock.c @@ -38,12 +38,8 @@ * @see pthread_spin_init */ errno_t pthread_spin_lock(pthread_spinlock_t *spin) { - for (;;) { - if (!atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire)) - break; - for (;;) - if (!atomic_load_explicit(&spin->_lock, memory_order_relaxed)) - break; + while (atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire)) { + pthread_pause_np(); } return 0; } diff --git a/libc/thread/pthread_spin_trylock.c b/libc/intrin/pthread_spin_trylock.c similarity index 100% rename from libc/thread/pthread_spin_trylock.c rename to libc/intrin/pthread_spin_trylock.c diff --git a/libc/thread/pthread_spin_unlock.c b/libc/intrin/pthread_spin_unlock.c similarity index 100% rename from libc/thread/pthread_spin_unlock.c rename to libc/intrin/pthread_spin_unlock.c diff --git a/libc/runtime/at_quick_exit.c b/libc/runtime/at_quick_exit.c index 87b4c4ee942..4786d801bc0 100644 --- a/libc/runtime/at_quick_exit.c +++ b/libc/runtime/at_quick_exit.c @@ -21,46 +21,30 @@ #include "libc/runtime/runtime.h" #include "libc/thread/thread.h" -static int count; static void (*funcs[32])(void); -static pthread_mutex_t __quick_exit_lock_obj; - -static void __quick_exit_wipe(void) { - pthread_mutex_init(&__quick_exit_lock_obj, 0); -} - -static void __quick_exit_lock(void) { - pthread_mutex_lock(&__quick_exit_lock_obj); -} - -static void __quick_exit_unlock(void) { - pthread_mutex_unlock(&__quick_exit_lock_obj); -} +static int count; +static pthread_spinlock_t lock; +pthread_spinlock_t *const __at_quick_exit_lockptr = &lock; void __funcs_on_quick_exit(void) { void (*func)(void); - __quick_exit_lock(); + pthread_spin_lock(&lock); while (count) { func = funcs[--count]; - __quick_exit_unlock(); + pthread_spin_unlock(&lock); func(); - __quick_exit_lock(); + pthread_spin_lock(&lock); } } int at_quick_exit(void func(void)) { int res = 0; - __quick_exit_lock(); + pthread_spin_lock(&lock); if (count == ARRAYLEN(funcs)) { res = -1; } else { funcs[count++] = func; } - __quick_exit_unlock(); + pthread_spin_unlock(&lock); return res; } - -__attribute__((__constructor__(10))) textstartup void __quick_exit_init(void) { - pthread_atfork(__quick_exit_lock, __quick_exit_unlock, __quick_exit_wipe); - __quick_exit_wipe(); -} diff --git a/libc/runtime/getsymboltable.c b/libc/runtime/getsymboltable.c index c3ad9552c50..90dcb169f12 100644 --- a/libc/runtime/getsymboltable.c +++ b/libc/runtime/getsymboltable.c @@ -17,8 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" -#include "libc/atomic.h" -#include "libc/cosmo.h" #include "libc/errno.h" #include "libc/intrin/promises.h" #include "libc/intrin/strace.h" @@ -29,12 +27,14 @@ #include "libc/runtime/symbols.internal.h" #include "libc/runtime/zipos.internal.h" #include "libc/str/str.h" +#include "libc/thread/thread.h" #include "libc/x/x.h" #include "libc/zip.internal.h" #include "third_party/puff/puff.h" __static_yoink("__get_symbol"); +static pthread_spinlock_t g_lock; struct SymbolTable *__symtab; // for kprintf static ssize_t GetZipFile(struct Zipos *zipos, const char *name) { @@ -100,25 +100,6 @@ static struct SymbolTable *GetSymbolTableFromElf(void) { } } -static void GetSymbolTableInit(void) { - struct Zipos *z; - int e = errno; - if (!__symtab && !__isworker) { - if (_weaken(__zipos_get) && (z = _weaken(__zipos_get)())) { - if ((__symtab = GetSymbolTableFromZip(z))) { - __symtab->names = - (uint32_t *)((char *)__symtab + __symtab->names_offset); - __symtab->name_base = - (char *)((char *)__symtab + __symtab->name_base_offset); - } - } - if (!__symtab) { - __symtab = GetSymbolTableFromElf(); - } - } - errno = e; -} - /** * Returns symbol table singleton. * @@ -140,7 +121,24 @@ static void GetSymbolTableInit(void) { * @return symbol table, or NULL if not found */ struct SymbolTable *GetSymbolTable(void) { - static atomic_uint once; - cosmo_once(&once, GetSymbolTableInit); + struct Zipos *z; + if (pthread_spin_trylock(&g_lock)) + return 0; + int e = errno; + if (!__symtab && !__isworker) { + if (_weaken(__zipos_get) && (z = _weaken(__zipos_get)())) { + if ((__symtab = GetSymbolTableFromZip(z))) { + __symtab->names = + (uint32_t *)((char *)__symtab + __symtab->names_offset); + __symtab->name_base = + (char *)((char *)__symtab + __symtab->name_base_offset); + } + } + if (!__symtab) { + __symtab = GetSymbolTableFromElf(); + } + } + errno = e; + pthread_spin_unlock(&g_lock); return __symtab; } diff --git a/libc/stdio/flockfile.c b/libc/stdio/flockfile.c index 93df22d7791..2c381295f93 100644 --- a/libc/stdio/flockfile.c +++ b/libc/stdio/flockfile.c @@ -55,14 +55,9 @@ static void __stdio_fork_parent(void) { static void __stdio_fork_child(void) { FILE *f; - for (int i = __fflush.handles.i; i--;) { - pthread_mutexattr_t attr; - pthread_mutexattr_init(&attr); - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + for (int i = __fflush.handles.i; i--;) if ((f = __fflush.handles.p[i])) - pthread_mutex_init(&f->lock, &attr); - pthread_mutexattr_destroy(&attr); - } + f->lock = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; pthread_mutex_init(&__fflush_lock_obj, 0); }