Skip to content

Commit

Permalink
Make spin locks go faster
Browse files Browse the repository at this point in the history
  • Loading branch information
jart committed Jul 26, 2024
1 parent a31d5ea commit c8e25d8
Show file tree
Hide file tree
Showing 16 changed files with 150 additions and 123 deletions.
20 changes: 17 additions & 3 deletions libc/calls/getloadavg-nt.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,27 @@

static int cpus;
static double load;
static pthread_spinlock_t lock;
static struct NtFileTime idle1, kern1, user1;
// Held by sys_getloadavg_nt() around its GetSystemTimes() sampling.
static pthread_mutex_t getloadavg_lock;

// Acquires getloadavg_lock; also registered by ntinitload() as the
// pthread_atfork() prepare handler.
static void __getloadavg_lock(void) {
  pthread_mutex_lock(&getloadavg_lock);
}

// Releases getloadavg_lock; also registered by ntinitload() as the
// pthread_atfork() parent handler.
static void __getloadavg_unlock(void) {
  pthread_mutex_unlock(&getloadavg_lock);
}

// Puts getloadavg_lock back into its freshly-initialized state. Called
// once by ntinitload() and registered as the pthread_atfork() child
// handler.
static void __getloadavg_wipe(void) {
  pthread_mutex_init(&getloadavg_lock, 0);
}

textwindows int sys_getloadavg_nt(double *a, int n) {
int i, rc;
uint64_t elapsed, used;
struct NtFileTime idle, kern, user;
BLOCK_SIGNALS;
pthread_spin_lock(&lock);
__getloadavg_lock();
if (GetSystemTimes(&idle, &kern, &user)) {
elapsed = (FT(kern) - FT(kern1)) + (FT(user) - FT(user1));
if (elapsed) {
Expand All @@ -54,7 +66,7 @@ textwindows int sys_getloadavg_nt(double *a, int n) {
} else {
rc = __winerr();
}
pthread_spin_unlock(&lock);
__getloadavg_unlock();
ALLOW_SIGNALS;
return rc;
}
Expand All @@ -65,5 +77,7 @@ __attribute__((__constructor__(40))) static textstartup void ntinitload(void) {
cpus = __get_cpu_count() / 2;
cpus = MAX(1, cpus);
GetSystemTimes(&idle1, &kern1, &user1);
pthread_atfork(__getloadavg_lock, __getloadavg_unlock, __getloadavg_wipe);
__getloadavg_wipe();
}
}
26 changes: 21 additions & 5 deletions libc/calls/sig.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/ss.h"
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/thread.h"
#ifdef __x86_64__

/**
Expand All @@ -64,6 +65,20 @@ struct SignalFrame {
ucontext_t ctx;
};

// Held by __sig_killer() while it suspends a thread and captures its
// context.
static pthread_mutex_t __sig_lock_obj;

// Puts __sig_lock_obj back into its freshly-initialized state. Called
// once by __sig_init() and registered as the pthread_atfork() child
// handler.
static void __sig_wipe(void) {
  pthread_mutex_init(&__sig_lock_obj, 0);
}

// Acquires __sig_lock_obj; also registered by __sig_init() as the
// pthread_atfork() prepare handler.
static void __sig_lock(void) {
  pthread_mutex_lock(&__sig_lock_obj);
}

// Releases __sig_lock_obj; also registered by __sig_init() as the
// pthread_atfork() parent handler.
static void __sig_unlock(void) {
  pthread_mutex_unlock(&__sig_lock_obj);
}

static textwindows bool __sig_ignored_by_default(int sig) {
return sig == SIGURG || //
sig == SIGCONT || //
Expand Down Expand Up @@ -318,22 +333,21 @@ static textwindows int __sig_killer(struct PosixThread *pt, int sig, int sic) {
// take control of thread
// suspending the thread happens asynchronously
// however getting the context blocks until it's frozen
static pthread_spinlock_t killer_lock;
pthread_spin_lock(&killer_lock);
__sig_lock();
if (SuspendThread(th) == -1u) {
STRACE("SuspendThread failed w/ %d", GetLastError());
pthread_spin_unlock(&killer_lock);
__sig_unlock();
return ESRCH;
}
struct NtContext nc;
nc.ContextFlags = kNtContextFull;
if (!GetThreadContext(th, &nc)) {
STRACE("GetThreadContext failed w/ %d", GetLastError());
ResumeThread(th);
pthread_spin_unlock(&killer_lock);
__sig_unlock();
return ESRCH;
}
pthread_spin_unlock(&killer_lock);
__sig_unlock();

// we can't preempt threads that masked sig or are blocked
// we can't preempt threads that are running in win32 code
Expand Down Expand Up @@ -612,6 +626,8 @@ __attribute__((__constructor__(10))) textstartup void __sig_init(void) {
return;
AddVectoredExceptionHandler(true, (void *)__sig_crash);
SetConsoleCtrlHandler((void *)__sig_console, true);
pthread_atfork(__sig_lock, __sig_unlock, __sig_wipe);
__sig_wipe();
}

#endif /* __x86_64__ */
1 change: 1 addition & 0 deletions libc/intrin/cxalock.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ void __cxa_unlock(void) {

/**
 * Startup constructor (priority 60) for the __cxa lock.
 *
 * Registers __cxa_lock, __cxa_unlock and __cxa_wipe as the prepare,
 * parent and child fork handlers respectively, then calls __cxa_wipe()
 * once right away.
 */
__attribute__((__constructor__(60))) static textstartup void __cxa_init() {
  pthread_atfork(__cxa_lock, __cxa_unlock, __cxa_wipe);
  __cxa_wipe();
}
22 changes: 5 additions & 17 deletions libc/intrin/maps.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,27 +86,15 @@ void __maps_init(void) {

/**
 * Acquires the `__maps.lock` spin lock on behalf of the calling thread.
 *
 * Each call made while TLS is enabled increments the thread's
 * `tib_relock_maps` counter. Only the call that finds the counter at
 * zero actually spins on `__maps.lock`; nested calls return at once.
 *
 * @return true if this thread's relock counter was already nonzero on
 *     entry (nothing was acquired by this call), otherwise false
 */
privileged bool __maps_lock(void) {
  struct CosmoTib *tib;
  // Bail out when TLS is NOT enabled: the thread block fetched below
  // must not be touched in that case.
  // NOTE(review): presumably the process is still single-threaded at
  // that point, so skipping the lock is harmless -- confirm.
  if (!__tls_enabled)
    return false;
  tib = __get_tls_privileged();
  // A nonzero previous count means this thread is re-entering.
  if (atomic_fetch_add_explicit(&tib->tib_relock_maps, 1, memory_order_relaxed))
    return true;
  // Try to grab the lock with an exchange. While someone else holds it,
  // poll with relaxed loads until it reads as free, then retry the
  // exchange.
  while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire)) {
    while (atomic_load_explicit(&__maps.lock, memory_order_relaxed)) {
    }
  }
  return false;
}

Expand Down
66 changes: 25 additions & 41 deletions libc/intrin/pthread_atfork_actual.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,86 +16,70 @@
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/atomic.h"
#include "libc/calls/state.internal.h"
#include "libc/cosmo.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/dll.h"
#include "libc/intrin/strace.h"
#include "libc/macros.internal.h"
#include "libc/proc/proc.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"

// One registered set of fork handlers; nodes are linked into _atforks.list.
struct AtFork {
// List links. _pthread_atfork() stores the previous list head in p[1]
// and points that older node's p[0] back at the new node.
// _pthread_onfork() follows p[1] for the prepare pass and p[0] for the
// parent/child passes.
struct AtFork *p[2];
// Handlers, indexed the way _pthread_onfork() invokes them:
// f[0] = prepare, f[1] = parent, f[2] = child. A null entry is skipped.
atfork_f f[3];
};

static struct AtForks {
pthread_spinlock_t lock;
pthread_mutex_t lock;
struct AtFork *list;
struct AtFork pool[64];
atomic_int allocated;
} _atforks;
struct AtFork pool[256];
int allocated;
} _atforks = {
PTHREAD_MUTEX_INITIALIZER,
};

static void _pthread_onfork(int i, const char *op) {
struct AtFork *a;
if (!i)
pthread_spin_lock(&_atforks.lock);
for (a = _atforks.list; a; a = a->p[!i]) {
if (a->f[i]) {
STRACE("pthread_atfork(%s, %t)", op, a->f[i]);
a->f[i]();
}
_atforks.list = a;
}
if (i)
pthread_spin_unlock(&_atforks.lock);
}

/**
 * Runs the registered prepare handlers.
 *
 * Takes `_atforks.lock` first and returns with it still held;
 * _pthread_onfork_parent() is the function that releases it.
 */
void _pthread_onfork_prepare(void) {
  pthread_mutex_lock(&_atforks.lock);
  _pthread_onfork(0, "prepare");
}

/**
 * Runs the registered parent handlers, then releases `_atforks.lock`
 * (the lock that _pthread_onfork_prepare() acquired).
 */
void _pthread_onfork_parent(void) {
  _pthread_onfork(1, "parent");
  pthread_mutex_unlock(&_atforks.lock);
}

/**
 * Runs the registered child handlers.
 *
 * `_atforks.lock` is re-initialized rather than unlocked before the
 * handlers are invoked.
 */
void _pthread_onfork_child(void) {
  pthread_mutex_init(&_atforks.lock, 0);
  _pthread_onfork(2, "child");
}

static struct AtFork *_pthread_atfork_alloc(void) {
int i, n = ARRAYLEN(_atforks.pool);
if (atomic_load_explicit(&_atforks.allocated, memory_order_relaxed) < n &&
(i = atomic_fetch_add(&_atforks.allocated, 1)) < n) {
return _atforks.pool + i;
} else {
return 0;
}
}

int _pthread_atfork(atfork_f prepare, atfork_f parent, atfork_f child) {
int rc;
struct AtFork *a;
if (!(a = _pthread_atfork_alloc()))
return ENOMEM;
a->f[0] = prepare;
a->f[1] = parent;
a->f[2] = child;
pthread_spin_lock(&_atforks.lock);
a->p[0] = 0;
a->p[1] = _atforks.list;
if (_atforks.list)
_atforks.list->p[0] = a;
_atforks.list = a;
pthread_spin_unlock(&_atforks.lock);
rc = 0;
pthread_mutex_lock(&_atforks.lock);
if (_atforks.allocated < ARRAYLEN(_atforks.pool)) {
a = &_atforks.pool[_atforks.allocated++];
a->f[0] = prepare;
a->f[1] = parent;
a->f[2] = child;
a->p[0] = 0;
a->p[1] = _atforks.list;
if (_atforks.list)
_atforks.list->p[0] = a;
_atforks.list = a;
rc = 0;
} else {
rc = ENOMEM;
}
pthread_mutex_unlock(&_atforks.lock);
return rc;
}
26 changes: 15 additions & 11 deletions libc/intrin/pthread_mutex_lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,16 @@
#include "third_party/nsync/futex.internal.h"
#include "third_party/nsync/mu.h"

static void pthread_mutex_lock_naive(pthread_mutex_t *mutex, uint64_t word) {
static void pthread_mutex_lock_spin(atomic_int *word) {
int backoff = 0;
uint64_t lock;
for (;;) {
word = MUTEX_UNLOCK(word);
lock = MUTEX_LOCK(word);
if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
memory_order_acquire,
memory_order_relaxed))
return;
backoff = pthread_delay_np(mutex, backoff);
if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
break;
for (;;) {
if (!atomic_load_explicit(word, memory_order_relaxed))
break;
backoff = pthread_delay_np(word, backoff);
}
}
}

Expand Down Expand Up @@ -96,7 +95,12 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
mutex->_pid = __pid;
return 0;
}
backoff = pthread_delay_np(mutex, backoff);
for (;;) {
word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
if (!MUTEX_LOCKED(word))
break;
backoff = pthread_delay_np(mutex, backoff);
}
}
}

Expand All @@ -121,7 +125,7 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
if (_weaken(nsync_futex_wait_)) {
pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
} else {
pthread_mutex_lock_naive(mutex, word);
pthread_mutex_lock_spin(&mutex->_futex);
}
return 0;
}
Expand Down
12 changes: 3 additions & 9 deletions libc/intrin/pthread_mutex_trylock.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,8 @@
#include "third_party/nsync/futex.internal.h"
#include "third_party/nsync/mu.h"

static errno_t pthread_mutex_trylock_naive(pthread_mutex_t *mutex,
uint64_t word) {
uint64_t lock;
word = MUTEX_UNLOCK(word);
lock = MUTEX_LOCK(word);
if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
memory_order_acquire,
memory_order_relaxed))
static errno_t pthread_mutex_trylock_spin(atomic_int *word) {
if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
return 0;
return EBUSY;
}
Expand Down Expand Up @@ -116,7 +110,7 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
if (_weaken(nsync_futex_wait_)) {
return pthread_mutex_trylock_drepper(&mutex->_futex);
} else {
return pthread_mutex_trylock_naive(mutex, word);
return pthread_mutex_trylock_spin(&mutex->_futex);
}
}

Expand Down
7 changes: 3 additions & 4 deletions libc/intrin/pthread_mutex_unlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@
#include "third_party/nsync/futex.internal.h"
#include "third_party/nsync/mu.h"

static void pthread_mutex_unlock_naive(pthread_mutex_t *mutex, uint64_t word) {
uint64_t lock = MUTEX_UNLOCK(word);
atomic_store_explicit(&mutex->_word, lock, memory_order_release);
static void pthread_mutex_unlock_spin(atomic_int *word) {
atomic_store_explicit(word, 0, memory_order_release);
}

// see "take 3" algorithm in "futexes are tricky" by ulrich drepper
Expand Down Expand Up @@ -102,7 +101,7 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
if (_weaken(nsync_futex_wake_)) {
pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
} else {
pthread_mutex_unlock_naive(mutex, word);
pthread_mutex_unlock_spin(&mutex->_futex);
}
return 0;
}
Expand Down
Loading

0 comments on commit c8e25d8

Please sign in to comment.