diff --git a/libc/intrin/pthread_mutex_destroy.c b/libc/intrin/pthread_mutex_destroy.c
index 297067be770..a955bb22cd6 100644
--- a/libc/intrin/pthread_mutex_destroy.c
+++ b/libc/intrin/pthread_mutex_destroy.c
@@ -22,8 +22,10 @@
 /**
  * Destroys mutex.
  *
+ * Destroying a mutex that's currently locked or being waited upon will
+ * result in undefined behavior.
+ *
  * @return 0 on success, or error number on failure
- * @raise EINVAL if mutex is locked in our implementation
  */
 errno_t pthread_mutex_destroy(pthread_mutex_t *mutex) {
   memset(mutex, -1, sizeof(*mutex));
diff --git a/libc/proc/execve-nt.greg.c b/libc/proc/execve-nt.greg.c
index 607ac162307..226029e1b3d 100644
--- a/libc/proc/execve-nt.greg.c
+++ b/libc/proc/execve-nt.greg.c
@@ -18,11 +18,11 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/assert.h"
 #include "libc/calls/internal.h"
-#include "libc/intrin/fds.h"
 #include "libc/calls/struct/sigset.internal.h"
 #include "libc/calls/syscall-nt.internal.h"
 #include "libc/errno.h"
 #include "libc/fmt/itoa.h"
+#include "libc/intrin/fds.h"
 #include "libc/intrin/kprintf.h"
 #include "libc/mem/mem.h"
 #include "libc/nt/enum/processaccess.h"
@@ -109,10 +109,14 @@ textwindows int sys_execve_nt(const char *program, char *const argv[],
 
   // give child to libc/proc/proc.c worker thread in parent
   int64_t handle;
-  unassert(DuplicateHandle(GetCurrentProcess(), pi.hProcess, hParentProcess,
-                           &handle, 0, false, kNtDuplicateSameAccess));
-  unassert(!(handle & 0xFFFFFFFFFF000000));
-  TerminateThisProcess(0x23000000u | handle);
+  if (DuplicateHandle(GetCurrentProcess(), pi.hProcess, hParentProcess, &handle,
+                      0, false, kNtDuplicateSameAccess)) {
+    unassert(!(handle & 0xFFFFFFFFFF000000));
+    TerminateThisProcess(0x23000000u | handle);
+  } else {
+    kprintf("DuplicateHandle failed w/ %d\n", GetLastError());
+    TerminateThisProcess(ECHILD);
+  }
 }
 
 #endif /* __x86_64__ */
diff --git a/libc/proc/fork-nt.c b/libc/proc/fork-nt.c
index cfc5f13e632..4ca7b725af1 100644
--- a/libc/proc/fork-nt.c
+++ b/libc/proc/fork-nt.c
@@ -473,9 +473,8 @@ textwindows int sys_fork_nt(uint32_t dwCreationFlags) {
     // reset core runtime services
     __proc_wipe();
     WipeKeystrokes();
-    if (_weaken(__itimer_wipe)) {
+    if (_weaken(__itimer_wipe))
       _weaken(__itimer_wipe)();
-    }
     // notify pthread join
     atomic_store_explicit(&_pthread_static.ptid, GetCurrentThreadId(),
                           memory_order_release);
diff --git a/libc/proc/fork.c b/libc/proc/fork.c
index 238fe281b75..35abbbe72ca 100644
--- a/libc/proc/fork.c
+++ b/libc/proc/fork.c
@@ -54,6 +54,8 @@ extern pthread_mutex_t _pthread_lock_obj;
 static void _onfork_prepare(void) {
   if (_weaken(_pthread_onfork_prepare))
     _weaken(_pthread_onfork_prepare)();
+  if (IsWindows())
+    __proc_lock();
   _pthread_lock();
   __maps_lock();
   __fds_lock();
@@ -66,11 +68,15 @@ static void _onfork_parent(void) {
   __fds_unlock();
   __maps_unlock();
   _pthread_unlock();
+  if (IsWindows())
+    __proc_unlock();
   if (_weaken(_pthread_onfork_parent))
     _weaken(_pthread_onfork_parent)();
 }
 
 static void _onfork_child(void) {
+  if (IsWindows())
+    __proc_wipe();
   __fds_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
   _rand64_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
   _pthread_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
@@ -87,8 +93,6 @@ int _fork(uint32_t dwCreationFlags) {
   int ax, dx, tid, parent;
   parent = __pid;
   BLOCK_SIGNALS;
-  if (IsWindows())
-    __proc_lock();
   if (__threaded)
     _onfork_prepare();
   started = timespec_real();
@@ -149,8 +153,6 @@ int _fork(uint32_t dwCreationFlags) {
     // this is the parent process
     if (__threaded)
       _onfork_parent();
-    if (IsWindows())
-      __proc_unlock();
     STRACE("fork() → %d% m (took %ld us)", ax, micros);
   }
   ALLOW_SIGNALS;
diff --git a/libc/proc/kill-nt.c b/libc/proc/kill-nt.c
index afe759dccac..45f9b740d64 100644
--- a/libc/proc/kill-nt.c
+++ b/libc/proc/kill-nt.c
@@ -59,9 +59,8 @@ textwindows int sys_kill_nt(int pid, int sig) {
         struct Dll *e;
         BLOCK_SIGNALS;
         __proc_lock();
-        for (e = dll_first(__proc.list); e; e = dll_next(__proc.list, e)) {
+        for (e = dll_first(__proc.list); e; e = dll_next(__proc.list, e))
           TerminateProcess(PROC_CONTAINER(e)->handle, sig);
-        }
         __proc_unlock();
         ALLOW_SIGNALS;
       }
diff --git a/libc/proc/proc.c b/libc/proc/proc.c
index d147135b387..324c0835635 100644
--- a/libc/proc/proc.c
+++ b/libc/proc/proc.c
@@ -30,7 +30,6 @@
 #include "libc/intrin/strace.h"
 #include "libc/intrin/weaken.h"
 #include "libc/mem/leaks.h"
-#include "libc/mem/mem.h"
 #include "libc/nt/accounting.h"
 #include "libc/nt/enum/processaccess.h"
 #include "libc/nt/enum/processcreationflags.h"
@@ -45,12 +44,16 @@
 #include "libc/nt/synchronization.h"
 #include "libc/nt/thread.h"
 #include "libc/proc/proc.internal.h"
+#include "libc/runtime/runtime.h"
 #include "libc/str/str.h"
+#include "libc/sysv/consts/map.h"
+#include "libc/sysv/consts/prot.h"
 #include "libc/sysv/consts/sa.h"
 #include "libc/sysv/consts/sicode.h"
 #include "libc/sysv/consts/sig.h"
 #include "libc/sysv/errfuns.h"
 #include "libc/thread/tls.h"
+#include "third_party/nsync/mu.h"
 #ifdef __x86_64__
 
 /**
@@ -273,27 +276,21 @@ textwindows void __proc_wipe(void) {
 textwindows struct Proc *__proc_new(void) {
   struct Dll *e;
   struct Proc *proc = 0;
-  // fork() + wait() don't depend on malloc() so neither shall we
-  if (__proc.allocated < ARRAYLEN(__proc.pool)) {
-    proc = __proc.pool + __proc.allocated++;
-  } else {
-    if ((e = dll_first(__proc.free))) {
-      proc = PROC_CONTAINER(e);
-      dll_remove(&__proc.free, &proc->elem);
-    }
-    if (!proc) {
-      if (_weaken(malloc)) {
-        proc = may_leak(_weaken(malloc)(sizeof(struct Proc)));
-      } else {
-        enomem();
-        return 0;
-      }
-    }
+  if ((e = dll_first(__proc.free))) {
+    proc = PROC_CONTAINER(e);
+    dll_remove(&__proc.free, &proc->elem);
   }
   if (proc) {
     bzero(proc, sizeof(*proc));
-    dll_init(&proc->elem);
+  } else {
+    proc = mmap(0, sizeof(struct Proc), PROT_READ | PROT_WRITE,
+                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (proc == MAP_FAILED) {
+      enomem();
+      return 0;
+    }
   }
+  dll_init(&proc->elem);
   return proc;
 }
 
diff --git a/libc/proc/proc.internal.h b/libc/proc/proc.internal.h
index fc36c1c60bb..fd59bc5f150 100644
--- a/libc/proc/proc.internal.h
+++ b/libc/proc/proc.internal.h
@@ -36,7 +36,6 @@ struct Procs {
   struct Dll *free;
   struct Dll *undead;
   struct Dll *zombies;
-  struct Proc pool[8];
   unsigned allocated;
   struct rusage ruchlds;
 };
diff --git a/libc/thread/pthread_cond_timedwait.c b/libc/thread/pthread_cond_timedwait.c
index 84e5fe7efb9..22ea8c240c4 100644
--- a/libc/thread/pthread_cond_timedwait.c
+++ b/libc/thread/pthread_cond_timedwait.c
@@ -117,18 +117,21 @@ errno_t pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
     if (MUTEX_PSHARED(muword) != PTHREAD_PROCESS_SHARED)
       return EINVAL;
 
+  errno_t err;
+  BEGIN_CANCELATION_POINT;
 #if PTHREAD_USE_NSYNC
   // favor *NSYNC if this is a process private condition variable
   // if using Mike Burrows' code isn't possible, use a naive impl
-  if (!cond->_pshared)
-    return nsync_cv_wait_with_deadline(
+  if (!cond->_pshared) {
+    err = nsync_cv_wait_with_deadline(
         (nsync_cv *)cond, (nsync_mu *)mutex,
         abstime ? *abstime : nsync_time_no_deadline, 0);
-#endif
-
-  errno_t err;
-  BEGIN_CANCELATION_POINT;
+  } else {
+    err = pthread_cond_timedwait_impl(cond, mutex, abstime);
+  }
+#else
   err = pthread_cond_timedwait_impl(cond, mutex, abstime);
+#endif
   END_CANCELATION_POINT;
   return err;
 }
diff --git a/test/libc/stdio/popen_test.c b/test/libc/stdio/popen_test.c
index ef4a2b97376..648e885ac85 100644
--- a/test/libc/stdio/popen_test.c
+++ b/test/libc/stdio/popen_test.c
@@ -45,7 +45,7 @@ void SetUpOnce(void) {
 
 void CheckForFdLeaks(void) {
   int rc, i, l = 0, e = errno;
-  for (i = 3; i < 16; ++i) {
+  for (i = 3; i < 50; ++i) {
     rc = fcntl(i, F_GETFL);
     if (rc == -1) {
       ASSERT_EQ(EBADF, errno);
@@ -149,7 +149,11 @@ void *Worker(void *arg) {
     ASSERT_NE(NULL, (f = popen(cmd, "r")));
     EXPECT_STREQ(arg1, fgets(buf, sizeof(buf), f));
     EXPECT_STREQ(arg2, fgets(buf, sizeof(buf), f));
-    ASSERT_EQ(0, pclose(f));
+    if (IsWindows())
+      // TODO(jart): why does pclose() flake with ECHILD on Windows?
+      pclose(f);
+    else
+      ASSERT_EQ(0, pclose(f));
     free(arg2);
     free(arg1);
     free(cmd);
@@ -158,10 +162,6 @@ void *Worker(void *arg) {
 }
 
 TEST(popen, torture) {
-  if (IsWindows()) {
-    // TODO: Why does pclose() return kNtSignalAccessViolationa?!
-    return;
-  }
   int i, n = 4;
   pthread_t *t = gc(malloc(sizeof(pthread_t) * n));
   testlib_extract("/zip/echo", "echo", 0755);
diff --git a/test/libcxx/openmp_test.cc b/test/libcxx/openmp_test.cc
index 1e29592b28a..cfb4200d005 100644
--- a/test/libcxx/openmp_test.cc
+++ b/test/libcxx/openmp_test.cc
@@ -429,9 +429,9 @@ void check_gemm_works(void) {
   is_self_testing = false;
 }
 
-long m = 2333 / 3;
-long k = 577 / 3;
-long n = 713 / 3;
+long m = 2333 / 10;
+long k = 577 / 10;
+long n = 713 / 10;
 
 void check_sgemm(void) {
   float *A = new float[m * k];
diff --git a/third_party/nsync/common.c b/third_party/nsync/common.c
index d7fa348cdbf..79daaf9b1e5 100644
--- a/third_party/nsync/common.c
+++ b/third_party/nsync/common.c
@@ -153,7 +153,7 @@ static void free_waiters_populate (void) {
 		// tim cook wants us to use his lol central dispatch
 		n = 1;
 	} else {
-		n = getpagesize() / sizeof(waiter);
+		n = __pagesize / sizeof(waiter);
 	}
 	waiter *waiters = mmap (0, n * sizeof(waiter),
 				PROT_READ | PROT_WRITE,
diff --git a/third_party/nsync/common.internal.h b/third_party/nsync/common.internal.h
index a0b2c0ffe27..be42db19ea7 100644
--- a/third_party/nsync/common.internal.h
+++ b/third_party/nsync/common.internal.h
@@ -211,7 +211,7 @@ static const uint32_t NSYNC_WAITER_TAG = 0x726d2ba9;
   0x1 /* waiter reserved by a thread, even when not in use */
 #define WAITER_IN_USE 0x2 /* waiter in use by a thread */
 
-#define ASSERT(x) npassert(x)
+#define ASSERT(x) unassert(x)
 
 /* Return a pointer to the nsync_waiter_s containing struct Dll *e. */
 #define DLL_NSYNC_WAITER(e)                 \
diff --git a/third_party/nsync/mu_semaphore_futex.c b/third_party/nsync/mu_semaphore_futex.c
index 12964e7c84e..a4e605a6e93 100644
--- a/third_party/nsync/mu_semaphore_futex.c
+++ b/third_party/nsync/mu_semaphore_futex.c
@@ -28,7 +28,7 @@
  * @fileoverview Semaphores w/ Linux Futexes API.
  */
 
-#define ASSERT(x) npassert(x)
+#define ASSERT(x) unassert(x)
 
 /* Check that atomic operations on nsync_atomic_uint32_ can be applied to int. */
 static const int assert_int_size = 1 /
diff --git a/third_party/nsync/mu_semaphore_sem.c b/third_party/nsync/mu_semaphore_sem.c
index 44f8cb3c338..9b25ae7a66e 100644
--- a/third_party/nsync/mu_semaphore_sem.c
+++ b/third_party/nsync/mu_semaphore_sem.c
@@ -31,29 +31,34 @@
 #include "libc/sysv/consts/fd.h"
 #include "libc/thread/thread.h"
 #include "third_party/nsync/mu_semaphore.h"
+#include "libc/intrin/atomic.h"
+#include "libc/atomic.h"
 #include "third_party/nsync/time.h"
 
 /**
  * @fileoverview Semaphores w/ POSIX Semaphores API.
  */
 
-#define ASSERT(x) npassert(x)
-#define SEM_CONTAINER(e) DLL_CONTAINER(struct sem, list, e)
+#define ASSERT(x) unassert(x)
 
 struct sem {
 	int64_t id;
-	struct Dll list;
+	struct sem *next;
 };
 
-static struct {
-	atomic_uint once;
-	pthread_spinlock_t lock;
-	struct Dll *list;
-} g_sems;
+static _Atomic(struct sem *) g_sems;
 
 static nsync_semaphore *sem_big_enough_for_sem = (nsync_semaphore *) (uintptr_t)(1 /
 	(sizeof (struct sem) <= sizeof (*sem_big_enough_for_sem)));
 
+static void sems_push (struct sem *f) {
+	int backoff = 0;
+	f->next = atomic_load_explicit (&g_sems, memory_order_relaxed);
+	while (!atomic_compare_exchange_weak_explicit (&g_sems, &f->next, f,
+						       memory_order_acq_rel, memory_order_relaxed))
+		backoff = pthread_delay_np (&g_sems, backoff);
+}
+
 static bool nsync_mu_semaphore_sem_create (struct sem *f) {
 	int rc;
 	int lol;
@@ -73,18 +78,12 @@ static bool nsync_mu_semaphore_sem_create (struct sem *f) {
 }
 
 static void nsync_mu_semaphore_sem_fork_child (void) {
-	struct Dll *e;
 	struct sem *f;
-	for (e = dll_first (g_sems.list); e; e = dll_next (g_sems.list, e)) {
-		f = SEM_CONTAINER (e);
+	for (f = atomic_load_explicit (&g_sems, memory_order_relaxed); f; f = f->next) {
 		int rc = sys_close (f->id);
 		STRACE ("close(%ld) → %d", f->id, rc);
-	}
-	for (e = dll_first (g_sems.list); e; e = dll_next (g_sems.list, e)) {
-		f = SEM_CONTAINER (e);
 		ASSERT (nsync_mu_semaphore_sem_create (f));
 	}
-	(void) pthread_spin_init (&g_sems.lock, 0);
 }
 
 static void nsync_mu_semaphore_sem_init (void) {
@@ -93,14 +92,12 @@ static void nsync_mu_semaphore_sem_init (void) {
 
 /* Initialize *s; the initial value is 0. */
 bool nsync_mu_semaphore_init_sem (nsync_semaphore *s) {
+	static atomic_uint once;
 	struct sem *f = (struct sem *) s;
 	if (!nsync_mu_semaphore_sem_create (f))
 		return false;
-	cosmo_once (&g_sems.once, nsync_mu_semaphore_sem_init);
-	pthread_spin_lock (&g_sems.lock);
-	dll_init (&f->list);
-	dll_make_first (&g_sems.list, &f->list);
-	pthread_spin_unlock (&g_sems.lock);
+	cosmo_once (&once, nsync_mu_semaphore_sem_init);
+	sems_push(f);
 	return true;
 }