diff --git a/examples/nproc.c b/examples/nproc.c new file mode 100644 index 00000000000..73ad91934e4 --- /dev/null +++ b/examples/nproc.c @@ -0,0 +1,15 @@ +#if 0 +/*─────────────────────────────────────────────────────────────────╗ +│ To the extent possible under law, Justine Tunney has waived │ +│ all copyright and related or neighboring rights to this file, │ +│ as it is written in the following disclaimers: │ +│ • http://unlicense.org/ │ +│ • http://creativecommons.org/publicdomain/zero/1.0/ │ +╚─────────────────────────────────────────────────────────────────*/ +#endif +#include +#include + +int main(int argc, char *argv[]) { + printf("%d\n", __get_cpu_count()); +} diff --git a/libc/calls/getcpu.c b/libc/calls/getcpu.c index bdc97089e5e..b689f43fc9e 100644 --- a/libc/calls/getcpu.c +++ b/libc/calls/getcpu.c @@ -30,39 +30,63 @@ int sys_getcpu(unsigned *opt_cpu, unsigned *opt_node, void *tcache); +/** + * Determines ID of CPU on which thread is currently scheduled. + * + * This is the same as sched_getcpu(), except it also supports returning + * the ID of the current NUMA node. On some platforms this functionality + * isn't available, in which case `out_opt_node` is always set to 0. 
+ */ int getcpu(unsigned *out_opt_cpu, unsigned *out_opt_node) { - unsigned cpu; - unsigned node; - if (X86_HAVE(RDTSCP)) { + + if (IsWindows()) { + struct NtProcessorNumber pn; + GetCurrentProcessorNumberEx(&pn); /* pn also feeds the NUMA lookup below */ + if (out_opt_cpu) { + *out_opt_cpu = 64 * pn.Group + pn.Number; + } + if (out_opt_node) { + unsigned short node16; + if (GetNumaProcessorNodeEx(&pn, &node16)) { + *out_opt_node = node16; + } else { + return __winerr(); + } + } + return 0; + } + +#ifdef __x86_64__ + if (X86_HAVE(RDTSCP) && (IsLinux() || IsFreebsd())) { unsigned tsc_aux; rdtscp(&tsc_aux); - cpu = TSC_AUX_CORE(tsc_aux); - node = TSC_AUX_NODE(tsc_aux); - } else if (IsWindows()) { - struct NtProcessorNumber pn; - GetCurrentProcessorNumberEx(&pn); - cpu = 64 * pn.Group + pn.Number; - unsigned short node16; - if (GetNumaProcessorNodeEx(&pn, &node16)) { - node = node16; - } else { - return __winerr(); + if (out_opt_cpu) + *out_opt_cpu = TSC_AUX_CORE(tsc_aux); + if (out_opt_node) + *out_opt_node = TSC_AUX_NODE(tsc_aux); + return 0; + } +#endif + + if (IsXnu() || IsOpenbsd() || IsNetbsd() || IsFreebsd()) { + if (out_opt_cpu) { + int rc = sched_getcpu(); + if (rc == -1) + return -1; + *out_opt_cpu = rc; } - } else if (IsAarch64()) { - long tpidr_el0; - asm("mrs\t%0,tpidr_el0" : "=r"(tpidr_el0)); - cpu = tpidr_el0 & 255; - node = 0; - } else { - int rc = sys_getcpu(&cpu, &node, 0); - if (rc == -1) - return -1; + if (out_opt_node) + *out_opt_node = 0; + return 0; } - if (out_opt_cpu) { + + unsigned cpu, node; + int rc = sys_getcpu(&cpu, &node, 0); + if (rc == -1) + return -1; + if (out_opt_cpu) *out_opt_cpu = cpu; - } - if (out_opt_node) { + if (out_opt_node) *out_opt_node = node; - } return 0; } diff --git a/libc/calls/sched_getcpu.c b/libc/calls/sched_getcpu.c index 12a0a832b26..e671e80ca6b 100644 --- a/libc/calls/sched_getcpu.c +++ b/libc/calls/sched_getcpu.c @@ -23,32 +23,82 @@ #include "libc/nexgen32e/x86feature.h" #include "libc/nt/struct/processornumber.h" #include "libc/nt/synchronization.h" 
+#include "libc/runtime/syslib.internal.h" #include "libc/sysv/errfuns.h" int sys_getcpu(unsigned *opt_cpu, unsigned *opt_node, void *tcache); /** * Returns ID of CPU on which thread is currently scheduled. + * + * This function is supported on the following platforms: + * + * - x86-64 + * + * - Linux: rdtscp + * - FreeBSD: rdtscp + * - Windows: win32 + * - OpenBSD: unsupported + * - NetBSD: unsupported + * - MacOS: unsupported + * + * - aarch64 + * + * - Linux: syscall + * - FreeBSD: syscall + * - MacOS: supported + * * @return cpu number on success, or -1 w/ errno */ int sched_getcpu(void) { - if (X86_HAVE(RDTSCP)) { - unsigned tsc_aux; - rdtscp(&tsc_aux); - return TSC_AUX_CORE(tsc_aux); - } else if (IsAarch64()) { - long tpidr_el0; - asm("mrs\t%0,tpidr_el0" : "=r"(tpidr_el0)); - return tpidr_el0 & 255; - } else if (IsWindows()) { + + if (IsWindows()) { struct NtProcessorNumber pn; GetCurrentProcessorNumberEx(&pn); return 64 * pn.Group + pn.Number; - } else { - unsigned cpu = 0; - int rc = sys_getcpu(&cpu, 0, 0); - if (rc == -1) - return -1; - return cpu; } + +#ifdef __x86_64__ + if (X86_HAVE(RDTSCP) && (IsLinux() || IsFreebsd())) { + // Only the Linux, FreeBSD, and Windows kernels can be counted upon + // to populate the TSC_AUX register with the current CPU number. 
+ unsigned tsc_aux; + rdtscp(&tsc_aux); + return TSC_AUX_CORE(tsc_aux); + } +#endif + +#ifdef __aarch64__ + if (IsXnu()) { + // pthread_cpu_number_np() is defined by MacOS 11.0+ (Big Sur) in + // the SDK pthread.h header file, even though there's no man page + if (__syslib && __syslib->__version >= 9) { + errno_t err; + size_t out = 0; + if ((err = __syslib->__pthread_cpu_number_np(&out))) { + errno = err; + return -1; + } + return out; + } else { + errno = ENOSYS; // upgrade your ape loader + return -1; // cc -o /usr/local/bin/ape ape/ape-m1.c + } + } +#endif + +#ifdef __aarch64__ + if (IsFreebsd()) { + register int x0 asm("x0"); + register int x8 asm("x8") = 581; // sched_getcpu + asm volatile("svc\t0" : "=r"(x0) : "r"(x8) : "memory"); + return x0; + } +#endif + + unsigned cpu = 0; + int rc = sys_getcpu(&cpu, 0, 0); + if (rc == -1) + return -1; + return cpu; } diff --git a/libc/intrin/atomic.h b/libc/intrin/atomic.h index 3d503d37f62..a2d93df8a3a 100644 --- a/libc/intrin/atomic.h +++ b/libc/intrin/atomic.h @@ -13,48 +13,26 @@ */ typedef enum { - memory_order_relaxed, - memory_order_consume, - memory_order_acquire, - memory_order_release, - memory_order_acq_rel, - memory_order_seq_cst, + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_consume = __ATOMIC_CONSUME, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST } memory_order; -#define ATOMIC_VAR_INIT(...) __VA_ARGS__ +#if !(defined __STDC_VERSION__ && __STDC_VERSION__ > 201710L) +#define ATOMIC_VAR_INIT(...) 
__VA_ARGS__ +#endif + #define atomic_is_lock_free(obj) ((void)(obj), sizeof(obj) <= sizeof(void *)) #define atomic_flag atomic_bool -#define ATOMIC_FLAG_INIT ATOMIC_VAR_INIT(0) +#define ATOMIC_FLAG_INIT false #define atomic_flag_test_and_set_explicit(x, order) \ atomic_exchange_explicit(x, 1, order) #define atomic_flag_clear_explicit(x, order) atomic_store_explicit(x, 0, order) -#define atomic_compare_exchange_strong(pObject, pExpected, desired) \ - atomic_compare_exchange_strong_explicit( \ - pObject, pExpected, desired, memory_order_seq_cst, memory_order_seq_cst) -#define atomic_compare_exchange_weak(pObject, pExpected, desired) \ - atomic_compare_exchange_weak_explicit( \ - pObject, pExpected, desired, memory_order_seq_cst, memory_order_seq_cst) -#define atomic_exchange(pObject, desired) \ - atomic_exchange_explicit(pObject, desired, memory_order_seq_cst) -#define atomic_fetch_add(pObject, operand) \ - atomic_fetch_add_explicit(pObject, operand, memory_order_seq_cst) -#define atomic_fetch_and(pObject, operand) \ - atomic_fetch_and_explicit(pObject, operand, memory_order_seq_cst) -#define atomic_fetch_or(pObject, operand) \ - atomic_fetch_or_explicit(pObject, operand, memory_order_seq_cst) -#define atomic_fetch_sub(pObject, operand) \ - atomic_fetch_sub_explicit(pObject, operand, memory_order_seq_cst) -#define atomic_fetch_xor(pObject, operand) \ - atomic_fetch_xor_explicit(pObject, operand, memory_order_seq_cst) -#define atomic_load(pObject) atomic_load_explicit(pObject, memory_order_seq_cst) -#define atomic_store(pObject, desired) \ - atomic_store_explicit(pObject, desired, memory_order_seq_cst) -#define atomic_flag_test_and_set(x) \ - atomic_flag_test_and_set_explicit(x, memory_order_seq_cst) -#define atomic_flag_clear(x) atomic_flag_clear_explicit(x, memory_order_seq_cst) - #if defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) #define atomic_init(obj, value) __c11_atomic_init(obj, value) @@ -84,9 +62,35 @@ typedef enum { #define atomic_store_explicit(object, desired, 
order) \ __c11_atomic_store(object, desired, order) +#define atomic_compare_exchange_strong(pObject, pExpected, desired) \ + atomic_compare_exchange_strong_explicit( \ + pObject, pExpected, desired, memory_order_seq_cst, memory_order_seq_cst) +#define atomic_compare_exchange_weak(pObject, pExpected, desired) \ + atomic_compare_exchange_weak_explicit( \ + pObject, pExpected, desired, memory_order_seq_cst, memory_order_seq_cst) +#define atomic_exchange(pObject, desired) \ + atomic_exchange_explicit(pObject, desired, memory_order_seq_cst) +#define atomic_fetch_add(pObject, operand) \ + atomic_fetch_add_explicit(pObject, operand, memory_order_seq_cst) +#define atomic_fetch_and(pObject, operand) \ + atomic_fetch_and_explicit(pObject, operand, memory_order_seq_cst) +#define atomic_fetch_or(pObject, operand) \ + atomic_fetch_or_explicit(pObject, operand, memory_order_seq_cst) +#define atomic_fetch_sub(pObject, operand) \ + atomic_fetch_sub_explicit(pObject, operand, memory_order_seq_cst) +#define atomic_fetch_xor(pObject, operand) \ + atomic_fetch_xor_explicit(pObject, operand, memory_order_seq_cst) +#define atomic_load(pObject) atomic_load_explicit(pObject, memory_order_seq_cst) +#define atomic_store(pObject, desired) \ + atomic_store_explicit(pObject, desired, memory_order_seq_cst) +#define atomic_flag_test_and_set(x) \ + atomic_flag_test_and_set_explicit(x, memory_order_seq_cst) +#define atomic_flag_clear(x) atomic_flag_clear_explicit(x, memory_order_seq_cst) + #elif (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 407 -#define atomic_init(obj, value) ((void)(*(obj) = (value))) +#define atomic_init(obj, value) \ + atomic_store_explicit(obj, value, __ATOMIC_RELAXED) #define atomic_thread_fence(order) __atomic_thread_fence(order) #define atomic_signal_fence(order) __atomic_signal_fence(order) #define atomic_compare_exchange_strong_explicit(pObject, pExpected, desired, \ @@ -111,6 +115,31 @@ typedef enum { #define atomic_store_explicit(pObject, desired, order) \ 
__atomic_store_n(pObject, desired, order) +#define atomic_compare_exchange_strong(pObject, pExpected, desired) \ + atomic_compare_exchange_strong_explicit(pObject, pExpected, desired, \ + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) +#define atomic_compare_exchange_weak(pObject, pExpected, desired) \ + atomic_compare_exchange_weak_explicit(pObject, pExpected, desired, \ + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) +#define atomic_exchange(pObject, desired) \ + atomic_exchange_explicit(pObject, desired, __ATOMIC_SEQ_CST) +#define atomic_fetch_add(pObject, operand) \ + atomic_fetch_add_explicit(pObject, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_and(pObject, operand) \ + atomic_fetch_and_explicit(pObject, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_or(pObject, operand) \ + atomic_fetch_or_explicit(pObject, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_sub(pObject, operand) \ + atomic_fetch_sub_explicit(pObject, operand, __ATOMIC_SEQ_CST) +#define atomic_fetch_xor(pObject, operand) \ + atomic_fetch_xor_explicit(pObject, operand, __ATOMIC_SEQ_CST) +#define atomic_load(pObject) atomic_load_explicit(pObject, __ATOMIC_SEQ_CST) +#define atomic_store(pObject, desired) \ + atomic_store_explicit(pObject, desired, __ATOMIC_SEQ_CST) +#define atomic_flag_test_and_set(x) \ + atomic_flag_test_and_set_explicit(x, __ATOMIC_SEQ_CST) +#define atomic_flag_clear(x) atomic_flag_clear_explicit(x, __ATOMIC_SEQ_CST) + #elif (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 401 #define atomic_init(obj, value) ((void)(*(obj) = (value))) @@ -210,6 +239,31 @@ typedef enum { #define atomic_store_explicit(object, desired, order) \ ((void)atomic_exchange_explicit(object, desired, order)) +#define atomic_compare_exchange_strong(pObject, pExpected, desired) \ + atomic_compare_exchange_strong_explicit( \ + pObject, pExpected, desired, memory_order_seq_cst, memory_order_seq_cst) +#define atomic_compare_exchange_weak(pObject, pExpected, desired) \ + atomic_compare_exchange_weak_explicit( \ + pObject, 
pExpected, desired, memory_order_seq_cst, memory_order_seq_cst) +#define atomic_exchange(pObject, desired) \ + atomic_exchange_explicit(pObject, desired, memory_order_seq_cst) +#define atomic_fetch_add(pObject, operand) \ + atomic_fetch_add_explicit(pObject, operand, memory_order_seq_cst) +#define atomic_fetch_and(pObject, operand) \ + atomic_fetch_and_explicit(pObject, operand, memory_order_seq_cst) +#define atomic_fetch_or(pObject, operand) \ + atomic_fetch_or_explicit(pObject, operand, memory_order_seq_cst) +#define atomic_fetch_sub(pObject, operand) \ + atomic_fetch_sub_explicit(pObject, operand, memory_order_seq_cst) +#define atomic_fetch_xor(pObject, operand) \ + atomic_fetch_xor_explicit(pObject, operand, memory_order_seq_cst) +#define atomic_load(pObject) atomic_load_explicit(pObject, memory_order_seq_cst) +#define atomic_store(pObject, desired) \ + atomic_store_explicit(pObject, desired, memory_order_seq_cst) +#define atomic_flag_test_and_set(x) \ + atomic_flag_test_and_set_explicit(x, memory_order_seq_cst) +#define atomic_flag_clear(x) atomic_flag_clear_explicit(x, memory_order_seq_cst) + #else /* non-gcc or old gcc w/o x86 */ #error "atomic operations not supported with this compiler and/or architecture" #endif diff --git a/libc/runtime/syslib.internal.h b/libc/runtime/syslib.internal.h index 90ed2994fcb..424034537b6 100644 --- a/libc/runtime/syslib.internal.h +++ b/libc/runtime/syslib.internal.h @@ -82,6 +82,7 @@ struct Syslib { char *(*__dlerror)(void); /* v9 (2024-01-31) */ int (*__pthread_cpu_number_np)(size_t *); + /* v10 (2024-05-02) */ long (*__sysctl)(int *, unsigned, void *, size_t *, void *, size_t); long (*__sysctlbyname)(const char *, void *, size_t *, void *, size_t); long (*__sysctlnametomib)(const char *, int *, size_t *); diff --git a/test/libc/calls/sched_getcpu_test.c b/test/libc/calls/sched_getcpu_test.c new file mode 100644 index 00000000000..72c85ee0547 --- /dev/null +++ b/test/libc/calls/sched_getcpu_test.c @@ -0,0 +1,113 @@ +/*-*- 
mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/atomic.h" +#include "libc/calls/calls.h" +#include "libc/dce.h" +#include "libc/intrin/atomic.h" +#include "libc/macros.h" +#include "libc/runtime/runtime.h" +#include "libc/testlib/subprocess.h" +#include "libc/testlib/testlib.h" +#include "libc/thread/thread.h" +#include "libc/thread/thread2.h" + +int cpu_count; + +void SetUpOnce(void) { + cpu_count = __get_cpu_count(); +} + +//////////////////////////////////////////////////////////////////////////////// +// AFFINITY TEST + +TEST(sched_getcpu, affinity_test) { + + if (IsXnu()) + return; + if (IsNetbsd()) + return; + if (IsOpenbsd()) + return; + + SPAWN(fork); + int n = cpu_count; + for (int i = 0; i < n; ++i) { + cpu_set_t affinity; + CPU_ZERO(&affinity); + CPU_SET(i, &affinity); + ASSERT_EQ( + 0, pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity)); + EXPECT_EQ(i, sched_getcpu()); + } + EXITS(0); +} + +//////////////////////////////////////////////////////////////////////////////// +// KLUDGE TEST + +#define THREADS 2 +#define ITERATIONS 10000 + +int g_hits[256]; +atomic_int g_sync; + +int call_sched_getcpu(void) { + int res = sched_getcpu(); + ASSERT_NE(-1, res); + ASSERT_GE(res, 0); + ASSERT_LT(res, cpu_count); + return res; +} + +void *worker(void *arg) { + int ith = (long)arg; + int nth = THREADS; + for (int i = 0; i < ITERATIONS; ++i) { + // help execution of threads be interleaved + int sync = atomic_fetch_add(&g_sync, 1); + if (sync % nth == ith) { + g_hits[call_sched_getcpu() % ARRAYLEN(g_hits)]++; + } + } + return 0; +} + +TEST(sched_getcpu, kludge_test) { + +#ifdef __x86_64__ + if (IsXnu()) + return; +#endif + if (IsNetbsd()) + return; + if (IsOpenbsd()) + return; + + if (cpu_count < THREADS) + return; + pthread_t th[THREADS]; + for (int i = 0; i < THREADS; ++i) + ASSERT_EQ(0, pthread_create(th + i, 0, worker, (void *)(long)i)); + for (int i = 0; i < THREADS; ++i) + ASSERT_EQ(0, 
pthread_join(th[i], 0)); + int hit = 0; + for (int i = 0; i < ARRAYLEN(g_hits); ++i) + hit += !!g_hits[i]; + ASSERT_GE(hit, THREADS); +} diff --git a/third_party/dlmalloc/threaded.inc b/third_party/dlmalloc/threaded.inc index 2454742cd0b..7c725346120 100644 --- a/third_party/dlmalloc/threaded.inc +++ b/third_party/dlmalloc/threaded.inc @@ -21,12 +21,9 @@ #include "libc/intrin/strace.h" #include "libc/intrin/weaken.h" #include "libc/macros.h" -#include "libc/nexgen32e/rdtscp.h" -#include "libc/nexgen32e/x86feature.h" #include "libc/runtime/runtime.h" #include "libc/thread/thread.h" -#include "libc/runtime/runtime.h" -#include "libc/intrin/weaken.h" +#include "libc/thread/threads.h" #include "third_party/dlmalloc/dlmalloc.h" #if !FOOTERS || !MSPACES @@ -34,6 +31,7 @@ #endif static struct magicu magiu; +static unsigned g_cpucount; static unsigned g_heapslen; static mstate g_heaps[128]; @@ -90,18 +88,29 @@ void dlmalloc_inspect_all(void handler(void *start, void *end, } } -forceinline mstate get_arena(void) { - unsigned cpu; -#ifdef __x86_64__ - unsigned tsc_aux; - rdtscp(&tsc_aux); - cpu = TSC_AUX_CORE(tsc_aux); -#else - long tpidr_el0; - asm("mrs\t%0,tpidr_el0" : "=r"(tpidr_el0)); - cpu = tpidr_el0 & 255; -#endif - return g_heaps[__magicu_div(cpu, magiu) % g_heapslen]; +// we make malloc() scalable basically by +// +// return g_heaps[sched_getcpu() / 2]; +// +// except we cache the syscall result using thread-local storage. 
on +// some platforms, it's not possible to use sched_getcpu() so we use +// arbitrary assignments to help scalability, but may not be optimal +static mstate get_arena(void) { + static atomic_uint assign; + static thread_local unsigned i; + static thread_local unsigned n; + if (n == 50) + n = 0; + if (!n) { + i = sched_getcpu(); + if (i == -1) { + i = atomic_fetch_add_explicit(&assign, 1, memory_order_relaxed); + i %= g_cpucount; + } + i = __magicu_div(i, magiu) % g_heapslen; + } + ++n; + return g_heaps[i]; } static void *dlmalloc_single(size_t n) { @@ -174,19 +183,18 @@ static void threaded_dlmalloc(void) { if (!_weaken(pthread_create)) return use_single_heap(false); - if (!IsAarch64() && !X86_HAVE(RDTSCP)) - return use_single_heap(true); - // determine how many independent heaps we should install // by default we do an approximation of one heap per core // this code makes the c++ stl go 164x faster on my ryzen - cpus = __get_cpu_count(); - if (cpus == -1) + g_cpucount = cpus = __get_cpu_count(); + if (cpus == -1) { heaps = 1; - else if ((var = getenv("COSMOPOLITAN_HEAP_COUNT"))) + g_cpucount = 1; + } else if ((var = getenv("COSMOPOLITAN_HEAP_COUNT"))) { heaps = dlmalloc_atoi(var); - else + } else { heaps = cpus >> 1; + } if (heaps <= 1) return use_single_heap(true); if (heaps > ARRAYLEN(g_heaps)) diff --git a/tool/viz/malloc_scalability.c b/tool/viz/malloc_scalability.c new file mode 100644 index 00000000000..434be2123d4 --- /dev/null +++ b/tool/viz/malloc_scalability.c @@ -0,0 +1,55 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in 
all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/struct/timespec.h" +#include "libc/mem/mem.h" +#include "libc/runtime/runtime.h" +#include "libc/thread/thread.h" + +#define ALLOCATIONS 1000 + +void *worker(void *arg) { + void **ptrs = malloc(ALLOCATIONS * sizeof(void *)); + for (int i = 0; i < ALLOCATIONS; ++i) + ptrs[i] = malloc(1); + for (int i = 0; i < ALLOCATIONS; ++i) + free(ptrs[i]); + free(ptrs); + return 0; +} + +void test(int n) { + struct timespec start = timespec_real(); + pthread_t *th = malloc(sizeof(pthread_t) * n); + for (int i = 0; i < n; ++i) + pthread_create(th + i, 0, worker, 0); + for (int i = 0; i < n; ++i) + pthread_join(th[i], 0); + free(th); + struct timespec end = timespec_real(); + printf("%2d threads * %d allocs = %ld us\n", n, ALLOCATIONS, + timespec_tomicros(timespec_sub(end, start))); +} + +int main(int argc, char *argv[]) { + int n = __get_cpu_count(); + if (n < 8) + n = 8; + for (int i = 1; i <= n; ++i) + test(i); +} diff --git a/tool/viz/vdsodump.c b/tool/viz/vdsodump.c new file mode 100644 index 00000000000..22174a323f5 --- /dev/null +++ b/tool/viz/vdsodump.c @@ -0,0 +1,40 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ 
│ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" +#include "libc/intrin/getauxval.h" +#include "libc/runtime/runtime.h" +#include "libc/sysv/consts/auxv.h" + +int main(int argc, char *argv[]) { + struct AuxiliaryValue av; + av = __getauxval(AT_SYSINFO_EHDR); + if (!av.isfound) + return 2; + int fd = creat("vdso.so", 0644); + if (fd == -1) + return 3; + int i; + for (i = 0;; i += getpagesize()) + if (write(fd, (char *)av.value + i, getpagesize()) == -1) + break; + if (!i) + return 4; + if (close(fd)) + return 5; +}