Patch summary: add libc/testlib/benchmark.h (a shared BENCHMARK macro), port
the ctl, str and tinymath benchmarks to it, delete the libc/str/uselocale.c
stub, widen the prerequisites of o/cosmocc.h.txt, and compile selected
default-mode objects with -mgeneral-regs-only.

NOTE(review): line breaks and leading indentation were reconstructed from the
hunk headers; whitespace inside lines is normalized, so apply with
whitespace-insensitive matching (for example `patch -l`). Text inside angle
brackets on a few context lines (`// #include`, `ctl::set s;`) looks
truncated and should be confirmed against the repository.

diff --git a/Makefile b/Makefile
index 28105981453..dcc3278ff18 100644
--- a/Makefile
+++ b/Makefile
@@ -540,7 +540,7 @@ COSMOCC_HDRS = \
 	$(foreach x,$(COSMOCC_PKGS),$($(x)_HDRS)) \
 	$(foreach x,$(COSMOCC_PKGS),$($(x)_INCS))
 
-o/cosmocc.h.txt: Makefile
+o/cosmocc.h.txt: Makefile libc $(MAKEFILES) $(call uniq,$(foreach x,$(HDRS) $(INCS),$(dir $(x)))) $(HDRS) $(INCS)
 	$(file >$@, $(call uniq,$(COSMOCC_HDRS)))
 
 COSMOPOLITAN_H_ROOT_HDRS = \
diff --git a/libc/calls/BUILD.mk b/libc/calls/BUILD.mk
index 442dab18e15..03f6bcf8e59 100644
--- a/libc/calls/BUILD.mk
+++ b/libc/calls/BUILD.mk
@@ -154,6 +154,66 @@ o/$(MODE)/libc/calls/sigcrashsig.o: private \
 		CFLAGS += \
 			-Os
 
+# avoid legacy sse decoding penalty on avx systems
+o//libc/calls/cfmakeraw.o \
+o//libc/calls/clock_gettime-xnu.o \
+o//libc/calls/CPU_AND.o \
+o//libc/calls/CPU_OR.o \
+o//libc/calls/CPU_XOR.o \
+o//libc/calls/dl_iterate_phdr.o \
+o//libc/calls/dup-nt.o \
+o//libc/calls/fcntl-nt.o \
+o//libc/calls/flock-nt.o \
+o//libc/calls/fstatfs-nt.o \
+o//libc/calls/fstat-nt.o \
+o//libc/calls/futimesat.o \
+o//libc/calls/futimes.o \
+o//libc/calls/getrlimit.o \
+o//libc/calls/gettimeofday.o \
+o//libc/calls/ioctl.o \
+o//libc/calls/lutimes.o \
+o//libc/calls/metaflock.o \
+o//libc/calls/ntaccesscheck.o \
+o//libc/calls/ntspawn.o \
+o//libc/calls/open-nt.o \
+o//libc/calls/pledge-linux.o \
+o//libc/calls/ppoll.o \
+o//libc/calls/preadv.o \
+o//libc/calls/pselect.o \
+o//libc/calls/pwritev.o \
+o//libc/calls/read-nt.o \
+o//libc/calls/readv.o \
+o//libc/calls/readwrite-nt.o \
+o//libc/calls/releasefd.o \
+o//libc/calls/select.o \
+o//libc/calls/sigaction.o \
+o//libc/calls/sigenter-freebsd.o \
+o//libc/calls/sigenter-netbsd.o \
+o//libc/calls/sigenter-openbsd.o \
+o//libc/calls/sigenter-xnu.o \
+o//libc/calls/sigignore.o \
+o//libc/calls/siginfo2cosmo.o \
+o//libc/calls/signal.o \
+o//libc/calls/sig.o \
+o//libc/calls/sigtimedwait.o \
+o//libc/calls/stat2cosmo.o \
+o//libc/calls/statfs2cosmo.o \
+o//libc/calls/statfs2statvfs.o \
+o//libc/calls/tcgetattr-nt.o \
+o//libc/calls/tcgetattr.o \
+o//libc/calls/tcgetwinsize-nt.o \
+o//libc/calls/tcsetattr-nt.o \
+o//libc/calls/tcsetwinsize-nt.o \
+o//libc/calls/termios2host.o \
+o//libc/calls/timespec_sleep.o \
+o//libc/calls/uname.o \
+o//libc/calls/utimensat-old.o \
+o//libc/calls/utimes.o \
+o//libc/calls/winexec.o \
+o//libc/calls/writev.o: private \
+		COPTS += \
+			-mgeneral-regs-only
+
 # these assembly files are safe to build on aarch64
 o/$(MODE)/libc/calls/getcontext.o: libc/calls/getcontext.S
 	@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<
diff --git a/libc/intrin/BUILD.mk b/libc/intrin/BUILD.mk
index fa18d9b46f4..99b0cdf89f3 100644
--- a/libc/intrin/BUILD.mk
+++ b/libc/intrin/BUILD.mk
@@ -97,6 +97,14 @@ o/$(MODE)/libc/intrin/x86.o: private \
 			-fpatchable-function-entry=0 \
 			-Os
 
+# avoid the legacy sse decoding penalty on avx systems
+o//libc/intrin/dll.o \
+o//libc/intrin/fds.o \
+o//libc/intrin/mmap.o \
+o//libc/intrin/demangle.o: private \
+		CFLAGS += \
+			-mgeneral-regs-only
+
 # these assembly files are safe to build on aarch64
 o/$(MODE)/libc/intrin/aarch64/%.o: libc/intrin/aarch64/%.S
 	@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<
diff --git a/libc/str/uselocale.c b/libc/str/uselocale.c
deleted file mode 100644
index 408c1ce5dcb..00000000000
--- a/libc/str/uselocale.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
-│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
-╞══════════════════════════════════════════════════════════════════════════════╡
-│ Copyright 2022 Justine Alexandra Roberts Tunney │
-│ │
-│ Permission to use, copy, modify, and/or distribute this software for │
-│ any purpose with or without fee is hereby granted, provided that the │
-│ above copyright notice and this permission notice appear in all copies. │
-│ │
-│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
-│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
-│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
-│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
-│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
-│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
-│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
-│ PERFORMANCE OF THIS SOFTWARE. │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/str/locale.h"
-#include "libc/sysv/errfuns.h"
-
-locale_t uselocale(locale_t l) {
-  // TODO: implement me!
-  return 0;
-}
diff --git a/libc/testlib/BUILD.mk b/libc/testlib/BUILD.mk
index 236d8ab963c..95e11d95a21 100644
--- a/libc/testlib/BUILD.mk
+++ b/libc/testlib/BUILD.mk
@@ -22,6 +22,7 @@ LIBC_TESTLIB_A_ASSETS = \
 LIBC_TESTLIB_A_HDRS = \
 	libc/testlib/aspect.internal.h \
 	libc/testlib/bench.h \
+	libc/testlib/benchmark.h \
 	libc/testlib/blocktronics.h \
 	libc/testlib/ezbench.h \
 	libc/testlib/fastrandomstring.h \
diff --git a/libc/testlib/benchmark.h b/libc/testlib/benchmark.h
new file mode 100644
index 00000000000..d416067112e
--- /dev/null
+++ b/libc/testlib/benchmark.h
@@ -0,0 +1,42 @@
+#ifndef COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_
+#define COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_
+#include "libc/calls/struct/timespec.h"
+#include "libc/stdio/stdio.h"
+COSMOPOLITAN_C_START_
+
+/**
+ * Times CODE and prints the average nanoseconds per unit of work.
+ *
+ * CODE runs ITERATIONS times, each run preceded by a compiler memory
+ * barrier. The time elapsed between two timespec_real() readings is
+ * divided by WORK_PER_RUN times ITERATIONS (a WORK_PER_RUN of zero
+ * counts as one), after adding work - 1 nanoseconds so the quotient is
+ * biased upward. The result is printed with the iteration count and
+ * the stringified CODE.
+ *
+ * ITERATIONS and WORK_PER_RUN are expanded more than once, so pass
+ * expressions without side effects. The macro's locals use reserved
+ * names so that CODE and the arguments can't collide with them.
+ */
+#define BENCHMARK(ITERATIONS, WORK_PER_RUN, CODE) \
+  do { \
+    struct timespec __start = timespec_real(); \
+    for (int __i = 0; __i < (ITERATIONS); ++__i) { \
+      asm volatile("" ::: "memory"); \
+      CODE; \
+    } \
+    long long __work = \
+        (long long)((WORK_PER_RUN) ? (WORK_PER_RUN) : 1) * (ITERATIONS); \
+    double __nanos = \
+        (timespec_tonanos(timespec_sub(timespec_real(), __start)) + __work - \
+         1) / \
+        (double)__work; \
+    if (__nanos < 1000) { \
+      printf("%10g ns %2dx %s\n", __nanos, (int)(ITERATIONS), #CODE); \
+    } else { \
+      printf("%10lld ns %2dx %s\n", (long long)__nanos, (int)(ITERATIONS), \
+             #CODE); \
+    } \
+  } while (0)
+
+COSMOPOLITAN_C_END_
+#endif /* COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_ */
diff --git a/test/ctl/set_bench.cc b/test/ctl/set_bench.cc
index 4c565848eab..4cc1f31e6cb 100644
--- a/test/ctl/set_bench.cc
+++ b/test/ctl/set_bench.cc
@@ -22,26 +22,12 @@
 #include "libc/mem/leaks.h"
 #include "libc/stdio/stdio.h"
 #include "libc/sysv/consts/rusage.h"
+#include "libc/testlib/benchmark.h"
 
 // #include
 // #define ctl std
 // #define check() size()
 
-#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
-    do { \
-        struct timespec start = timespec_real(); \
-        for (int __i = 0; __i < ITERATIONS; ++__i) { \
-            asm volatile("" ::: "memory"); \
-            CODE; \
-        } \
-        long long work = (WORK_PER_RUN) * (ITERATIONS); \
-        double nanos = \
-          (timespec_tonanos(timespec_sub(timespec_real(), start)) + work - \
-           1) / \
-          (double)work; \
-        printf("%10g ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
-    } while (0)
-
 int
 rand32(void)
 {
@@ -68,19 +54,19 @@ main()
 {
     long x = 0;
     ctl::set s;
-    BENCH(1000000, 1, s.insert(rand32() % 1000000));
+    BENCHMARK(1000000, 1, s.insert(rand32() % 1000000));
     // s.check();
-    BENCH(1000000, 1, {
+    BENCHMARK(1000000, 1, {
         auto i = s.find(rand32() % 1000000);
         if (i != s.end())
             x += *i;
     });
-    BENCH(1000000, 1, {
+    BENCHMARK(1000000, 1, {
         auto i = s.lower_bound(rand32() % 1000000);
         if (i != s.end())
             x += *i;
     });
-    BENCH(1000000, 1, s.erase(rand32() % 1000000));
+    BENCHMARK(1000000, 1, s.erase(rand32() % 1000000));
     eat(x);
 }
 
diff --git a/test/ctl/string_bench.cc b/test/ctl/string_bench.cc
index c14c839277f..b84aa98a401 100644
--- a/test/ctl/string_bench.cc
+++ b/test/ctl/string_bench.cc
@@ -20,27 +20,13 @@
 #include "ctl/utility.h"
 #include "libc/dce.h"
 #include "libc/mem/leaks.h"
+#include "libc/testlib/benchmark.h"
 
 #include "libc/calls/struct/timespec.h"
 #include "libc/runtime/runtime.h"
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
 
-#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
-    do { \
-        struct timespec start = timespec_real(); \
-        for (int __i = 0; __i < ITERATIONS; ++__i) { \
-            asm volatile("" ::: "memory"); \
-            CODE; \
-        } \
-        long long work = (WORK_PER_RUN) * (ITERATIONS); \
-        double nanos = \
-          (timespec_tonanos(timespec_sub(timespec_real(), start)) + work - \
-           1) / \
-          (double)work; \
-        printf("%10g ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
-    } while (0)
-
 const char* big_c = "aaaaaaaaaaaaaaaaaaaaaaaa";
 const char* small_c = "aaaaaaaaaaaaaaaaaaaaaaa";
 
@@ -55,98 +41,98 @@ main()
 {
     const ctl::string_view big(big_c), small(small_c);
 
-    BENCH(ITERATIONS * 10, 1, {
+    BENCHMARK(ITERATIONS * 10, 1, {
         ctl::string s;
         s.append("hello ");
         s.append("world");
     });
 
-    BENCH(ITERATIONS, 8, {
+    BENCHMARK(ITERATIONS, 8, {
         ctl::string s;
         for (int i = 0; i < 8; ++i) {
             s.append('a');
         }
     });
 
-    BENCH(ITERATIONS, 16, {
+    BENCHMARK(ITERATIONS, 16, {
         ctl::string s;
         for (int i = 0; i < 16; ++i) {
             s.append('a');
         }
     });
 
-    BENCH(ITERATIONS, 23, {
+    BENCHMARK(ITERATIONS, 23, {
         ctl::string s;
         for (int i = 0; i < 23; ++i) {
             s.append('a');
         }
     });
 
-    BENCH(ITERATIONS, 24, {
+    BENCHMARK(ITERATIONS, 24, {
         ctl::string s;
         for (int i = 0; i < 24; ++i) {
             s.append('a');
         }
     });
 
-    BENCH(ITERATIONS, 32, {
+    BENCHMARK(ITERATIONS, 32, {
         ctl::string s;
         for (int i = 0; i < 32; ++i) {
             s.append('a');
         }
     });
 
-    BENCH(ITERATIONS, 1, { ctl::string s(small_c); });
+    BENCHMARK(ITERATIONS, 1, { ctl::string s(small_c); });
 
-    BENCH(ITERATIONS, 1, { ctl::string s(small); });
+    BENCHMARK(ITERATIONS, 1, { ctl::string s(small); });
 
     {
         ctl::string small_copy("hello world");
-        BENCH(ITERATIONS, 1, { ctl::string s2(small_copy); });
+        BENCHMARK(ITERATIONS, 1, { ctl::string s2(small_copy); });
     }
 
-    BENCH(ITERATIONS, 1, {
+    BENCHMARK(ITERATIONS, 1, {
         ctl::string s(small);
         ctl::string s2(ctl::move(s));
     });
 
-    BENCH(ITERATIONS, 1, {
+    BENCHMARK(ITERATIONS, 1, {
         ctl::string s(small);
         ctl::string s2(s);
     });
 
-    BENCH(ITERATIONS, 1, { ctl::string s(big_c); });
+    BENCHMARK(ITERATIONS, 1, { ctl::string s(big_c); });
 
-    BENCH(ITERATIONS, 1, { ctl::string s(big); });
+    BENCHMARK(ITERATIONS, 1, { ctl::string s(big); });
 
     {
         ctl::string big_copy(big);
-        BENCH(ITERATIONS, 1, { ctl::string s2(big_copy); });
+        BENCHMARK(ITERATIONS, 1, { ctl::string s2(big_copy); });
     }
 
-    BENCH(ITERATIONS, 1, {
+    BENCHMARK(ITERATIONS, 1, {
         ctl::string s(big);
         ctl::string s2(ctl::move(s));
     });
 
-    BENCH(ITERATIONS, 1, {
+    BENCHMARK(ITERATIONS, 1, {
         ctl::string s(big);
         ctl::string s2(s);
     });
 
-    BENCH(ITERATIONS, 1, { ctl::string s(23, 'a'); });
+    BENCHMARK(ITERATIONS, 1, { ctl::string s(23, 'a'); });
 
-    BENCH(ITERATIONS, 1, { ctl::string s(24, 'a'); });
+    BENCHMARK(ITERATIONS, 1, { ctl::string s(24, 'a'); });
 
     {
         ctl::string s(5, 'a');
-        BENCH(ITERATIONS, 1, { ctl::string_view s2(s); });
+        BENCHMARK(ITERATIONS, 1, { ctl::string_view s2(s); });
     }
 
     {
         ctl::string big_trunc(48, 'a');
         big_trunc.resize(4);
-        BENCH(ITERATIONS, 1, { ctl::string s(big_trunc); });
+        BENCHMARK(ITERATIONS, 1, { ctl::string s(big_trunc); });
     }
 
     CheckForMemoryLeaks();
diff --git a/test/libc/str/blake2_test.c b/test/libc/str/blake2_test.c
index 65f2f34e016..568a5d7efc1 100644
--- a/test/libc/str/blake2_test.c
+++ b/test/libc/str/blake2_test.c
@@ -18,12 +18,13 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/str/blake2.h"
 #include "libc/assert.h"
+#include "libc/calls/struct/timespec.h"
 #include "libc/mem/mem.h"
 #include "libc/stdio/rand.h"
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
 #include "libc/str/tab.internal.h"
-#include "libc/testlib/ezbench.h"
+#include "libc/testlib/benchmark.h"
 #include "libc/testlib/hyperion.h"
 #include "libc/testlib/testlib.h"
 
@@ -90,17 +91,18 @@ TEST(BLAKE2B256Test, vectors) {
   free(line);
 }
 
-BENCH(blake2, bench) {
+BENCH(blake2, benchmark) {
   char fun[256];
   rngset(fun, 256, _rand64, -1);
-  EZBENCH_N("blake2b256", 0, EZBLAKE2B256(0, 0));
-  EZBENCH_N("blake2b256", 8, EZBLAKE2B256("helloooo", 8));
-  EZBENCH_N("blake2b256", 31, EZBLAKE2B256(fun, 31));
-  EZBENCH_N("blake2b256", 32, EZBLAKE2B256(fun, 32));
-  EZBENCH_N("blake2b256", 63, EZBLAKE2B256(fun, 63));
-  EZBENCH_N("blake2b256", 64, EZBLAKE2B256(fun, 64));
-  EZBENCH_N("blake2b256", 128, EZBLAKE2B256(fun, 128));
-  EZBENCH_N("blake2b256", 256, EZBLAKE2B256(fun, 256));
-  EZBENCH_N("blake2b256", kHyperionSize,
-            EZBLAKE2B256(kHyperion, kHyperionSize));
+  BENCHMARK(100, 0, __expropriate(EZBLAKE2B256(0, 0)));
+  BENCHMARK(100, 1, __expropriate(EZBLAKE2B256("h", 1)));
+  BENCHMARK(100, 8, __expropriate(EZBLAKE2B256("helloooo", 8)));
+  BENCHMARK(100, 31, __expropriate(EZBLAKE2B256(fun, 31)));
+  BENCHMARK(100, 32, __expropriate(EZBLAKE2B256(fun, 32)));
+  BENCHMARK(100, 63, __expropriate(EZBLAKE2B256(fun, 63)));
+  BENCHMARK(100, 64, __expropriate(EZBLAKE2B256(fun, 64)));
+  BENCHMARK(100, 128, __expropriate(EZBLAKE2B256(fun, 128)));
+  BENCHMARK(100, 256, __expropriate(EZBLAKE2B256(fun, 256)));
+  BENCHMARK(100, kHyperionSize,
+            __expropriate(EZBLAKE2B256(kHyperion, kHyperionSize)));
 }
diff --git a/test/libc/str/highwayhash64_test.c b/test/libc/str/highwayhash64_test.c
index 6ba2f443a1b..2ac0933895f 100644
--- a/test/libc/str/highwayhash64_test.c
+++ b/test/libc/str/highwayhash64_test.c
@@ -16,13 +16,14 @@
 │ limitations under the License. │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/str/highwayhash64.h"
+#include "libc/calls/struct/timespec.h"
 #include "libc/inttypes.h"
 #include "libc/nexgen32e/crc32.h"
 #include "libc/runtime/runtime.h"
 #include "libc/stdio/rand.h"
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
-#include "libc/testlib/ezbench.h"
+#include "libc/testlib/benchmark.h"
 #include "libc/testlib/hyperion.h"
 #include "libc/testlib/testlib.h"
 #include "third_party/zlib/zlib.h"
@@ -100,33 +101,31 @@ TEST(highwayhash64, test) {
 BENCH(highwayhash64, newbench) {
   char fun[256];
   rngset(fun, 256, _rand64, -1);
-  EZBENCH_N("highwayhash64", 0, HighwayHash64(0, 0, kTestKey1));
-  EZBENCH_N("highwayhash64", 8, HighwayHash64("helloooo", 8, kTestKey1));
-  EZBENCH_N("highwayhash64", 31, HighwayHash64(fun, 31, kTestKey1));
-  EZBENCH_N("highwayhash64", 32, HighwayHash64(fun, 32, kTestKey1));
-  EZBENCH_N("highwayhash64", 63, HighwayHash64(fun, 63, kTestKey1));
-  EZBENCH_N("highwayhash64", 64, HighwayHash64(fun, 64, kTestKey1));
-  EZBENCH_N("highwayhash64", 128, HighwayHash64(fun, 128, kTestKey1));
-  EZBENCH_N("highwayhash64", 256, HighwayHash64(fun, 256, kTestKey1));
-  EZBENCH_N("highwayhash64", kHyperionSize,
+  BENCHMARK(10, 0, HighwayHash64(0, 0, kTestKey1));
+  BENCHMARK(10, 8, HighwayHash64("helloooo", 8, kTestKey1));
+  BENCHMARK(10, 31, HighwayHash64(fun, 31, kTestKey1));
+  BENCHMARK(10, 32, HighwayHash64(fun, 32, kTestKey1));
+  BENCHMARK(10, 63, HighwayHash64(fun, 63, kTestKey1));
+  BENCHMARK(10, 64, HighwayHash64(fun, 64, kTestKey1));
+  BENCHMARK(10, 128, HighwayHash64(fun, 128, kTestKey1));
+  BENCHMARK(10, 256, HighwayHash64(fun, 256, kTestKey1));
+  BENCHMARK(10, kHyperionSize,
             HighwayHash64(kHyperion, kHyperionSize, kTestKey1));
 }
 
 BENCH(highwayhash64, bench) {
-  EZBENCH2("knuth small", donothing,
-           __expropriate(KnuthMultiplicativeHash32(__veil("r", "hello"), 5)));
-  EZBENCH2("crc32c small", donothing, __expropriate(crc32c(0, "hello", 5)));
-  EZBENCH2("crc32 small", donothing,
-           __expropriate(crc32_z(0, __veil("r", "hello"), 5)));
-  EZBENCH2("highwayhash64 small", donothing,
-           HighwayHash64((void *)"hello", 5, kTestKey1));
-  EZBENCH2("crc32 big", donothing,
-           __expropriate(crc32_z(0, kHyperion, kHyperionSize)));
-  EZBENCH2("crc32c big", donothing,
-           __expropriate(crc32c(0, kHyperion, kHyperionSize)));
-  EZBENCH2("highwayhash64 big", donothing,
-           HighwayHash64((void *)kHyperion, kHyperionSize, kTestKey1));
-  EZBENCH2("knuth big", donothing,
-           __expropriate(KnuthMultiplicativeHash32(__veil("r", kHyperion),
-                                                   kHyperionSize)));
+  BENCHMARK(10, 5,
+            __expropriate(KnuthMultiplicativeHash32(__veil("r", "hello"), 5)));
+  BENCHMARK(10, 5, __expropriate(crc32c(0, "hello", 5)));
+  BENCHMARK(10, 5, __expropriate(crc32_z(0, __veil("r", "hello"), 5)));
+  BENCHMARK(10, 5, HighwayHash64((void *)"hello", 5, kTestKey1));
+  BENCHMARK(10, kHyperionSize,
+            __expropriate(crc32_z(0, kHyperion, kHyperionSize)));
+  BENCHMARK(10, kHyperionSize,
+            __expropriate(crc32c(0, kHyperion, kHyperionSize)));
+  BENCHMARK(10, kHyperionSize,
+            HighwayHash64((void *)kHyperion, kHyperionSize, kTestKey1));
+  BENCHMARK(10, kHyperionSize,
+            __expropriate(KnuthMultiplicativeHash32(__veil("r", kHyperion),
+                                                    kHyperionSize)));
 }
diff --git a/test/libc/tinymath/fdot_test.cc b/test/libc/tinymath/fdot_test.cc
index b5747dd11ae..4d254319635 100644
--- a/test/libc/tinymath/fdot_test.cc
+++ b/test/libc/tinymath/fdot_test.cc
@@ -8,6 +8,7 @@
 #include "libc/mem/mem.h"
 #include "libc/runtime/runtime.h"
 #include "libc/stdio/stdio.h"
+#include "libc/testlib/benchmark.h"
 #include "libc/x/xasprintf.h"
 
 #define EXPENSIVE_TESTS 0
@@ -237,20 +238,6 @@ float nothing(float x) {
 
 float (*barrier)(float) = nothing;
 
-#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
-  do { \
-    struct timespec start = timespec_real(); \
-    for (int __i = 0; __i < ITERATIONS; ++__i) { \
-      asm volatile("" ::: "memory"); \
-      CODE; \
-    } \
-    long long work = (WORK_PER_RUN) * (ITERATIONS); \
-    long nanos = \
-        (timespec_tonanos(timespec_sub(timespec_real(), start)) + work - 1) / \
-        (double)work; \
-    printf("%8ld ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
-  } while (0)
-
 int main() {
   ShowCrashReports();
 
@@ -270,12 +257,12 @@ int main() {
   test_fdotf_naive();
   test_fdotf_hefty();
   test_fdotf_ruler();
-  BENCH(20, 1, (kahan = barrier(fdotf_kahan(A, B, n))));
-  BENCH(20, 1, (dubble = barrier(fdotf_dubble(A, B, n))));
-  BENCH(20, 1, (naive = barrier(fdotf_naive(A, B, n))));
-  BENCH(20, 1, (recursive = barrier(fdotf_recursive(A, B, n))));
-  BENCH(20, 1, (ruler = barrier(fdotf_ruler(A, B, n))));
-  BENCH(20, 1, (hefty = barrier(fdotf_hefty(A, B, n))));
+  BENCHMARK(20, 1, (kahan = barrier(fdotf_kahan(A, B, n))));
+  BENCHMARK(20, 1, (dubble = barrier(fdotf_dubble(A, B, n))));
+  BENCHMARK(20, 1, (naive = barrier(fdotf_naive(A, B, n))));
+  BENCHMARK(20, 1, (recursive = barrier(fdotf_recursive(A, B, n))));
+  BENCHMARK(20, 1, (ruler = barrier(fdotf_ruler(A, B, n))));
+  BENCHMARK(20, 1, (hefty = barrier(fdotf_hefty(A, B, n))));
   printf("dubble = %f (%g)\n", dubble, fabs(dubble - dubble));
   printf("kahan = %f (%g)\n", kahan, fabs(kahan - dubble));
   printf("naive = %f (%g)\n", naive, fabs(naive - dubble));
diff --git a/test/libc/tinymath/fsum_test.cc b/test/libc/tinymath/fsum_test.cc
index 2c7e6d24c21..65f58b8e9df 100644
--- a/test/libc/tinymath/fsum_test.cc
+++ b/test/libc/tinymath/fsum_test.cc
@@ -8,6 +8,7 @@
 #include "libc/mem/mem.h"
 #include "libc/runtime/runtime.h"
 #include "libc/stdio/stdio.h"
+#include "libc/testlib/benchmark.h"
 #include "libc/x/xasprintf.h"
 
 #define EXPENSIVE_TESTS 0
@@ -225,20 +226,6 @@ float nothing(float x) {
 
 float (*barrier)(float) = nothing;
 
-#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
-  do { \
-    struct timespec start = timespec_real(); \
-    for (int __i = 0; __i < ITERATIONS; ++__i) { \
-      asm volatile("" ::: "memory"); \
-      CODE; \
-    } \
-    long long work = (WORK_PER_RUN) * (ITERATIONS); \
-    long nanos = \
-        (timespec_tonanos(timespec_sub(timespec_real(), start)) + work - 1) / \
-        (double)work; \
-    printf("%8ld ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
-  } while (0)
-
 int main() {
   ShowCrashReports();
 
@@ -255,12 +242,12 @@ int main() {
   test_fsumf_naive();
   test_fsumf_hefty();
   test_fsumf_ruler();
-  BENCH(20, 1, (kahan = barrier(fsumf_kahan(p, n))));
-  BENCH(20, 1, (dubble = barrier(fsumf_dubble(p, n))));
-  BENCH(20, 1, (naive = barrier(fsumf_naive(p, n))));
-  BENCH(20, 1, (recursive = barrier(fsumf_recursive(p, n))));
-  BENCH(20, 1, (ruler = barrier(fsumf_ruler(p, n))));
-  BENCH(20, 1, (hefty = barrier(fsumf_hefty(p, n))));
+  BENCHMARK(20, 1, (kahan = barrier(fsumf_kahan(p, n))));
+  BENCHMARK(20, 1, (dubble = barrier(fsumf_dubble(p, n))));
+  BENCHMARK(20, 1, (naive = barrier(fsumf_naive(p, n))));
+  BENCHMARK(20, 1, (recursive = barrier(fsumf_recursive(p, n))));
+  BENCHMARK(20, 1, (ruler = barrier(fsumf_ruler(p, n))));
+  BENCHMARK(20, 1, (hefty = barrier(fsumf_hefty(p, n))));
   printf("dubble = %f (%g)\n", dubble, fabs(dubble - dubble));
   printf("kahan = %f (%g)\n", kahan, fabs(kahan - dubble));
   printf("naive = %f (%g)\n", naive, fabs(naive - dubble));
diff --git a/third_party/dlmalloc/BUILD.mk b/third_party/dlmalloc/BUILD.mk
index 8b7b9d6dc53..70af0e36441 100644
--- a/third_party/dlmalloc/BUILD.mk
+++ b/third_party/dlmalloc/BUILD.mk
@@ -58,6 +58,13 @@ $(THIRD_PARTY_DLMALLOC_A_OBJS): private \
 			-Wframe-larger-than=4096 \
 			-Walloca-larger-than=4096
 
+# avoid the legacy sse decoding penalty on avx systems
+ifeq ($(MODE),)
+$(THIRD_PARTY_DLMALLOC_A_OBJS): private \
+		COPTS += \
+			-mgeneral-regs-only
+endif
+
 THIRD_PARTY_DLMALLOC_LIBS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)))
 THIRD_PARTY_DLMALLOC_SRCS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_SRCS))
 THIRD_PARTY_DLMALLOC_HDRS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_HDRS))
diff --git a/third_party/libcxx/BUILD.mk b/third_party/libcxx/BUILD.mk
index fca4bebf107..820f75a8062 100644
--- a/third_party/libcxx/BUILD.mk
+++ b/third_party/libcxx/BUILD.mk
@@ -2148,6 +2148,9 @@ $(THIRD_PARTY_LIBCXX_A_OBJS): private \
 			-DLIBCXX_BUILDING_LIBCXXABI \
 			-D_LIBCPP_BUILDING_LIBRARY
 
+o/$(MODE)/third_party/libcxx/locale.o: private \
+		OVERRIDE_COPTS += -O -g0
+
 THIRD_PARTY_LIBCXX_LIBS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)))
 THIRD_PARTY_LIBCXX_SRCS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)_SRCS))
 THIRD_PARTY_LIBCXX_HDRS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)_HDRS))
diff --git a/third_party/nsync/BUILD.mk b/third_party/nsync/BUILD.mk
index 362f1dde0d2..7576efeab33 100644
--- a/third_party/nsync/BUILD.mk
+++ b/third_party/nsync/BUILD.mk
@@ -56,6 +56,13 @@ $(THIRD_PARTY_NSYNC_A_OBJS): private \
 			-Wframe-larger-than=4096 \
 			-Walloca-larger-than=4096
 
+# avoid the legacy sse decoding penalty on avx systems
+ifeq ($(MODE),)
+$(THIRD_PARTY_NSYNC_A_OBJS): private \
+		COPTS += \
+			-mgeneral-regs-only
+endif
+
 # these assembly files are safe to build on aarch64
 o/$(MODE)/third_party/nsync/compat.o: third_party/nsync/compat.S
 	@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<
diff --git a/third_party/nsync/mem/BUILD.mk b/third_party/nsync/mem/BUILD.mk
index aa5c3c1e3df..a947a2e18da 100644
--- a/third_party/nsync/mem/BUILD.mk
+++ b/third_party/nsync/mem/BUILD.mk
@@ -49,6 +49,13 @@ $(THIRD_PARTY_NSYNC_MEM_A_OBJS): private \
 			-Wframe-larger-than=4096 \
 			-Walloca-larger-than=4096
 
+# avoid the legacy sse decoding penalty on avx systems
+ifeq ($(MODE),)
+$(THIRD_PARTY_NSYNC_MEM_A_OBJS): private \
+		COPTS += \
+			-mgeneral-regs-only
+endif
+
 THIRD_PARTY_NSYNC_MEM_LIBS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)))
 THIRD_PARTY_NSYNC_MEM_SRCS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_SRCS))
 THIRD_PARTY_NSYNC_MEM_CHECKS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_CHECKS))