From b5fcb59a851a794a870e91435d75169915cc7141 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Fri, 13 Sep 2024 05:06:34 -0700 Subject: [PATCH] Implement more bf16/fp16 compiler runtimes Fixes #1259 --- libc/integral/c.inc | 20 +++ libc/intrin/{truncsfbf2.c => brain16.c} | 67 ++++++++- libc/intrin/extendbfsf2.c | 39 ----- .../compiler_rt => libc/intrin}/extendsftf2.c | 4 +- libc/intrin/float16.c | 137 ++++++++++++++++-- libc/intrin/truncdfbf2.c | 24 --- .../compiler_rt => libc/intrin}/trunctfsf2.c | 4 +- third_party/compiler_rt/extendhfdf2.c | 17 --- third_party/compiler_rt/extendhfsf2.c | 27 ---- third_party/compiler_rt/truncdfhf2.c | 21 --- third_party/compiler_rt/truncsfhf2.c | 27 ---- 11 files changed, 209 insertions(+), 178 deletions(-) rename libc/intrin/{truncsfbf2.c => brain16.c} (68%) delete mode 100644 libc/intrin/extendbfsf2.c rename {third_party/compiler_rt => libc/intrin}/extendsftf2.c (89%) delete mode 100644 libc/intrin/truncdfbf2.c rename {third_party/compiler_rt => libc/intrin}/trunctfsf2.c (89%) delete mode 100644 third_party/compiler_rt/extendhfdf2.c delete mode 100644 third_party/compiler_rt/extendhfsf2.c delete mode 100644 third_party/compiler_rt/truncdfhf2.c delete mode 100644 third_party/compiler_rt/truncsfhf2.c diff --git a/libc/integral/c.inc b/libc/integral/c.inc index 0f29ff5f05f..04aeb22294d 100644 --- a/libc/integral/c.inc +++ b/libc/integral/c.inc @@ -65,6 +65,26 @@ typedef __UINT64_TYPE__ uint64_t; typedef __INTMAX_TYPE__ intmax_t; typedef __UINTMAX_TYPE__ uintmax_t; +/* TODO(jart): re-import compiler-rt once they have it */ +#if defined(__x86_64__) && defined(__FLT128_MAX_10_EXP__) +#undef __FLT128_MAX_10_EXP__ +#undef __FLT128_DENORM_MIN__ +#undef __FLT128_MIN_EXP__ +#undef __FLT128_MIN_10_EXP__ +#undef __FLT128_MANT_DIG__ +#undef __FLT128_HAS_INFINITY__ +#undef __FLT128_EPSILON__ +#undef __FLT128_MAX_EXP__ +#undef __FLT128_HAS_DENORM__ +#undef __FLT128_DIG__ +#undef __FLT128_MIN__ +#undef __FLT128_MAX__ +#undef __FLT128_NORM_MAX__ +#undef __FLT128_HAS_QUIET_NAN__ +#undef __FLT128_IS_IEC_60559__ +#undef __FLT128_DECIMAL_DIG__ +#endif + #define __DEFINED_max_align_t typedef long double max_align_t; diff --git a/libc/intrin/truncsfbf2.c b/libc/intrin/brain16.c similarity index 68% rename from libc/intrin/truncsfbf2.c rename to libc/intrin/brain16.c index b2d12e33d74..95b0050b8bc 100644 --- a/libc/intrin/truncsfbf2.c +++ b/libc/intrin/brain16.c @@ -17,12 +17,53 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -__bf16 __truncsfbf2(float f) { +/** + * @fileoverview bf16 compiler runtime + */ + +_Float32 __extendbfsf2(__bf16 f) { + union { + __bf16 f; + uint16_t i; + } ub = {f}; + + // convert brain16 to binary32 + uint32_t x = (uint32_t)ub.i << 16; + + // force nan to quiet + if ((x & 0x7fffffff) > 0x7f800000) + x |= 0x00400000; + + // pun to _Float32 + union { + uint32_t i; + _Float32 f; + } uf = {x}; + return uf.f; +} + +_Float64 __extendbfdf2(__bf16 f) { + return __extendbfsf2(f); +} + +#ifdef __x86_64__ +__float80 __extendbfxf2(__bf16 f) { + return __extendbfsf2(f); +} +#endif + +#ifdef __aarch64__ +_Float128 __extendbftf2(__bf16 f) { + return __extendbfsf2(f); +} +#endif + +__bf16 __truncsfbf2(_Float32 f) { union { - float f; - unsigned i; + _Float32 f; + uint32_t i; } uf = {f}; - unsigned x = uf.i; + uint32_t x = uf.i; if ((x & 0x7fffffff) > 0x7f800000) // force nan to quiet @@ -33,8 +74,24 @@ __bf16 __truncsfbf2(float f) { // pun to bf16 union { - unsigned short i; + uint16_t i; __bf16 f; } ub = {x}; return ub.f; } + +__bf16 __truncdfbf2(_Float64 f) { + return __truncsfbf2(f); +} + +#ifdef __x86_64__ +__bf16 __truncxfbf2(__float80 f) { + return __truncsfbf2(f); +} +#endif + +#ifdef __aarch64__ +__bf16 __trunctfbf2(_Float128 f) { + return __truncsfbf2(f); +} +#endif diff --git a/libc/intrin/extendbfsf2.c b/libc/intrin/extendbfsf2.c deleted file mode 100644 index 1773bac676c..00000000000 --- a/libc/intrin/extendbfsf2.c +++ /dev/null @@ -1,39 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2024 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ - -float __extendbfsf2(__bf16 f) { - union { - __bf16 f; - unsigned short i; - } ub = {f}; - - // convert brain16 to binary32 - unsigned x = (unsigned)ub.i << 16; - - // force nan to quiet - if ((x & 0x7fffffff) > 0x7f800000) - x |= 0x00400000; - - // pun to float - union { - unsigned i; - float f; - } uf = {x}; - return uf.f; -} diff --git a/third_party/compiler_rt/extendsftf2.c b/libc/intrin/extendsftf2.c similarity index 89% rename from third_party/compiler_rt/extendsftf2.c rename to libc/intrin/extendsftf2.c index 1509b45e4ce..444140e1a7a 100644 --- a/third_party/compiler_rt/extendsftf2.c +++ b/libc/intrin/extendsftf2.c @@ -8,8 +8,6 @@ //===----------------------------------------------------------------------===// // -__static_yoink("huge_compiler_rt_license"); - #define QUAD_PRECISION #include "third_party/compiler_rt/fp_lib.inc" @@ -19,7 +17,7 @@ __static_yoink("huge_compiler_rt_license"); #include "third_party/compiler_rt/fp_extend_impl.inc" COMPILER_RT_ABI long double __extendsftf2(float a) { - return __extendXfYf2__(a); + return __extendXfYf2__(a); } #endif diff --git a/libc/intrin/float16.c b/libc/intrin/float16.c index 476a2f6c942..434f0cafd7a 100644 --- a/libc/intrin/float16.c +++ b/libc/intrin/float16.c @@ -21,22 +21,135 @@ * @fileoverview fp16 compiler runtime */ -#define asint(x) ((union pun){x}).i -#define isnan(x) (((x) & 0x7fff) > 0x7c00) +#define isnan16(x) (((x) & 0x7fff) > 0x7c00) -union pun { - _Float16 f; - unsigned short i; -}; +static inline _Float16 tofloat16(int x) { + union { + uint16_t i; + _Float16 f; + } u = {x}; + return u.f; +} + +static inline int fromfloat16(_Float16 x) { + union { + _Float16 f; + uint16_t i; + } u = {x}; + return u.i; +} + +static inline _Float32 tofloat32(uint32_t w) { + union { + uint32_t as_bits; + _Float32 as_value; + } fp32; + fp32.as_bits = w; + return fp32.as_value; +} + +static inline uint32_t fromfloat32(_Float32 f) { + union { + _Float32 as_value; + uint32_t as_bits; + } fp32; + fp32.as_value = f; + return fp32.as_bits; +} + +static inline _Float32 fabs32(_Float32 x) { + return tofloat32(fromfloat32(x) & 0x7fffffffu); +} int __eqhf2(_Float16 fx, _Float16 fy) { - int x = asint(fx); - int y = asint(fy); - return (x == y) & !isnan(x) & !isnan(y); + int x = fromfloat16(fx); + int y = fromfloat16(fy); + return (x == y) & !isnan16(x) & !isnan16(y); } int __nehf2(_Float16 fx, _Float16 fy) { - int x = asint(fx); - int y = asint(fy); - return (x != y) & !isnan(x) & !isnan(y); + int x = fromfloat16(fx); + int y = fromfloat16(fy); + return (x != y) & !isnan16(x) & !isnan16(y); +} + +_Float32 __extendhfsf2(_Float16 f) { + uint16_t h = fromfloat16(f); + const uint32_t w = (uint32_t)h << 16; + const uint32_t sign = w & 0x80000000u; + const uint32_t two_w = w + w; + const uint32_t exp_offset = 0xE0u << 23; +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || \ + defined(__GNUC__) && !defined(__STRICT_ANSI__) + const _Float32 exp_scale = 0x1.0p-112f; +#else + const _Float32 exp_scale = tofloat32(0x7800000u); +#endif + const _Float32 normalized_value = + tofloat32((two_w >> 4) + exp_offset) * exp_scale; + const uint32_t magic_mask = 126u << 23; + const _Float32 magic_bias = 0.5f; + const _Float32 denormalized_value = + tofloat32((two_w >> 17) | magic_mask) - magic_bias; + const uint32_t denormalized_cutoff = 1u << 27; + const uint32_t result = + sign | (two_w < denormalized_cutoff ? fromfloat32(denormalized_value) + : fromfloat32(normalized_value)); + return tofloat32(result); +} + +_Float64 __extendhfdf2(_Float16 f) { + return __extendhfsf2(f); +} + +#ifdef __x86_64__ +__float80 __extendhfxf2(_Float16 f) { + return __extendhfsf2(f); +} +#endif + +#ifdef __aarch64__ +_Float128 __extendhftf2(_Float16 f) { + return __extendhfsf2(f); +} +#endif + +_Float16 __truncsfhf2(_Float32 f) { +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || \ + defined(__GNUC__) && !defined(__STRICT_ANSI__) + const _Float32 scale_to_inf = 0x1.0p+112f; + const _Float32 scale_to_zero = 0x1.0p-110f; +#else + const _Float32 scale_to_inf = tofloat32(0x77800000u); + const _Float32 scale_to_zero = tofloat32(0x08800000u); +#endif + _Float32 base = (fabs32(f) * scale_to_inf) * scale_to_zero; + const uint32_t w = fromfloat32(f); + const uint32_t shl1_w = w + w; + const uint32_t sign = w & 0x80000000u; + uint32_t bias = shl1_w & 0xFF000000u; + if (bias < 0x71000000u) + bias = 0x71000000u; + base = tofloat32((bias >> 1) + 0x07800000u) + base; + const uint32_t bits = fromfloat32(base); + const uint32_t exp_bits = (bits >> 13) & 0x00007C00u; + const uint32_t mantissa_bits = bits & 0x00000FFFu; + const uint32_t nonsign = exp_bits + mantissa_bits; + return tofloat16((sign >> 16) | (shl1_w > 0xFF000000u ? 0x7E00u : nonsign)); +} + +_Float16 __truncdfhf2(_Float64 f) { + return __truncsfhf2(f); +} + +#ifdef __x86_64__ +_Float16 __truncxfhf2(__float80 f) { + return __truncsfhf2(f); +} +#endif + +#ifdef __aarch64__ +_Float16 __trunctfhf2(_Float128 f) { + return __truncsfhf2(f); } +#endif diff --git a/libc/intrin/truncdfbf2.c b/libc/intrin/truncdfbf2.c deleted file mode 100644 index 65dfff08c73..00000000000 --- a/libc/intrin/truncdfbf2.c +++ /dev/null @@ -1,24 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2024 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ - -__bf16 __truncsfbf2(float); -__bf16 __truncdfbf2(double f) { - // TODO(jart): What else are we supposed to do here? - return __truncsfbf2(f); -} diff --git a/third_party/compiler_rt/trunctfsf2.c b/libc/intrin/trunctfsf2.c similarity index 89% rename from third_party/compiler_rt/trunctfsf2.c rename to libc/intrin/trunctfsf2.c index 3ebda815138..bbb961dfe7b 100644 --- a/third_party/compiler_rt/trunctfsf2.c +++ b/libc/intrin/trunctfsf2.c @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -__static_yoink("huge_compiler_rt_license"); - #define QUAD_PRECISION #include "third_party/compiler_rt/fp_lib.inc" @@ -18,7 +16,7 @@ __static_yoink("huge_compiler_rt_license"); #include "third_party/compiler_rt/fp_trunc_impl.inc" COMPILER_RT_ABI float __trunctfsf2(long double a) { - return __truncXfYf2__(a); + return __truncXfYf2__(a); } #endif diff --git a/third_party/compiler_rt/extendhfdf2.c b/third_party/compiler_rt/extendhfdf2.c deleted file mode 100644 index 729eb04c1a2..00000000000 --- a/third_party/compiler_rt/extendhfdf2.c +++ /dev/null @@ -1,17 +0,0 @@ -//===-- lib/extendhfdf2.c - half -> dubble conversion -------------*- C -*-===// -// -// The Cosmopolitan Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// - -#define SRC_HALF -#define DST_DOUBLE -#include "third_party/compiler_rt/fp16_extend_impl.inc" - -COMPILER_RT_ABI dst_t __extendhfdf2(src_t a) { - return __extendXfYf2__(a); -} diff --git a/third_party/compiler_rt/extendhfsf2.c b/third_party/compiler_rt/extendhfsf2.c deleted file mode 100644 index f891d95420e..00000000000 --- a/third_party/compiler_rt/extendhfsf2.c +++ /dev/null @@ -1,27 +0,0 @@ -//===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#define SRC_HALF -#define DST_SINGLE -#include "fp16_extend_impl.inc" - -// Use a forwarding definition and noinline to implement a poor man's alias, -// as there isn't a good cross-platform way of defining one. -COMPILER_RT_ABI NOINLINE float __extendhfsf2(src_t a) { - return __extendXfYf2__(a); -} - -COMPILER_RT_ABI float __gnu_h2f_ieee(src_t a) { return __extendhfsf2(a); } - -#if defined(__ARM_EABI__) -#if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI float __aeabi_h2f(src_t a) { return __extendhfsf2(a); } -#else -COMPILER_RT_ALIAS(__extendhfsf2, __aeabi_h2f) -#endif -#endif diff --git a/third_party/compiler_rt/truncdfhf2.c b/third_party/compiler_rt/truncdfhf2.c deleted file mode 100644 index 9a01e2c2e1e..00000000000 --- a/third_party/compiler_rt/truncdfhf2.c +++ /dev/null @@ -1,21 +0,0 @@ -//===-- lib/truncdfhf2.c - double -> half conversion --------------*- C -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#define SRC_DOUBLE -#define DST_HALF -#include "fp16_trunc_impl.inc" - -COMPILER_RT_ABI dst_t __truncdfhf2(double a) { return __truncXfYf2__(a); } - -#if defined(__ARM_EABI__) -#if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI dst_t __aeabi_d2h(double a) { return __truncdfhf2(a); } -#else -COMPILER_RT_ALIAS(__truncdfhf2, __aeabi_d2h) -#endif -#endif diff --git a/third_party/compiler_rt/truncsfhf2.c b/third_party/compiler_rt/truncsfhf2.c deleted file mode 100644 index d15e1884f23..00000000000 --- a/third_party/compiler_rt/truncsfhf2.c +++ /dev/null @@ -1,27 +0,0 @@ -//===-- lib/truncsfhf2.c - single -> half conversion --------------*- C -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#define SRC_SINGLE -#define DST_HALF -#include "fp16_trunc_impl.inc" - -// Use a forwarding definition and noinline to implement a poor man's alias, -// as there isn't a good cross-platform way of defining one. -COMPILER_RT_ABI NOINLINE dst_t __truncsfhf2(float a) { - return __truncXfYf2__(a); -} - -COMPILER_RT_ABI dst_t __gnu_f2h_ieee(float a) { return __truncsfhf2(a); } - -#if defined(__ARM_EABI__) -#if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI dst_t __aeabi_f2h(float a) { return __truncsfhf2(a); } -#else -COMPILER_RT_ALIAS(__truncsfhf2, __aeabi_f2h) -#endif -#endif