Skip to content

Commit

Permalink
Implement more bf16/fp16 compiler runtimes
Browse files Browse the repository at this point in the history
Fixes #1259
  • Loading branch information
jart committed Sep 13, 2024
1 parent 6b10f4d commit b5fcb59
Show file tree
Hide file tree
Showing 11 changed files with 209 additions and 178 deletions.
20 changes: 20 additions & 0 deletions libc/integral/c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,26 @@ typedef __UINT64_TYPE__ uint64_t;
typedef __INTMAX_TYPE__ intmax_t;
typedef __UINTMAX_TYPE__ uintmax_t;

/* TODO(jart): re-import compiler-rt once they have it */
/* On x86-64, when the compiler predefines the __FLT128_* characteristic
   macros, remove every one of them so that code included afterwards which
   tests for these macros no longer sees them.
   NOTE(review): presumably this hides _Float128 support until the runtime
   pieces mentioned in the TODO above exist -- confirm. */
#if defined(__x86_64__) && defined(__FLT128_MAX_10_EXP__)
#undef __FLT128_MAX_10_EXP__
#undef __FLT128_DENORM_MIN__
#undef __FLT128_MIN_EXP__
#undef __FLT128_MIN_10_EXP__
#undef __FLT128_MANT_DIG__
#undef __FLT128_HAS_INFINITY__
#undef __FLT128_EPSILON__
#undef __FLT128_MAX_EXP__
#undef __FLT128_HAS_DENORM__
#undef __FLT128_DIG__
#undef __FLT128_MIN__
#undef __FLT128_MAX__
#undef __FLT128_NORM_MAX__
#undef __FLT128_HAS_QUIET_NAN__
#undef __FLT128_IS_IEC_60559__
#undef __FLT128_DECIMAL_DIG__
#endif

#define __DEFINED_max_align_t
typedef long double max_align_t;

Expand Down
67 changes: 62 additions & 5 deletions libc/intrin/truncsfbf2.c → libc/intrin/brain16.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,53 @@
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/

__bf16 __truncsfbf2(float f) {
/**
* @fileoverview bf16 compiler runtime
*/

/**
 * Widens a brain16 value to binary32.
 *
 * The 16 bits of the input become the upper half of the binary32
 * encoding and the lower half is zero. Any nan input comes back with
 * its quiet bit set.
 */
_Float32 __extendbfsf2(__bf16 f) {
  union {
    __bf16 value;
    uint16_t bits;
  } src = {f};
  union {
    uint32_t bits;
    _Float32 value;
  } dst;

  // brain16 is the high half of the binary32 layout
  dst.bits = (uint32_t)src.bits << 16;

  // all-ones exponent with a nonzero fraction is nan; make it quiet
  const uint32_t magnitude = dst.bits & 0x7fffffff;
  if (magnitude > 0x7f800000)
    dst.bits |= 0x00400000;

  return dst.value;
}

/**
 * Widens brain16 to binary64 by first widening to binary32 with
 * __extendbfsf2() and then converting that result to the return type.
 */
_Float64 __extendbfdf2(__bf16 f) {
  const _Float32 single = __extendbfsf2(f);
  return (_Float64)single;
}

#ifdef __x86_64__
/**
 * Widens brain16 to the x86 80-bit extended type by first widening to
 * binary32 with __extendbfsf2() and then converting that result.
 */
__float80 __extendbfxf2(__bf16 f) {
  const _Float32 single = __extendbfsf2(f);
  return (__float80)single;
}
#endif

#ifdef __aarch64__
/**
 * Widens brain16 to binary128 by first widening to binary32 with
 * __extendbfsf2() and then converting that result.
 */
_Float128 __extendbftf2(__bf16 f) {
  const _Float32 single = __extendbfsf2(f);
  return (_Float128)single;
}
#endif

__bf16 __truncsfbf2(_Float32 f) {
union {
float f;
unsigned i;
_Float32 f;
uint32_t i;
} uf = {f};
unsigned x = uf.i;
uint32_t x = uf.i;

if ((x & 0x7fffffff) > 0x7f800000)
// force nan to quiet
Expand All @@ -33,8 +74,24 @@ __bf16 __truncsfbf2(float f) {

// pun to bf16
union {
unsigned short i;
uint16_t i;
__bf16 f;
} ub = {x};
return ub.f;
}

/**
 * Narrows binary64 to brain16 without double rounding.
 *
 * Rounding to binary32 with round-to-nearest and then rounding again to
 * bf16 can produce the wrong result when the first step lands exactly on
 * a bf16 tie. For example 1 + 2^-8 + 2^-40 becomes 1 + 2^-8 in binary32,
 * which then ties to 1.0, although the nearest bf16 is 1 + 2^-7. To avoid
 * that, an inexact binary32 intermediate is moved onto the adjacent
 * encoding whose lowest bit is set (round-to-odd), so that bit works as
 * a sticky bit for the final rounding performed by __truncsfbf2().
 *
 * @param f is the binary64 value to narrow
 * @return result of __truncsfbf2() applied to the round-to-odd binary32
 */
__bf16 __truncdfbf2(_Float64 f) {
  union {
    _Float32 f;
    uint32_t i;
  } u = {(_Float32)f};
  _Float64 back = u.f;

  // f == f rejects nan; an odd encoding already is the round-to-odd result
  if (back != f && f == f && !(u.i & 1)) {
    // the odd neighbour sits on the far side of f: one encoding up in
    // magnitude when rounding shrank the value, one down when it grew
    if (f < 0 ? back > f : back < f)
      u.i++;
    else
      u.i--;
  }
  return __truncsfbf2(u.f);
}

#ifdef __x86_64__
/**
 * Narrows the x86 80-bit extended type to brain16 without double rounding.
 *
 * A plain conversion to binary32 followed by a second rounding to bf16
 * can be wrong when the first step lands on a bf16 tie (for example
 * 1 + 2^-8 + 2^-40 would end up as 1.0 rather than 1 + 2^-7). An inexact
 * binary32 intermediate is therefore moved onto the adjacent encoding
 * whose lowest bit is set (round-to-odd), so that bit works as a sticky
 * bit for the final rounding performed by __truncsfbf2().
 *
 * @param f is the extended-precision value to narrow
 * @return result of __truncsfbf2() applied to the round-to-odd binary32
 */
__bf16 __truncxfbf2(__float80 f) {
  union {
    _Float32 f;
    uint32_t i;
  } u = {(_Float32)f};
  __float80 back = u.f;

  // f == f rejects nan; an odd encoding already is the round-to-odd result
  if (back != f && f == f && !(u.i & 1)) {
    // step toward larger magnitude if rounding shrank the value,
    // otherwise toward smaller magnitude
    if (f < 0 ? back > f : back < f)
      u.i++;
    else
      u.i--;
  }
  return __truncsfbf2(u.f);
}
#endif

#ifdef __aarch64__
/**
 * Narrows binary128 to brain16 without double rounding.
 *
 * A plain conversion to binary32 followed by a second rounding to bf16
 * can be wrong when the first step lands on a bf16 tie (for example
 * 1 + 2^-8 + 2^-40 would end up as 1.0 rather than 1 + 2^-7). An inexact
 * binary32 intermediate is therefore moved onto the adjacent encoding
 * whose lowest bit is set (round-to-odd), so that bit works as a sticky
 * bit for the final rounding performed by __truncsfbf2().
 *
 * @param f is the binary128 value to narrow
 * @return result of __truncsfbf2() applied to the round-to-odd binary32
 */
__bf16 __trunctfbf2(_Float128 f) {
  union {
    _Float32 f;
    uint32_t i;
  } u = {(_Float32)f};
  _Float128 back = u.f;

  // f == f rejects nan; an odd encoding already is the round-to-odd result
  if (back != f && f == f && !(u.i & 1)) {
    // step toward larger magnitude if rounding shrank the value,
    // otherwise toward smaller magnitude
    if (f < 0 ? back > f : back < f)
      u.i++;
    else
      u.i--;
  }
  return __truncsfbf2(u.f);
}
#endif
39 changes: 0 additions & 39 deletions libc/intrin/extendbfsf2.c

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
//===----------------------------------------------------------------------===//
//

__static_yoink("huge_compiler_rt_license");

#define QUAD_PRECISION
#include "third_party/compiler_rt/fp_lib.inc"

Expand All @@ -19,7 +17,7 @@ __static_yoink("huge_compiler_rt_license");
#include "third_party/compiler_rt/fp_extend_impl.inc"

// Widens a float to long double by delegating to __extendXfYf2__().
// NOTE(review): that helper presumably comes from the fp_extend_impl.inc
// include earlier in this file -- confirm.
COMPILER_RT_ABI long double __extendsftf2(float a) {
  return __extendXfYf2__(a);
}

#endif
137 changes: 125 additions & 12 deletions libc/intrin/float16.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,135 @@
* @fileoverview fp16 compiler runtime
*/

#define asint(x) ((union pun){x}).i
#define isnan(x) (((x) & 0x7fff) > 0x7c00)
// Nonzero when the 16-bit pattern x, with its sign bit masked off, is
// greater than 0x7c00, i.e. the binary16 exponent field is all ones and
// the fraction is nonzero (nan). Note that x is expanded exactly once.
#define isnan16(x) (((x) & 0x7fff) > 0x7c00)

union pun {
_Float16 f;
unsigned short i;
};
/** Reinterprets the low 16 bits of `x` as a half-precision value. */
static inline _Float16 tofloat16(int x) {
  union {
    uint16_t bits;
    _Float16 value;
  } pun;
  pun.bits = (uint16_t)x;  // higher bits of x are discarded
  return pun.value;
}

/** Returns the 16-bit encoding of a half-precision value as an int. */
static inline int fromfloat16(_Float16 x) {
  union {
    _Float16 value;
    uint16_t bits;
  } pun;
  pun.value = x;
  return pun.bits;  // zero-extended, so the result is in 0..0xffff
}

/** Reinterprets a 32-bit pattern as a binary32 value. */
static inline _Float32 tofloat32(uint32_t w) {
  union {
    uint32_t bits;
    _Float32 value;
  } pun = {w};
  return pun.value;
}

/** Returns the 32-bit encoding of a binary32 value. */
static inline uint32_t fromfloat32(_Float32 f) {
  union {
    _Float32 value;
    uint32_t bits;
  } pun = {f};
  return pun.bits;
}

/** Returns `x` with its sign bit cleared, computed on the raw encoding. */
static inline _Float32 fabs32(_Float32 x) {
  const uint32_t encoded = fromfloat32(x);
  const uint32_t unsigned_bits = encoded & 0x7fffffffu;
  return tofloat32(unsigned_bits);
}

/**
 * Reports whether two half-precision values have the same encoding and
 * neither of them is nan.
 *
 * @return 1 if the 16-bit patterns match and both are non-nan, else 0
 *
 * NOTE(review): the comparison is on raw bits, so +0 and -0 are reported
 * as different -- confirm that callers expect this.
 */
int __eqhf2(_Float16 fx, _Float16 fy) {
  int x = fromfloat16(fx);
  int y = fromfloat16(fy);
  return (x == y) & !isnan16(x) & !isnan16(y);
}

/**
 * Reports whether two half-precision values have different encodings
 * while neither of them is nan.
 *
 * @return 1 if the 16-bit patterns differ and both are non-nan, else 0
 *
 * NOTE(review): a nan operand yields 0 here, and +0 / -0 count as
 * different because raw bits are compared -- confirm this matches the
 * convention the compiler expects from this routine.
 */
int __nehf2(_Float16 fx, _Float16 fy) {
  int x = fromfloat16(fx);
  int y = fromfloat16(fy);
  return (x != y) & !isnan16(x) & !isnan16(y);
}

/**
 * Widens a half-precision value to binary32.
 *
 * Two candidate results are computed with ordinary float arithmetic, one
 * valid when the binary16 exponent field is nonzero and one valid when it
 * is zero, and the right one is chosen at the end before the sign bit is
 * put back.
 */
_Float32 __extendhfsf2(_Float16 f) {
uint16_t h = fromfloat16(f);
// place the 16 half bits in the upper half of a 32-bit word
const uint32_t w = (uint32_t)h << 16;
const uint32_t sign = w & 0x80000000u;
// doubling shifts the sign bit out, leaving exponent in bits 27..31
const uint32_t two_w = w + w;
// added to the exponent field below; undone by exp_scale afterwards
const uint32_t exp_offset = 0xE0u << 23;
// both branches define 2^-112: as a hex float literal where the
// language mode allows one, otherwise through its bit pattern
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || \
defined(__GNUC__) && !defined(__STRICT_ANSI__)
const _Float32 exp_scale = 0x1.0p-112f;
#else
const _Float32 exp_scale = tofloat32(0x7800000u);
#endif
// nonzero exponent: move exponent+fraction into binary32 position with
// the exponent raised by 0xE0, then multiply by 2^-112; an all-ones
// half exponent thereby becomes an all-ones binary32 exponent
const _Float32 normalized_value =
tofloat32((two_w >> 4) + exp_offset) * exp_scale;
// zero exponent: put the half fraction in the low bits of a float whose
// exponent field is 126 (so it reads 0.5 + fraction * 2^-24) and
// subtract 0.5 to leave just fraction * 2^-24
const uint32_t magic_mask = 126u << 23;
const _Float32 magic_bias = 0.5f;
const _Float32 denormalized_value =
tofloat32((two_w >> 17) | magic_mask) - magic_bias;
// two_w below 1 << 27 means the five exponent bits are all zero
const uint32_t denormalized_cutoff = 1u << 27;
const uint32_t result =
sign | (two_w < denormalized_cutoff ? fromfloat32(denormalized_value)
: fromfloat32(normalized_value));
return tofloat32(result);
}

/**
 * Widens half precision to binary64 by first widening to binary32 with
 * __extendhfsf2() and then converting that result to the return type.
 */
_Float64 __extendhfdf2(_Float16 f) {
  const _Float32 single = __extendhfsf2(f);
  return (_Float64)single;
}

#ifdef __x86_64__
/**
 * Widens half precision to the x86 80-bit extended type by first
 * widening to binary32 with __extendhfsf2() and converting that result.
 */
__float80 __extendhfxf2(_Float16 f) {
  const _Float32 single = __extendhfsf2(f);
  return (__float80)single;
}
#endif

#ifdef __aarch64__
/**
 * Widens half precision to binary128 by first widening to binary32 with
 * __extendhfsf2() and converting that result.
 */
_Float128 __extendhftf2(_Float16 f) {
  const _Float32 single = __extendhfsf2(f);
  return (_Float128)single;
}
#endif

/**
 * Narrows a binary32 value to half precision.
 *
 * The magnitude is rescaled and combined with an exponent-dependent
 * constant using float arithmetic, and the half exponent and fraction
 * fields are then read back out of the resulting bit pattern. Any nan
 * input produces the pattern 0x7E00 with the input's sign bit.
 */
_Float16 __truncsfhf2(_Float32 f) {
// both branches define 2^112 and 2^-110: as hex float literals where the
// language mode allows them, otherwise through their bit patterns
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || \
defined(__GNUC__) && !defined(__STRICT_ANSI__)
const _Float32 scale_to_inf = 0x1.0p+112f;
const _Float32 scale_to_zero = 0x1.0p-110f;
#else
const _Float32 scale_to_inf = tofloat32(0x77800000u);
const _Float32 scale_to_zero = tofloat32(0x08800000u);
#endif
// NOTE(review): the first multiply presumably makes magnitudes that are
// too large for half precision overflow to infinity -- confirm
_Float32 base = (fabs32(f) * scale_to_inf) * scale_to_zero;
const uint32_t w = fromfloat32(f);
// doubling shifts the sign bit out, leaving the exponent in bits 24..31
const uint32_t shl1_w = w + w;
const uint32_t sign = w & 0x80000000u;
// isolated exponent field, clamped from below at 0x71000000
uint32_t bias = shl1_w & 0xFF000000u;
if (bias < 0x71000000u)
bias = 0x71000000u;
// add a power of two built from the clamped exponent
// NOTE(review): this float addition appears to be what rounds the
// significand to half precision -- confirm
base = tofloat32((bias >> 1) + 0x07800000u) + base;
const uint32_t bits = fromfloat32(base);
// half exponent field and low 12 bits; summed rather than or'ed
const uint32_t exp_bits = (bits >> 13) & 0x00007C00u;
const uint32_t mantissa_bits = bits & 0x00000FFFu;
const uint32_t nonsign = exp_bits + mantissa_bits;
// shl1_w above 0xFF000000 means all-ones exponent with nonzero fraction
return tofloat16((sign >> 16) | (shl1_w > 0xFF000000u ? 0x7E00u : nonsign));
}

/**
 * Narrows binary64 to half precision without double rounding.
 *
 * Rounding to binary32 with round-to-nearest and then rounding again to
 * binary16 can produce the wrong result when the first step lands exactly
 * on a binary16 tie. For example 1 + 2^-11 + 2^-40 becomes 1 + 2^-11 in
 * binary32, which then ties to 1.0, although the nearest half value is
 * 1 + 2^-10. To avoid that, an inexact binary32 intermediate is moved
 * onto the adjacent encoding whose lowest bit is set (round-to-odd), so
 * that bit works as a sticky bit for the rounding in __truncsfhf2().
 *
 * @param f is the binary64 value to narrow
 * @return result of __truncsfhf2() applied to the round-to-odd binary32
 */
_Float16 __truncdfhf2(_Float64 f) {
  union {
    _Float32 f;
    uint32_t i;
  } u = {(_Float32)f};
  _Float64 back = u.f;

  // f == f rejects nan; an odd encoding already is the round-to-odd result
  if (back != f && f == f && !(u.i & 1)) {
    // the odd neighbour sits on the far side of f: one encoding up in
    // magnitude when rounding shrank the value, one down when it grew
    if (f < 0 ? back > f : back < f)
      u.i++;
    else
      u.i--;
  }
  return __truncsfhf2(u.f);
}

#ifdef __x86_64__
/**
 * Narrows the x86 80-bit extended type to half precision without double
 * rounding.
 *
 * A plain conversion to binary32 followed by a second rounding to
 * binary16 can be wrong when the first step lands on a binary16 tie (for
 * example 1 + 2^-11 + 2^-40 would end up as 1.0 rather than 1 + 2^-10).
 * An inexact binary32 intermediate is therefore moved onto the adjacent
 * encoding whose lowest bit is set (round-to-odd), so that bit works as
 * a sticky bit for the rounding in __truncsfhf2().
 *
 * @param f is the extended-precision value to narrow
 * @return result of __truncsfhf2() applied to the round-to-odd binary32
 */
_Float16 __truncxfhf2(__float80 f) {
  union {
    _Float32 f;
    uint32_t i;
  } u = {(_Float32)f};
  __float80 back = u.f;

  // f == f rejects nan; an odd encoding already is the round-to-odd result
  if (back != f && f == f && !(u.i & 1)) {
    // step toward larger magnitude if rounding shrank the value,
    // otherwise toward smaller magnitude
    if (f < 0 ? back > f : back < f)
      u.i++;
    else
      u.i--;
  }
  return __truncsfhf2(u.f);
}
#endif

#ifdef __aarch64__
/**
 * Narrows binary128 to half precision without double rounding.
 *
 * A plain conversion to binary32 followed by a second rounding to
 * binary16 can be wrong when the first step lands on a binary16 tie (for
 * example 1 + 2^-11 + 2^-40 would end up as 1.0 rather than 1 + 2^-10).
 * An inexact binary32 intermediate is therefore moved onto the adjacent
 * encoding whose lowest bit is set (round-to-odd), so that bit works as
 * a sticky bit for the rounding in __truncsfhf2().
 *
 * @param f is the binary128 value to narrow
 * @return result of __truncsfhf2() applied to the round-to-odd binary32
 */
_Float16 __trunctfhf2(_Float128 f) {
  union {
    _Float32 f;
    uint32_t i;
  } u = {(_Float32)f};
  _Float128 back = u.f;

  // f == f rejects nan; an odd encoding already is the round-to-odd result
  if (back != f && f == f && !(u.i & 1)) {
    // step toward larger magnitude if rounding shrank the value,
    // otherwise toward smaller magnitude
    if (f < 0 ? back > f : back < f)
      u.i++;
    else
      u.i--;
  }
  return __truncsfhf2(u.f);
}
#endif
24 changes: 0 additions & 24 deletions libc/intrin/truncdfbf2.c

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//

__static_yoink("huge_compiler_rt_license");

#define QUAD_PRECISION
#include "third_party/compiler_rt/fp_lib.inc"

Expand All @@ -18,7 +16,7 @@ __static_yoink("huge_compiler_rt_license");
#include "third_party/compiler_rt/fp_trunc_impl.inc"

// Narrows a long double to float by delegating to __truncXfYf2__().
// NOTE(review): that helper presumably comes from the fp_trunc_impl.inc
// include earlier in this file -- confirm.
COMPILER_RT_ABI float __trunctfsf2(long double a) {
  return __truncXfYf2__(a);
}

#endif
17 changes: 0 additions & 17 deletions third_party/compiler_rt/extendhfdf2.c

This file was deleted.

Loading

0 comments on commit b5fcb59

Please sign in to comment.