Skip to content

Commit

Permalink
Automatic commit by thirdparty_tool: update fp16 to commit 98b0a46bce…
Browse files Browse the repository at this point in the history
…017382a6351a19577ec43a715b6835.

Used commit of the fp16 repository: https://github.com/Maratyszcza/FP16/commits/98b0a46bce017382a6351a19577ec43a715b6835
  • Loading branch information
mbautin committed Sep 21, 2024
1 parent 1665662 commit 3adbdae
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 148 deletions.
10 changes: 6 additions & 4 deletions src/inline-thirdparty/fp16/fp16.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

#include <fp16/fp16.h>

#if defined(PSIMD_H)
#include <fp16/psimd.h>
#endif

#endif /* FP16_H */

// This file is part of the fp16 inline third-party dependency of YugabyteDB.
// Git repo: https://github.com/Maratyszcza/FP16/
// Git commit: 98b0a46bce017382a6351a19577ec43a715b6835
//
// See also src/inline-thirdparty/README.md.
24 changes: 15 additions & 9 deletions src/inline-thirdparty/fp16/fp16/bitcasts.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <immintrin.h>
#endif

#if defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
#include <intrin.h>
#endif

Expand All @@ -22,9 +22,9 @@ static inline float fp32_from_bits(uint32_t w) {
return as_float(w);
#elif defined(__CUDA_ARCH__)
return __uint_as_float((unsigned int) w);
#elif defined(__INTEL_COMPILER)
#elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64))
return _castu32_f32(w);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
return _CopyFloatFromInt32((__int32) w);
#else
union {
Expand All @@ -40,9 +40,9 @@ static inline uint32_t fp32_to_bits(float f) {
return as_uint(f);
#elif defined(__CUDA_ARCH__)
return (uint32_t) __float_as_uint(f);
#elif defined(__INTEL_COMPILER)
#elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64))
return _castf32_u32(f);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
return (uint32_t) _CopyInt32FromFloat(f);
#else
union {
Expand All @@ -58,9 +58,9 @@ static inline double fp64_from_bits(uint64_t w) {
return as_double(w);
#elif defined(__CUDA_ARCH__)
return __longlong_as_double((long long) w);
#elif defined(__INTEL_COMPILER)
#elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64))
return _castu64_f64(w);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
return _CopyDoubleFromInt64((__int64) w);
#else
union {
Expand All @@ -76,9 +76,9 @@ static inline uint64_t fp64_to_bits(double f) {
return as_ulong(f);
#elif defined(__CUDA_ARCH__)
return (uint64_t) __double_as_longlong(f);
#elif defined(__INTEL_COMPILER)
#elif defined(__INTEL_COMPILER) || defined(_MSC_VER) && (_MSC_VER >= 1932) && (defined(_M_IX86) || defined(_M_X64))
return _castf64_u64(f);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
return (uint64_t) _CopyInt64FromDouble(f);
#else
union {
Expand All @@ -90,3 +90,9 @@ static inline uint64_t fp64_to_bits(double f) {
}

#endif /* FP16_BITCASTS_H */

// This file is part of the fp16 inline third-party dependency of YugabyteDB.
// Git repo: https://github.com/Maratyszcza/FP16/
// Git commit: 98b0a46bce017382a6351a19577ec43a715b6835
//
// See also src/inline-thirdparty/README.md.
78 changes: 74 additions & 4 deletions src/inline-thirdparty/fp16/fp16/fp16.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,18 @@
#include <math.h>
#endif

#ifdef _MSC_VER
#include <fp16/bitcasts.h>
#include <fp16/macros.h>

#if defined(_MSC_VER)
#include <intrin.h>
#endif

#include <fp16/bitcasts.h>
#if defined(__F16C__) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE
#include <immintrin.h>
#endif
#if (defined(__aarch64__) || defined(_M_ARM64)) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE
#include <arm_neon.h>
#endif


/*
Expand Down Expand Up @@ -106,6 +113,31 @@ static inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) {
* floating-point operations and bitcasts between integer and floating-point variables.
*/
static inline float fp16_ieee_to_fp32_value(uint16_t h) {
#if FP16_USE_NATIVE_CONVERSION
#if FP16_USE_FLOAT16_TYPE
union {
uint16_t as_bits;
_Float16 as_value;
} fp16 = { h };
return (float) fp16.as_value;
#elif FP16_USE_FP16_TYPE
union {
uint16_t as_bits;
__fp16 as_value;
} fp16 = { h };
return (float) fp16.as_value;
#else
#if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)
return _cvtsh_ss((unsigned short) h);
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128((int) (unsigned int) h)));
#elif defined(_M_ARM64) || defined(__aarch64__)
return vgetq_lane_f32(vcvt_f32_f16(vreinterpret_f16_u16(vdup_n_u16(h))), 0);
#else
#error "Archtecture- or compiler-specific implementation required"
#endif
#endif
#else
/*
* Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
* +---+-----+------------+-------------------+
Expand Down Expand Up @@ -211,6 +243,7 @@ static inline float fp16_ieee_to_fp32_value(uint16_t h) {
const uint32_t result = sign |
(two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
return fp32_from_bits(result);
#endif
}

/*
Expand All @@ -221,14 +254,44 @@ static inline float fp16_ieee_to_fp32_value(uint16_t h) {
* floating-point operations and bitcasts between integer and floating-point variables.
*/
static inline uint16_t fp16_ieee_from_fp32_value(float f) {
#if FP16_USE_NATIVE_CONVERSION
#if FP16_USE_FLOAT16_TYPE
union {
_Float16 as_value;
uint16_t as_bits;
} fp16 = { (_Float16) f };
return fp16.as_bits;
#elif FP16_USE_FP16_TYPE
union {
__fp16 as_value;
uint16_t as_bits;
} fp16 = { (__fp16) f };
return fp16.as_bits;
#else
#if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)
return _cvtss_sh(f, _MM_FROUND_CUR_DIRECTION);
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
return (uint16_t) _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(f), _MM_FROUND_CUR_DIRECTION));
#elif defined(_M_ARM64) || defined(__aarch64__)
return vget_lane_u16(vcvt_f16_f32(vdupq_n_f32(f)), 0);
#else
#error "Archtecture- or compiler-specific implementation required"
#endif
#endif
#else
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
const float scale_to_inf = 0x1.0p+112f;
const float scale_to_zero = 0x1.0p-110f;
#else
const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
#endif
float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
#if defined(_MSC_VER) && defined(_M_IX86_FP) && (_M_IX86_FP == 0) || defined(__GNUC__) && defined(__FLT_EVAL_METHOD__) && (__FLT_EVAL_METHOD__ != 0)
const volatile float saturated_f = fabsf(f) * scale_to_inf;
#else
const float saturated_f = fabsf(f) * scale_to_inf;
#endif
float base = saturated_f * scale_to_zero;

const uint32_t w = fp32_to_bits(f);
const uint32_t shl1_w = w + w;
Expand All @@ -244,6 +307,7 @@ static inline uint16_t fp16_ieee_from_fp32_value(float f) {
const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
const uint32_t nonsign = exp_bits + mantissa_bits;
return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
#endif
}

/*
Expand Down Expand Up @@ -449,3 +513,9 @@ static inline uint16_t fp16_alt_from_fp32_value(float f) {
}

#endif /* FP16_FP16_H */

// This file is part of the fp16 inline third-party dependency of YugabyteDB.
// Git repo: https://github.com/Maratyszcza/FP16/
// Git commit: 98b0a46bce017382a6351a19577ec43a715b6835
//
// See also src/inline-thirdparty/README.md.
52 changes: 52 additions & 0 deletions src/inline-thirdparty/fp16/fp16/macros.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#pragma once
#ifndef FP16_MACROS_H
#define FP16_MACROS_H

/*
 * Compile-time switches consumed by the FP16 conversion routines.
 *
 * Every switch may be predefined by the build (e.g. -DFP16_USE_NATIVE_CONVERSION=0);
 * a predefined value is never overridden here. When a switch is not predefined, it is
 * set to 1 if the predicate documented next to it holds and to 0 otherwise, so after
 * this header each switch is always defined and usable directly in #if expressions.
 */

/*
 * FP16_USE_NATIVE_CONVERSION defaults to 1 when any of the following holds:
 *   - __INTEL_COMPILER or __GNUC__ is defined together with __F16C__;
 *   - _MSC_VER is defined for _M_IX86 or _M_X64 together with __AVX2__;
 *   - _MSC_VER is defined together with _M_ARM64;
 *   - __GNUC__ is defined together with __aarch64__.
 */
#if !defined(FP16_USE_NATIVE_CONVERSION)
#if ((defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)) || \
	(defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)) || \
	(defined(_MSC_VER) && defined(_M_ARM64)) || \
	(defined(__GNUC__) && defined(__aarch64__))
#define FP16_USE_NATIVE_CONVERSION 1
#else
#define FP16_USE_NATIVE_CONVERSION 0
#endif
#endif /* !defined(FP16_USE_NATIVE_CONVERSION) */

/*
 * FP16_USE_FLOAT16_TYPE defaults to 1 only when __GNUC__ >= 12 is reported, neither
 * __clang__ nor __INTEL_COMPILER is defined, and __F16C__ is defined.
 */
#if !defined(FP16_USE_FLOAT16_TYPE)
#if !defined(__clang__) && !defined(__INTEL_COMPILER) && \
	defined(__GNUC__) && (__GNUC__ >= 12) && defined(__F16C__)
#define FP16_USE_FLOAT16_TYPE 1
#else
#define FP16_USE_FLOAT16_TYPE 0
#endif
#endif /* !defined(FP16_USE_FLOAT16_TYPE) */

/*
 * FP16_USE_FP16_TYPE defaults to 1 when either:
 *   - __clang__ is defined together with __F16C__ or __aarch64__;
 *   - __clang__ is not defined, and __GNUC__ is defined together with __aarch64__.
 */
#if !defined(FP16_USE_FP16_TYPE)
#if (defined(__clang__) && (defined(__F16C__) || defined(__aarch64__))) || \
	(!defined(__clang__) && defined(__GNUC__) && defined(__aarch64__))
#define FP16_USE_FP16_TYPE 1
#else
#define FP16_USE_FP16_TYPE 0
#endif
#endif /* !defined(FP16_USE_FP16_TYPE) */

#endif /* FP16_MACROS_H */

// This file is part of the fp16 inline third-party dependency of YugabyteDB.
// Git repo: https://github.com/Maratyszcza/FP16/
// Git commit: 98b0a46bce017382a6351a19577ec43a715b6835
//
// See also src/inline-thirdparty/README.md.
131 changes: 0 additions & 131 deletions src/inline-thirdparty/fp16/fp16/psimd.h

This file was deleted.

0 comments on commit 3adbdae

Please sign in to comment.