From 944639a8474b6071eae8482ff618019b375c35a6 Mon Sep 17 00:00:00 2001
From: Alex Reinking
Date: Wed, 5 Jun 2024 16:09:45 -0400
Subject: [PATCH 1/3] Fix OpenCL positive and negative INF constants.

Fixes #8257
---
 src/CodeGen_OpenCL_Dev.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/CodeGen_OpenCL_Dev.cpp b/src/CodeGen_OpenCL_Dev.cpp
index d7c7951936f3..9221acc57db7 100644
--- a/src/CodeGen_OpenCL_Dev.cpp
+++ b/src/CodeGen_OpenCL_Dev.cpp
@@ -12,6 +12,7 @@
 #include "EliminateBoolVectors.h"
 #include "EmulateFloat16Math.h"
 #include "ExprUsesVar.h"
+#include "Float16.h"
 #include "IRMutator.h"
 #include "IROperator.h"
 #include "Simplify.h"
@@ -1183,11 +1184,15 @@ void CodeGen_OpenCL_Dev::init_module() {
     }
 
     if (target.has_feature(Target::CLHalf)) {
+        constexpr unsigned short nan_f16 = float16_t::exponent_mask | float16_t::mantissa_mask;
+        constexpr unsigned short neg_inf_f16 = float16_t::sign_mask | float16_t::exponent_mask;
+        constexpr unsigned short inf_f16 = float16_t::exponent_mask;
+
         src_stream << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
                    << "inline half half_from_bits(unsigned short x) {return __builtin_astype(x, half);}\n"
-                   << "inline half nan_f16() { return half_from_bits(32767); }\n"
-                   << "inline half neg_inf_f16() { return half_from_bits(31744); }\n"
-                   << "inline half inf_f16() { return half_from_bits(64512); }\n"
+                   << "inline half nan_f16() { return half_from_bits(" << nan_f16 << "); }\n"
+                   << "inline half neg_inf_f16() { return half_from_bits(" << neg_inf_f16 << "); }\n"
+                   << "inline half inf_f16() { return half_from_bits(" << inf_f16 << "); }\n"
                    << "inline bool is_nan_f16(half x) {return isnan(x); }\n"
                    << "inline bool is_inf_f16(half x) {return isinf(x); }\n"
                    << "inline bool is_finite_f16(half x) {return isfinite(x); }\n"

From 91b441873676112807c5fa4bcc5e76cf3f88ef8e Mon Sep 17 00:00:00 2001
From: Alex Reinking
Date: Thu, 6 Jun 2024 08:57:23 -0400
Subject: [PATCH 2/3] Use the float16_t::make_* functions instead.

---
 src/CodeGen_OpenCL_Dev.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/CodeGen_OpenCL_Dev.cpp b/src/CodeGen_OpenCL_Dev.cpp
index 9221acc57db7..f99d5a859df7 100644
--- a/src/CodeGen_OpenCL_Dev.cpp
+++ b/src/CodeGen_OpenCL_Dev.cpp
@@ -1184,9 +1184,9 @@ void CodeGen_OpenCL_Dev::init_module() {
     }
 
     if (target.has_feature(Target::CLHalf)) {
-        constexpr unsigned short nan_f16 = float16_t::exponent_mask | float16_t::mantissa_mask;
-        constexpr unsigned short neg_inf_f16 = float16_t::sign_mask | float16_t::exponent_mask;
-        constexpr unsigned short inf_f16 = float16_t::exponent_mask;
+        const uint16_t nan_f16 = float16_t::make_nan().to_bits();
+        const uint16_t neg_inf_f16 = float16_t::make_negative_infinity().to_bits();
+        const uint16_t inf_f16 = float16_t::make_infinity().to_bits();
 
         src_stream << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
                    << "inline half half_from_bits(unsigned short x) {return __builtin_astype(x, half);}\n"

From 44fe3f37416ccfe7c0b8d97b6fcd1038863c4430 Mon Sep 17 00:00:00 2001
From: Alex Reinking
Date: Fri, 7 Jun 2024 12:29:43 -0400
Subject: [PATCH 3/3] Enable gpu_f16_intrinsics on OpenCL+CLHalf

---
 test/correctness/gpu_f16_intrinsics.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/correctness/gpu_f16_intrinsics.cpp b/test/correctness/gpu_f16_intrinsics.cpp
index b8bad8eb0a37..17032ecbff07 100644
--- a/test/correctness/gpu_f16_intrinsics.cpp
+++ b/test/correctness/gpu_f16_intrinsics.cpp
@@ -4,8 +4,9 @@ using namespace Halide;
 
 int main(int argc, char *argv[]) {
     auto target = get_jit_target_from_environment();
-    if (!target.has_feature(Target::Metal)) {
-        printf("[SKIP] No metal target enabled.\n");
+    if (!target.has_feature(Target::Metal) &&
+        !target.features_all_of({Target::OpenCL, Target::CLHalf})) {
+        printf("[SKIP] Test only applies to Metal and OpenCL+CLHalf.\n");
         return 0;
     }
 