From f67439cb43fca6f8d7345305fc9dff6fca1468c2 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 21 Nov 2023 18:06:36 -0300 Subject: [PATCH] Make have_fma consistent between interpreter and compiled (#52206) Currently the interpreter always returns false. Which isn't very good. Make it follow whatever the JIT will do. (cherry picked from commit a6c656e6c47ff2b1237c92e90ba73ac267fc1dc0) --- src/jl_exported_funcs.inc | 1 + src/llvm-cpufeatures.cpp | 4 ++-- src/processor.h | 2 ++ src/processor_arm.cpp | 16 ++++++++++++++++ src/processor_fallback.cpp | 5 +++++ src/processor_x86.cpp | 11 +++++++++++ src/runtime_intrinsics.c | 11 ++++++++--- test/llvmpasses/cpu-features.ll | 2 ++ test/sysinfo.jl | 2 ++ 9 files changed, 49 insertions(+), 5 deletions(-) diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 745905b113622..ee255a0e1a876 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -214,6 +214,7 @@ XX(jl_get_binding_wr) \ XX(jl_get_cpu_name) \ XX(jl_get_cpu_features) \ + XX(jl_cpu_has_fma) \ XX(jl_get_current_task) \ XX(jl_get_default_sysimg_path) \ XX(jl_get_excstack) \ diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp index 77f1baf6237c4..3d3fb6b0583da 100644 --- a/src/llvm-cpufeatures.cpp +++ b/src/llvm-cpufeatures.cpp @@ -60,7 +60,7 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString(); - SmallVector Features; + SmallVector Features; FS.split(Features, ','); for (StringRef Feature : Features) if (TT.isARM()) { @@ -68,7 +68,7 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS return typ == "f32" || typ == "f64"; else if (Feature == "+vfp4sp") return typ == "f32"; - } else { + } else if (TT.isX86()) { if (Feature == "+fma" || Feature == "+fma4") return typ == "f32" || typ == "f64"; } diff --git a/src/processor.h b/src/processor.h index a3ebdf4f8c605..09ee780f2b9c5 100644 --- a/src/processor.h +++ b/src/processor.h @@ -224,6 +224,8 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void); // Return the features of the host CPU as a julia string. JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void); // Dump the name and feature set of the host CPU +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits); +// Check if the CPU has native FMA instructions; // For debugging only JL_DLLEXPORT void jl_dump_host_cpu(void); JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data); diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index f47d1509f4975..0018d2ec925d9 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -1808,6 +1808,22 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void) return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str()); } +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) +{ +#ifdef _CPU_AARCH64_ + return jl_true; +#else + TargetData target = jit_targets.front(); + FeatureList features = target.en.features; + if (bits == 32 && test_nbit(features, Feature::vfp4sp)) + return jl_true; + else if ((bits == 64 || bits == 32) && test_nbit(features, Feature::vfp4)) + return jl_true; + else + return jl_false; +#endif +} + jl_image_t jl_init_processor_sysimg(void *hdl) { if (!jit_targets.empty()) diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp index 08a4274f44ac8..833cd02b5fdfc 100644 --- a/src/processor_fallback.cpp +++ b/src/processor_fallback.cpp @@ -172,6 +172,11 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void) return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str()); } +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) +{ + return jl_false; // Match behaviour of have_fma in src/llvm-cpufeatures.cpp (assume false) +} + JL_DLLEXPORT void jl_dump_host_cpu(void) { jl_safe_printf("CPU: %s\n", host_cpu_name().c_str()); diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp index 73e0992bcf37c..d96e2061ee674 100644 --- a/src/processor_x86.cpp +++ b/src/processor_x86.cpp @@ -4,6 +4,7 @@ // CPUID +#include "julia.h" extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType) { asm volatile ( @@ -1055,6 +1056,16 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void) return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str()); } +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) +{ + TargetData target = jit_targets.front(); + FeatureList features = target.en.features; + if ((bits == 32 || bits == 64) && (test_nbit(features, Feature::fma) || test_nbit(features, Feature::fma4))) + return jl_true; + else + return jl_false; +} + jl_image_t jl_init_processor_sysimg(void *hdl) { if (!jit_targets.empty()) diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index ed320aa9a6c35..844a5ca8338c7 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -1454,6 +1454,7 @@ un_fintrinsic(trunc_float,trunc_llvm) un_fintrinsic(rint_float,rint_llvm) un_fintrinsic(sqrt_float,sqrt_llvm) un_fintrinsic(sqrt_float,sqrt_llvm_fast) +jl_value_t *jl_cpu_has_fma(int bits); JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a) { @@ -1463,7 +1464,11 @@ JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a) JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ) { - JL_TYPECHK(have_fma, datatype, typ); - // TODO: run-time feature check? - return jl_false; + JL_TYPECHK(have_fma, datatype, typ); // TODO what about float16/bfloat16? + if (typ == (jl_value_t*)jl_float32_type) + return jl_cpu_has_fma(32); + else if (typ == (jl_value_t*)jl_float64_type) + return jl_cpu_has_fma(64); + else + return jl_false; } diff --git a/test/llvmpasses/cpu-features.ll b/test/llvmpasses/cpu-features.ll index eea3d1b288204..080b6e07d3086 100644 --- a/test/llvmpasses/cpu-features.ll +++ b/test/llvmpasses/cpu-features.ll @@ -5,6 +5,8 @@ ; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s ; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" +target triple = "x86_64-linux-gnu" declare i1 @julia.cpu.have_fma.f64() declare double @with_fma(double %0, double %1, double %2) diff --git a/test/sysinfo.jl b/test/sysinfo.jl index cb943cfd38843..f2e233d608338 100644 --- a/test/sysinfo.jl +++ b/test/sysinfo.jl @@ -12,6 +12,8 @@ Base.Sys.loadavg() @test length(ccall(:jl_get_cpu_name, String, ())) != 0 @test length(ccall(:jl_get_cpu_features, String, ())) >= 0 +foo_fma() = Core.Intrinsics.have_fma(Int64) +@test ccall(:jl_cpu_has_fma, Bool, (Cint,), 64) == foo_fma() if Sys.isunix() mktempdir() do tempdir