Skip to content

Commit

Permalink
Make have_fma consistent between interpreter and compiled (#52206)
Browse files Browse the repository at this point in the history
Currently the interpreter always returns false, which isn't very good.
Make it follow whatever the JIT will do.

(cherry picked from commit a6c656e)
  • Loading branch information
gbaraldi authored and KristofferC committed Nov 27, 2023
1 parent 0218599 commit f67439c
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@
XX(jl_get_binding_wr) \
XX(jl_get_cpu_name) \
XX(jl_get_cpu_features) \
XX(jl_cpu_has_fma) \
XX(jl_get_current_task) \
XX(jl_get_default_sysimg_path) \
XX(jl_get_excstack) \
Expand Down
4 changes: 2 additions & 2 deletions src/llvm-cpufeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,15 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS
StringRef FS =
FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString();

SmallVector<StringRef, 6> Features;
SmallVector<StringRef, 128> Features;
FS.split(Features, ',');
for (StringRef Feature : Features)
if (TT.isARM()) {
if (Feature == "+vfp4")
return typ == "f32" || typ == "f64";
else if (Feature == "+vfp4sp")
return typ == "f32";
} else {
} else if (TT.isX86()) {
if (Feature == "+fma" || Feature == "+fma4")
return typ == "f32" || typ == "f64";
}
Expand Down
2 changes: 2 additions & 0 deletions src/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void);
// Return the features of the host CPU as a julia string.
JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void);
// Check if the CPU has native FMA instructions for floating-point values
// that are `bits` wide; the result is a julia boolean (`jl_true` / `jl_false`).
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits);
// Dump the name and feature set of the host CPU
// For debugging only
JL_DLLEXPORT void jl_dump_host_cpu(void);
JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data);
Expand Down
16 changes: 16 additions & 0 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1808,6 +1808,22 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
}

// Report whether native FMA is available for floating-point values that are
// `bits` wide. The result is `jl_true` or `jl_false`.
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
{
#ifdef _CPU_AARCH64_
    // No feature lookup is performed on AArch64: the answer is always true.
    return jl_true;
#else
    // Look at the feature bits enabled for the first JIT target.
    const auto &enabled = jit_targets.front().en.features;
    switch (bits) {
    case 32:
        // Either vfp4sp or vfp4 is enough for 32-bit values.
        return (test_nbit(enabled, Feature::vfp4sp) || test_nbit(enabled, Feature::vfp4))
            ? jl_true : jl_false;
    case 64:
        // Only vfp4 counts for 64-bit values.
        return test_nbit(enabled, Feature::vfp4) ? jl_true : jl_false;
    default:
        return jl_false;
    }
#endif
}

jl_image_t jl_init_processor_sysimg(void *hdl)
{
if (!jit_targets.empty())
Expand Down
5 changes: 5 additions & 0 deletions src/processor_fallback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
}

// Fallback implementation: never reports native FMA, whatever width is asked
// for. Match behaviour of have_fma in src/llvm-cpufeatures.cpp (assume false).
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
{
    (void)bits; // the requested width does not influence the answer here
    return jl_false;
}

JL_DLLEXPORT void jl_dump_host_cpu(void)
{
jl_safe_printf("CPU: %s\n", host_cpu_name().c_str());
Expand Down
11 changes: 11 additions & 0 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

// CPUID

#include "julia.h"
extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType)
{
asm volatile (
Expand Down Expand Up @@ -1055,6 +1056,16 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
}

// Report whether native FMA is available for floating-point values that are
// `bits` wide. The result is `jl_true` or `jl_false`.
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
{
    // Only 32- and 64-bit widths can ever be answered with true.
    if (bits != 32 && bits != 64)
        return jl_false;

    // Look at the feature bits enabled for the first JIT target; either the
    // fma or the fma4 feature is sufficient.
    const auto &enabled = jit_targets.front().en.features;
    const bool fma_available =
        test_nbit(enabled, Feature::fma) || test_nbit(enabled, Feature::fma4);
    return fma_available ? jl_true : jl_false;
}

jl_image_t jl_init_processor_sysimg(void *hdl)
{
if (!jit_targets.empty())
Expand Down
11 changes: 8 additions & 3 deletions src/runtime_intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -1454,6 +1454,7 @@ un_fintrinsic(trunc_float,trunc_llvm)
un_fintrinsic(rint_float,rint_llvm)
un_fintrinsic(sqrt_float,sqrt_llvm)
un_fintrinsic(sqrt_float,sqrt_llvm_fast)
jl_value_t *jl_cpu_has_fma(int bits);

JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a)
{
Expand All @@ -1463,7 +1464,11 @@ JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a)

// Runtime implementation of the `have_fma` intrinsic.
//
// `typ` must be a datatype (enforced by JL_TYPECHK). For Float32 and Float64
// the answer is delegated to jl_cpu_has_fma() with the matching bit width;
// every other type yields jl_false.
//
// The previous unconditional `return jl_false;` (and its duplicate type check)
// has been dropped: it made the feature check below unreachable.
JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ)
{
    JL_TYPECHK(have_fma, datatype, typ); // TODO what about float16/bfloat16?
    if (typ == (jl_value_t*)jl_float32_type)
        return jl_cpu_has_fma(32);
    else if (typ == (jl_value_t*)jl_float64_type)
        return jl_cpu_has_fma(64);
    else
        return jl_false;
}
2 changes: 2 additions & 0 deletions test/llvmpasses/cpu-features.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s
; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-linux-gnu"

declare i1 @julia.cpu.have_fma.f64()
declare double @with_fma(double %0, double %1, double %2)
Expand Down
2 changes: 2 additions & 0 deletions test/sysinfo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Base.Sys.loadavg()

@test length(ccall(:jl_get_cpu_name, String, ())) != 0
@test length(ccall(:jl_get_cpu_features, String, ())) >= 0
# `jl_cpu_has_fma` returns a `jl_value_t*` (jl_true / jl_false), so the ccall return
# type is `Any`, not `Bool`. Query `Float64` so that both sides ask about 64-bit FMA;
# for non-float types the runtime `have_fma` always answers `false`.
foo_fma() = Core.Intrinsics.have_fma(Float64)
@test ccall(:jl_cpu_has_fma, Any, (Cint,), 64) == foo_fma()

if Sys.isunix()
mktempdir() do tempdir
Expand Down

0 comments on commit f67439c

Please sign in to comment.