From 8be73019dca31c5177fa53d30485a4ce6cf7e5ce Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Thu, 5 Jan 2017 22:27:14 -0500
Subject: [PATCH] only use accurate powf function

The powi intrinsic optimization over calling powf is that it is inaccurate.
We don't need that.

When it is equally accurate (e.g. tiny constant powers),
LLVM will already recognize and optimize any call to a function named `powf`,
and produce the same speedup.

fix #19872
---
 base/fastmath.jl         |  6 +++---
 base/inference.jl        |  2 +-
 base/math.jl             | 11 +++++------
 src/codegen.cpp          | 11 ++---------
 src/intrinsics.cpp       | 37 +++++++++----------------------------
 src/intrinsics.h         |  2 +-
 src/julia_internal.h     |  2 +-
 src/runtime_intrinsics.c | 28 +++-------------------------
 test/math.jl             |  7 +++++++
 9 files changed, 32 insertions(+), 74 deletions(-)

diff --git a/base/fastmath.jl b/base/fastmath.jl
index 36d975506e48cd..7accae693b7d16 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -23,7 +23,7 @@ module FastMath
 
 export @fastmath
 
-import Core.Intrinsics: powi_llvm, sqrt_llvm_fast, neg_float_fast,
+import Core.Intrinsics: powf_llvm, sqrt_llvm_fast, neg_float_fast,
     add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast,
     eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast
 
@@ -243,8 +243,8 @@ end
 
 # builtins
 
-pow_fast{T<:FloatTypes}(x::T, y::Integer) = pow_fast(x, Int32(y))
-pow_fast{T<:FloatTypes}(x::T, y::Int32) = Base.powi_llvm(x, y)
+pow_fast{T<:FloatTypes}(x::T, y::Integer) = pow_fast(x, convert(T, y))
+pow_fast{T<:FloatTypes}(x::T, y::T) = powf_llvm(x, y)
 
 # TODO: Change sqrt_llvm intrinsic to avoid nan checking; add nan
 # checking to sqrt in math.jl; remove sqrt_llvm_fast intrinsic
diff --git a/base/inference.jl b/base/inference.jl
index 1cd127abe97b77..966e19891d1402 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -427,7 +427,7 @@ add_tfunc(floor_llvm, 1, 1, math_tfunc)
 add_tfunc(trunc_llvm, 1, 1, math_tfunc)
 add_tfunc(rint_llvm, 1, 1, math_tfunc)
 add_tfunc(sqrt_llvm, 1, 1, math_tfunc)
-add_tfunc(powi_llvm, 2, 2, math_tfunc)
+add_tfunc(powf_llvm, 2, 2, math_tfunc)
 add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc)
     ## same-type comparisons ##
 cmp_tfunc(x::ANY, y::ANY) = Bool
diff --git a/base/math.jl b/base/math.jl
index 523f7fec892992..7bea44e6c33558 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -32,7 +32,7 @@ using Base: sign_mask, exponent_mask, exponent_one, exponent_bias,
             exponent_half, exponent_max, exponent_raw_max, fpinttype,
             significand_mask, significand_bits, exponent_bits
 
-using Core.Intrinsics: sqrt_llvm, powi_llvm
+using Core.Intrinsics: sqrt_llvm, powf_llvm
 
 # non-type specific math functions
 
@@ -680,11 +680,10 @@ end
 ^(x::Float64, y::Float64) = nan_dom_err(ccall((:pow,libm),  Float64, (Float64,Float64), x, y), x+y)
 ^(x::Float32, y::Float32) = nan_dom_err(ccall((:powf,libm), Float32, (Float32,Float32), x, y), x+y)
 
-^(x::Float64, y::Integer) = x^Int32(y)
-^(x::Float64, y::Int32) = powi_llvm(x, y)
-^(x::Float32, y::Integer) = x^Int32(y)
-^(x::Float32, y::Int32) = powi_llvm(x, y)
-^(x::Float16, y::Integer) = Float16(Float32(x)^y)
+^(x::Float64, y::Integer) = x ^ Float64(y)
+^(x::Float64, y::Float64) = powf_llvm(x, y)
+^(x::Float32, y::Integer) = x ^ Float32(y)
+^(x::Float32, y::Float32) = powf_llvm(x, y)
 
 function angle_restrict_symm(theta)
     const P1 = 4 * 7.8539812564849853515625e-01
diff --git a/src/codegen.cpp b/src/codegen.cpp
index b8aa96d2cfc14c..76f6a913c8bd7f 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -394,10 +394,8 @@ static Function *expect_func;
 static Function *jldlsym_func;
 static Function *jlnewbits_func;
 static Function *jltypeassert_func;
-#if JL_LLVM_VERSION < 30600
 static Function *jlpow_func;
 static Function *jlpowf_func;
-#endif
 //static Function *jlgetnthfield_func;
 static Function *jlgetnthfieldchecked_func;
 //static Function *jlsetnthfield_func;
@@ -5974,7 +5972,6 @@ static void init_julia_llvm_env(Module *m)
                          "jl_gc_diff_total_bytes", m);
     add_named_global(diff_gc_total_bytes_func, *jl_gc_diff_total_bytes);
 
-#if JL_LLVM_VERSION < 30600
     Type *powf_type[2] = { T_float32, T_float32 };
     jlpowf_func = Function::Create(FunctionType::get(T_float32, powf_type, false),
                                    Function::ExternalLinkage,
@@ -5986,13 +5983,9 @@ static void init_julia_llvm_env(Module *m)
                                   Function::ExternalLinkage,
                                   "pow", m);
     add_named_global(jlpow_func,
-#ifdef _COMPILER_MICROSOFT_
-        static_cast<double (*)(double, double)>(&pow),
-#else
-        &pow,
-#endif
+        static_cast<double (*)(double, double)>(&::pow),
         false);
-#endif
+
     std::vector<Type*> array_owner_args(0);
     array_owner_args.push_back(T_pjlvalue);
     jlarray_data_owner_func =
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 27cc70985c0708..cca85d6a145c9e 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -71,7 +71,7 @@ static void jl_init_intrinsic_functions_codegen(Module *m)
     float_func[rint_llvm] = true;
     float_func[sqrt_llvm] = true;
     float_func[sqrt_llvm_fast] = true;
-    float_func[powi_llvm] = true;
+    float_func[powf_llvm] = true;
 }
 
 extern "C"
@@ -915,33 +915,6 @@ static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs,
         return mark_julia_type(ans, false, x.typ, ctx);
     }
 
-    case powi_llvm: {
-        const jl_cgval_t &x = argv[0];
-        const jl_cgval_t &y = argv[1];
-        if (!jl_is_bitstype(x.typ) || !jl_is_bitstype(y.typ) || jl_datatype_size(y.typ) != 4)
-            return emit_runtime_call(f, argv, nargs, ctx);
-        Type *xt = FLOATT(bitstype_to_llvm(x.typ));
-        Type *yt = T_int32;
-        if (!xt)
-            return emit_runtime_call(f, argv, nargs, ctx);
-
-        Value *xv = emit_unbox(xt, x, x.typ);
-        Value *yv = emit_unbox(yt, y, y.typ);
-#if JL_LLVM_VERSION >= 30600
-        Value *powi = Intrinsic::getDeclaration(jl_Module, Intrinsic::powi, makeArrayRef(xt));
-#if JL_LLVM_VERSION >= 30700
-        Value *ans = builder.CreateCall(powi, {xv, yv});
-#else
-        Value *ans = builder.CreateCall2(powi, xv, yv);
-#endif
-#else
-        // issue #6506
-        Value *ans = builder.CreateCall2(prepare_call(xt == T_float64 ? jlpow_func : jlpowf_func),
-                xv, builder.CreateSIToFP(yv, xt));
-#endif
-        return mark_julia_type(ans, false, x.typ, ctx);
-    }
-
     default: {
         assert(nargs >= 1 && "invalid nargs for intrinsic call");
         const jl_cgval_t &xinfo = argv[0];
@@ -1296,6 +1269,14 @@ static Value *emit_untyped_intrinsic(intrinsic f, Value **argvalues, size_t narg
         Value *sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t));
         return builder.CreateCall(sqrtintr, x);
     }
+    case powf_llvm: {
+        Function *powf = (t == T_float64 ? jlpow_func : jlpowf_func);
+#if JL_LLVM_VERSION >= 30700
+        return builder.CreateCall(prepare_call(powf), {x, y});
+#else
+        return builder.CreateCall2(prepare_call(powf), x, y);
+#endif
+    }
 
     default:
         assert(0 && "invalid intrinsic");
diff --git a/src/intrinsics.h b/src/intrinsics.h
index 479b048b940a50..3af75228fc84ba 100644
--- a/src/intrinsics.h
+++ b/src/intrinsics.h
@@ -91,7 +91,7 @@
     ADD_I(trunc_llvm, 1) \
     ADD_I(rint_llvm, 1) \
     ADD_I(sqrt_llvm, 1) \
-    ADD_I(powi_llvm, 2) \
+    ADD_I(powf_llvm, 2) \
     ALIAS(sqrt_llvm_fast, sqrt_llvm) \
     /*  pointer access */ \
     ADD_I(pointerref, 3) \
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 60b931afedddc4..075d387ae27075 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -677,7 +677,7 @@ JL_DLLEXPORT jl_value_t *jl_floor_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_trunc_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_rint_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_sqrt_llvm(jl_value_t *a);
-JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b);
+JL_DLLEXPORT jl_value_t *jl_powf_llvm(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index d69b1be21e6dbc..d534d1a1a10a32 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -947,6 +947,8 @@ bi_iintrinsic_fast(jl_LLVMFlipSign, flipsign, flipsign_int,  )
         *pr = fp_select(a, sqrt)
 #define copysign_float(a, b) \
         fp_select2(a, b, copysign)
+#define pow_float(a, b) \
+        fp_select2(a, b, pow)
 
 un_fintrinsic(abs_float,abs_float)
 bi_fintrinsic(copysign_float,copysign_float)
@@ -955,31 +957,7 @@ un_fintrinsic(floor_float,floor_llvm)
 un_fintrinsic(trunc_float,trunc_llvm)
 un_fintrinsic(rint_float,rint_llvm)
 un_fintrinsic(sqrt_float,sqrt_llvm)
-
-JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_value_t *ty = jl_typeof(a);
-    if (!jl_is_bitstype(ty))
-        jl_error("powi_llvm: a is not a bitstype");
-    if (!jl_is_bitstype(jl_typeof(b)) || jl_datatype_size(jl_typeof(b)) != 4)
-        jl_error("powi_llvm: b is not a 32-bit bitstype");
-    int sz = jl_datatype_size(ty);
-    jl_value_t *newv = jl_gc_alloc(ptls, sz, ty);
-    void *pa = jl_data_ptr(a), *pr = jl_data_ptr(newv);
-    switch (sz) {
-    /* choose the right size c-type operation */
-    case 4:
-        *(float*)pr = powf(*(float*)pa, (float)jl_unbox_int32(b));
-        break;
-    case 8:
-        *(double*)pr = pow(*(double*)pa, (double)jl_unbox_int32(b));
-        break;
-    default:
-        jl_error("powi_llvm: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64");
-    }
-    return newv;
-}
+bi_fintrinsic(pow_float,powf_llvm)
 
 JL_DLLEXPORT jl_value_t *jl_select_value(jl_value_t *isfalse, jl_value_t *a, jl_value_t *b)
 {
diff --git a/test/math.jl b/test/math.jl
index 4912688318119c..c0631320ea8153 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -996,6 +996,13 @@ end
     end
 end
 
+@testset "issue #19872" begin
+    f19872(x) = x ^ 3
+    @test issubnormal(2.0 ^ (-1024))
+    @test f19872(2.0) === 8.0
+    @test !issubnormal(0.0)
+end
+
 @test Base.Math.f32(complex(1.0,1.0)) == complex(Float32(1.),Float32(1.))
 @test Base.Math.f16(complex(1.0,1.0)) == complex(Float16(1.),Float16(1.))