From 5a1f97198c30914249fcbcf984dd92440b267066 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 22 Feb 2017 15:00:06 -0500 Subject: [PATCH 1/4] update DomainError showerror handling to ignoring inlining --- base/replutil.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/base/replutil.jl b/base/replutil.jl index 4f60d3140e15e..dd771314b02e1 100644 --- a/base/replutil.jl +++ b/base/replutil.jl @@ -219,28 +219,29 @@ showerror(io::IO, ex::InitError) = showerror(io, ex, []) function showerror(io::IO, ex::DomainError, bt; backtrace=true) print(io, "DomainError:") for b in bt - code = StackTraces.lookup(b)[1] - if !code.from_c - if code.func == :nan_dom_err + for code in StackTraces.lookup(b) + if code.from_c + continue + elseif code.func === :nan_dom_err continue elseif code.func in (:log, :log2, :log10, :sqrt) print(io, "\n$(code.func) will only return a complex result if called ", "with a complex argument. Try $(string(code.func))(complex(x)).") - elseif (code.func == :^ && code.file == Symbol("intfuncs.jl")) || - code.func == :power_by_squaring #3024 + elseif (code.func === :^ && + (code.file === Symbol("intfuncs.jl") || code.file === Symbol(joinpath(".", "intfuncs.jl")))) || + code.func === :power_by_squaring #3024 print(io, "\nCannot raise an integer x to a negative power -n. ", "\nMake x a float by adding a zero decimal (e.g. 2.0^-n instead ", "of 2^-n), or write 1/x^n, float(x)^-n, or (x//1)^-n.") - elseif code.func == :^ && - (code.file == Symbol("promotion.jl") || code.file == Symbol("math.jl") || - code.file == Symbol(joinpath(".","promotion.jl")) || - code.file == Symbol(joinpath(".","math.jl"))) + elseif code.func === :^ && + (code.file === Symbol("math.jl") || code.file === Symbol(joinpath(".", "math.jl"))) print(io, "\nExponentiation yielding a complex result requires a complex ", "argument.\nReplace x^y with (x+0im)^y, Complex(x)^y, or similar.") end - break + @goto showbacktrace end end + @label showbacktrace backtrace && show_backtrace(io, bt) nothing end From 1c5d7339e72b64ee9289c3645eb651cc611c18b1 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 22 Feb 2017 14:58:46 -0500 Subject: [PATCH 2/4] only use accurate powf function The powi intrinsic optimization over calling powf is that it is inaccurate. We don't need that. When it is equally accurate (e.g. tiny constant powers), LLVM will already recognize and optimize any call to a function named `powf`, and produce the same speedup. 
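(Editorial sketch, not part of the patch itself: the accuracy problem motivating this change can be seen directly from Julia, mirroring the new test added to test/math.jl further down in this patch; the exact numeric bounds assume IEEE Float64 semantics.)

    # Sketch of issue #19872: with the old powi_llvm lowering, a large negative
    # integer power could lose the subnormal result, while the plain pow call
    # used after this change preserves it.
    x = 2.0
    y = x ^ -1024           # dispatches through the Float64 pow path after this patch
    @assert issubnormal(y)  # 2.0^-1024 ≈ 5.6e-309, a subnormal rather than 0.0
    @assert 0 < y < 1e-300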
fix #19872 --- base/fastmath.jl | 6 +++--- base/inference.jl | 1 - base/math.jl | 23 +++++++++++------------ src/codegen.cpp | 23 ----------------------- src/intrinsics.cpp | 28 ---------------------------- src/intrinsics.h | 1 - src/julia_internal.h | 1 - src/runtime_intrinsics.c | 25 ------------------------- test/math.jl | 12 ++++++++++++ 9 files changed, 26 insertions(+), 94 deletions(-) diff --git a/base/fastmath.jl b/base/fastmath.jl index f99e44b0f50e1..33c6b9884a7ac 100644 --- a/base/fastmath.jl +++ b/base/fastmath.jl @@ -23,7 +23,7 @@ module FastMath export @fastmath -import Core.Intrinsics: powi_llvm, sqrt_llvm_fast, neg_float_fast, +import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast, add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast, eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast @@ -243,8 +243,8 @@ end # builtins -pow_fast(x::FloatTypes, y::Integer) = pow_fast(x, Int32(y)) -pow_fast(x::FloatTypes, y::Int32) = Base.powi_llvm(x, y) +pow_fast(x::Float32, y::Integer) = ccall("llvm.powi.f32", llvmcall, Float32, (Float32, Int32), x, y) +pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64", llvmcall, Float64, (Float64, Int32), x, y) # TODO: Change sqrt_llvm intrinsic to avoid nan checking; add nan # checking to sqrt in math.jl; remove sqrt_llvm_fast intrinsic diff --git a/base/inference.jl b/base/inference.jl index eb52cbde5f204..83a4a5d7f2998 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -468,7 +468,6 @@ add_tfunc(floor_llvm, 1, 1, math_tfunc) add_tfunc(trunc_llvm, 1, 1, math_tfunc) add_tfunc(rint_llvm, 1, 1, math_tfunc) add_tfunc(sqrt_llvm, 1, 1, math_tfunc) -add_tfunc(powi_llvm, 2, 2, math_tfunc) add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc) ## same-type comparisons ## cmp_tfunc(x::ANY, y::ANY) = Bool diff --git a/base/math.jl b/base/math.jl index 9fae027458b00..c75740809c400 100644 --- a/base/math.jl +++ b/base/math.jl @@ -24,7 +24,7 @@ using Base: sign_mask, exponent_mask, exponent_one, exponent_bias, exponent_half, exponent_max, exponent_raw_max, fpinttype, significand_mask, significand_bits, exponent_bits -using Core.Intrinsics: sqrt_llvm, powi_llvm +using Core.Intrinsics: sqrt_llvm const IEEEFloat = Union{Float16,Float32,Float64} # non-type specific math functions @@ -286,6 +286,8 @@ exp10(x::Float32) = 10.0f0^x exp10(x::Integer) = exp10(float(x)) # utility for converting NaN return to DomainError +# the branch in nan_dom_err prevents its callers from inlining, so be sure to force it +# until the heuristics can be improved @inline nan_dom_err(f, x) = isnan(f) & !isnan(x) ? 
throw(DomainError()) : f # functions that return NaN on non-NaN argument for domain error @@ -403,9 +405,9 @@ log1p(x) for f in (:sin, :cos, :tan, :asin, :acos, :acosh, :atanh, :log, :log2, :log10, :lgamma, :log1p) @eval begin - ($f)(x::Float64) = nan_dom_err(ccall(($(string(f)),libm), Float64, (Float64,), x), x) - ($f)(x::Float32) = nan_dom_err(ccall(($(string(f,"f")),libm), Float32, (Float32,), x), x) - ($f)(x::Real) = ($f)(float(x)) + @inline ($f)(x::Float64) = nan_dom_err(ccall(($(string(f)), libm), Float64, (Float64,), x), x) + @inline ($f)(x::Float32) = nan_dom_err(ccall(($(string(f, "f")), libm), Float32, (Float32,), x), x) + @inline ($f)(x::Real) = ($f)(float(x)) end end @@ -683,14 +685,11 @@ function modf(x::Float64) f, _modf_temp[] end -^(x::Float64, y::Float64) = nan_dom_err(ccall((:pow,libm), Float64, (Float64,Float64), x, y), x+y) -^(x::Float32, y::Float32) = nan_dom_err(ccall((:powf,libm), Float32, (Float32,Float32), x, y), x+y) - -^(x::Float64, y::Integer) = x^Int32(y) -^(x::Float64, y::Int32) = powi_llvm(x, y) -^(x::Float32, y::Integer) = x^Int32(y) -^(x::Float32, y::Int32) = powi_llvm(x, y) -^(x::Float16, y::Integer) = Float16(Float32(x)^y) +@inline ^(x::Float64, y::Float64) = nan_dom_err(ccall("llvm.pow.f64", llvmcall, Float64, (Float64, Float64), x, y), x + y) +@inline ^(x::Float32, y::Float32) = nan_dom_err(ccall("llvm.pow.f32", llvmcall, Float32, (Float32, Float32), x, y), x + y) +@inline ^(x::Float64, y::Integer) = x ^ Float64(y) +@inline ^(x::Float32, y::Integer) = x ^ Float32(y) +@inline ^(x::Float16, y::Integer) = Float16(Float32(x) ^ Float32(y)) ^{p}(x::Float16, ::Type{Val{p}}) = Float16(Float32(x)^Val{p}) function angle_restrict_symm(theta) diff --git a/src/codegen.cpp b/src/codegen.cpp index 26c67da603b17..4d4ec294e5f0e 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -397,10 +397,6 @@ static Function *jldlsym_func; static Function *jlnewbits_func; static Function *jltypeassert_func; static Function *jldepwarnpi_func; -#if JL_LLVM_VERSION < 30600 -static Function *jlpow_func; -static Function *jlpowf_func; -#endif //static Function *jlgetnthfield_func; static Function *jlgetnthfieldchecked_func; //static Function *jlsetnthfield_func; @@ -6857,25 +6853,6 @@ static void init_julia_llvm_env(Module *m) "jl_gc_diff_total_bytes", m); add_named_global(diff_gc_total_bytes_func, *jl_gc_diff_total_bytes); -#if JL_LLVM_VERSION < 30600 - Type *powf_type[2] = { T_float32, T_float32 }; - jlpowf_func = Function::Create(FunctionType::get(T_float32, powf_type, false), - Function::ExternalLinkage, - "powf", m); - add_named_global(jlpowf_func, &powf, false); - - Type *pow_type[2] = { T_float64, T_float64 }; - jlpow_func = Function::Create(FunctionType::get(T_float64, pow_type, false), - Function::ExternalLinkage, - "pow", m); - add_named_global(jlpow_func, -#ifdef _COMPILER_MICROSOFT_ - static_cast(&pow), -#else - &pow, -#endif - false); -#endif std::vector array_owner_args(0); array_owner_args.push_back(T_pjlvalue); jlarray_data_owner_func = diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 78e4eca282bcb..060422a10a222 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -71,7 +71,6 @@ static void jl_init_intrinsic_functions_codegen(Module *m) float_func[rint_llvm] = true; float_func[sqrt_llvm] = true; float_func[sqrt_llvm_fast] = true; - float_func[powi_llvm] = true; } extern "C" @@ -851,33 +850,6 @@ static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs, return mark_julia_type(ans, false, x.typ, ctx); } - case powi_llvm: { - const 
jl_cgval_t &x = argv[0]; - const jl_cgval_t &y = argv[1]; - if (!jl_is_primitivetype(x.typ) || !jl_is_primitivetype(y.typ) || jl_datatype_size(y.typ) != 4) - return emit_runtime_call(f, argv, nargs, ctx); - Type *xt = FLOATT(bitstype_to_llvm(x.typ)); - Type *yt = T_int32; - if (!xt) - return emit_runtime_call(f, argv, nargs, ctx); - - Value *xv = emit_unbox(xt, x, x.typ); - Value *yv = emit_unbox(yt, y, y.typ); -#if JL_LLVM_VERSION >= 30600 - Value *powi = Intrinsic::getDeclaration(jl_Module, Intrinsic::powi, makeArrayRef(xt)); -#if JL_LLVM_VERSION >= 30700 - Value *ans = builder.CreateCall(powi, {xv, yv}); -#else - Value *ans = builder.CreateCall2(powi, xv, yv); -#endif -#else - // issue #6506 - Value *ans = builder.CreateCall2(prepare_call(xt == T_float64 ? jlpow_func : jlpowf_func), - xv, builder.CreateSIToFP(yv, xt)); -#endif - return mark_julia_type(ans, false, x.typ, ctx); - } - default: { assert(nargs >= 1 && "invalid nargs for intrinsic call"); const jl_cgval_t &xinfo = argv[0]; diff --git a/src/intrinsics.h b/src/intrinsics.h index 058ccc68faaa8..15ea45dc2d73d 100644 --- a/src/intrinsics.h +++ b/src/intrinsics.h @@ -91,7 +91,6 @@ ADD_I(trunc_llvm, 1) \ ADD_I(rint_llvm, 1) \ ADD_I(sqrt_llvm, 1) \ - ADD_I(powi_llvm, 2) \ ALIAS(sqrt_llvm_fast, sqrt_llvm) \ /* pointer access */ \ ADD_I(pointerref, 3) \ diff --git a/src/julia_internal.h b/src/julia_internal.h index fc0ede8097841..135d6c5655021 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -680,7 +680,6 @@ JL_DLLEXPORT jl_value_t *jl_floor_llvm(jl_value_t *a); JL_DLLEXPORT jl_value_t *jl_trunc_llvm(jl_value_t *a); JL_DLLEXPORT jl_value_t *jl_rint_llvm(jl_value_t *a); JL_DLLEXPORT jl_value_t *jl_sqrt_llvm(jl_value_t *a); -JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a); JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b); diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index d107a3a898294..f391588c12d6b 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -925,31 +925,6 @@ un_fintrinsic(trunc_float,trunc_llvm) un_fintrinsic(rint_float,rint_llvm) un_fintrinsic(sqrt_float,sqrt_llvm) -JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b) -{ - jl_ptls_t ptls = jl_get_ptls_states(); - jl_value_t *ty = jl_typeof(a); - if (!jl_is_primitivetype(ty)) - jl_error("powi_llvm: a is not a primitive type"); - if (!jl_is_primitivetype(jl_typeof(b)) || jl_datatype_size(jl_typeof(b)) != 4) - jl_error("powi_llvm: b is not a 32-bit primitive type"); - int sz = jl_datatype_size(ty); - jl_value_t *newv = jl_gc_alloc(ptls, sz, ty); - void *pa = jl_data_ptr(a), *pr = jl_data_ptr(newv); - switch (sz) { - /* choose the right size c-type operation */ - case 4: - *(float*)pr = powf(*(float*)pa, (float)jl_unbox_int32(b)); - break; - case 8: - *(double*)pr = pow(*(double*)pa, (double)jl_unbox_int32(b)); - break; - default: - jl_error("powi_llvm: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); - } - return newv; -} - JL_DLLEXPORT jl_value_t *jl_select_value(jl_value_t *isfalse, jl_value_t *a, jl_value_t *b) { JL_TYPECHK(isfalse, bool, isfalse); diff --git a/test/math.jl b/test/math.jl index 9c43e610a9bc9..ca5b2912cfc94 100644 --- a/test/math.jl +++ b/test/math.jl @@ -590,6 +590,18 @@ end end end +@testset "issue #19872" begin + f19872a(x) = x ^ 5 + f19872b(x) = x ^ (-1024) + @test 0 < 
f19872b(2.0) < 1e-300 + @test issubnormal(2.0 ^ (-1024)) + @test issubnormal(f19872b(2.0)) + @test !issubnormal(f19872b(0.0)) + @test f19872a(2.0) === 32.0 + @test !issubnormal(f19872a(2.0)) + @test !issubnormal(0.0) +end + # no domain error is thrown for negative values @test invoke(cbrt, Tuple{AbstractFloat}, -1.0) == -1.0 From cc7990b1936440e1b4bbe33895aa4dcdd6e2a754 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 23 Feb 2017 13:10:52 -0500 Subject: [PATCH 3/4] make several pure-marked math functions actually pure --- base/float.jl | 8 -------- base/math.jl | 21 +++++++++++++++------ base/special/exp.jl | 8 ++++---- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/base/float.jl b/base/float.jl index f3b7abdfa5c4c..612e9360c08a7 100644 --- a/base/float.jl +++ b/base/float.jl @@ -812,14 +812,6 @@ fpinttype(::Type{Float64}) = UInt64 fpinttype(::Type{Float32}) = UInt32 fpinttype(::Type{Float16}) = UInt16 -@pure significand_bits{T<:AbstractFloat}(::Type{T}) = trailing_ones(significand_mask(T)) -@pure exponent_bits{T<:AbstractFloat}(::Type{T}) = sizeof(T)*8 - significand_bits(T) - 1 -@pure exponent_bias{T<:AbstractFloat}(::Type{T}) = Int(exponent_one(T) >> significand_bits(T)) -# maximum float exponent -@pure exponent_max{T<:AbstractFloat}(::Type{T}) = Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T) -# maximum float exponent without bias -@pure exponent_raw_max{T<:AbstractFloat}(::Type{T}) = Int(exponent_mask(T) >> significand_bits(T)) - ## TwicePrecision utilities # The numeric constants are half the number of bits in the mantissa for (F, T, n) in ((Float16, UInt16, 5), (Float32, UInt32, 12), (Float64, UInt64, 26)) diff --git a/base/math.jl b/base/math.jl index c75740809c400..d4db49a661030 100644 --- a/base/math.jl +++ b/base/math.jl @@ -20,13 +20,23 @@ import Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin, max, min, minmax, ^, exp2, muladd, rem, exp10, expm1, log1p -using Base: sign_mask, exponent_mask, exponent_one, exponent_bias, - exponent_half, exponent_max, exponent_raw_max, fpinttype, - significand_mask, significand_bits, exponent_bits +using Base: sign_mask, exponent_mask, exponent_one, + exponent_half, fpinttype, significand_mask using Core.Intrinsics: sqrt_llvm -const IEEEFloat = Union{Float16,Float32,Float64} +const IEEEFloat = Union{Float16, Float32, Float64} + +for T in (Float16, Float32, Float64) + @eval significand_bits(::Type{$T}) = $(trailing_ones(significand_mask(T))) + @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - significand_bits(T) - 1) + @eval exponent_bias(::Type{$T}) = $(Int(exponent_one(T) >> significand_bits(T))) + # maximum float exponent + @eval exponent_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T)) + # maximum float exponent without bias + @eval exponent_raw_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T))) +end + # non-type specific math functions """ @@ -229,8 +239,6 @@ for f in (:cbrt, :sinh, :cosh, :tanh, :atan, :asinh, :exp2, :expm1) ($f)(x::Real) = ($f)(float(x)) end end -# pure julia exp function -include("special/exp.jl") exp(x::Real) = exp(float(x)) # fallback definitions to prevent infinite loop from $f(x::Real) def above @@ -950,6 +958,7 @@ end cbrt(a::Float16) = Float16(cbrt(Float32(a))) # More special functions +include("special/exp.jl") include("special/trig.jl") include("special/gamma.jl") diff --git a/base/special/exp.jl b/base/special/exp.jl index 9d7012a98034f..7f21519c79619 100644 --- a/base/special/exp.jl +++ b/base/special/exp.jl @@ -56,8 
+56,8 @@ MINEXP(::Type{Float32}) = -103.97207708f0 # log 2^-150 @inline exp_kernel(x::Float32) = @horner(x, 1.6666625440f-1, -2.7667332906f-3) # for values smaller than this threshold just use a Taylor expansion -exp_small_thres(::Type{Float64}) = 2.0^-28 -exp_small_thres(::Type{Float32}) = 2.0f0^-13 +@eval exp_small_thres(::Type{Float64}) = $(2.0^-28) +@eval exp_small_thres(::Type{Float32}) = $(2.0f0^-13) """ exp(x) @@ -114,13 +114,13 @@ function exp{T<:Union{Float32,Float64}}(x::T) # scale back if k > -significand_bits(T) # multiply by 2.0 first to prevent overflow, which helps extends the range - k == exponent_max(T) && return y*T(2.0)*T(2.0)^(exponent_max(T) - 1) + k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1) twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T)) return y*twopk else # add significand_bits(T) + 1 to lift the range outside the subnormals twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T)) - return y*twopk*T(2.0)^(-significand_bits(T) - 1) + return y * twopk * T(2.0)^(-significand_bits(T) - 1) end elseif xa < reinterpret(Unsigned, exp_small_thres(T)) # |x| < exp_small_thres # Taylor approximation for small x From 0c5eac2bc5603b82c9ac114bdc8999831cba2f6f Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 23 Feb 2017 14:10:23 -0500 Subject: [PATCH 4/4] make sure that the indirection through the `Val{p}` type doesn't stop inlining of `^` --- base/intfuncs.jl | 17 ++++++++++------- base/math.jl | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/base/intfuncs.jl b/base/intfuncs.jl index d299a8da27944..69c0df7d11c17 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -199,16 +199,19 @@ end # to enable compile-time optimizations specialized to p. # However, we still need a fallback that calls the general ^. # To avoid ambiguities for methods that dispatch on the -# first argument, we dispatch the fallback via internal_pow: -^(x, p) = internal_pow(x, p) -internal_pow{p}(x, ::Type{Val{p}}) = x^p +# first argument, we dispatch the fallback via internal_pow. +# We mark these @inline since if the target is marked @inline, +# we want to make sure that gets propagated, +# even if it is over the inlining threshold. +@inline ^(x, p) = internal_pow(x, p) +@inline internal_pow{p}(x, ::Type{Val{p}}) = x^p # inference.jl has complicated logic to inline x^2 and x^3 for # numeric types. In terms of Val we can do it much more simply: -internal_pow(x::Number, ::Type{Val{0}}) = one(x) -internal_pow(x::Number, ::Type{Val{1}}) = x -internal_pow(x::Number, ::Type{Val{2}}) = x*x -internal_pow(x::Number, ::Type{Val{3}}) = x*x*x +@inline internal_pow(x::Number, ::Type{Val{0}}) = one(x) +@inline internal_pow(x::Number, ::Type{Val{1}}) = x +@inline internal_pow(x::Number, ::Type{Val{2}}) = x*x +@inline internal_pow(x::Number, ::Type{Val{3}}) = x*x*x # b^p mod m diff --git a/base/math.jl b/base/math.jl index d4db49a661030..944514da5e736 100644 --- a/base/math.jl +++ b/base/math.jl @@ -698,7 +698,7 @@ end @inline ^(x::Float64, y::Integer) = x ^ Float64(y) @inline ^(x::Float32, y::Integer) = x ^ Float32(y) @inline ^(x::Float16, y::Integer) = Float16(Float32(x) ^ Float32(y)) -^{p}(x::Float16, ::Type{Val{p}}) = Float16(Float32(x)^Val{p}) +@inline ^{p}(x::Float16, ::Type{Val{p}}) = Float16(Float32(x) ^ Val{p}) function angle_restrict_symm(theta) const P1 = 4 * 7.8539812564849853515625e-01