Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

inline ^ for literal powers of numbers #20637

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions base/fastmath.jl
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ const rewrite_op =
function make_fastmath(expr::Expr)
if expr.head === :quote
return expr
elseif expr.head == :call && expr.args[1] == :^ && expr.args[3] isa Integer
# literal integer powers can be inlined with @fastmath
return Expr(:call, :pow_fast, expr.args[2], :(Val{$(expr.args[3])}))
end
op = get(rewrite_op, expr.head, :nothing)
if op !== :nothing
Expand Down Expand Up @@ -245,6 +248,8 @@ end

pow_fast(x::Float32, y::Integer) = ccall("llvm.powi.f32", llvmcall, Float32, (Float32, Int32), x, y)
pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64", llvmcall, Float64, (Float64, Int32), x, y)
@inline @generated pow_fast{p}(x::Base.HWNumber, ::Type{Val{p}}) = Base.inlined_pow(:x, p)
pow_fast{p}(x::FloatTypes, ::Type{Val{p}}) = pow_fast(x, p) # inlines already via llvm.powi

# TODO: Change sqrt_llvm intrinsic to avoid nan checking; add nan
# checking to sqrt in math.jl; remove sqrt_llvm_fast intrinsic
Expand Down
3 changes: 0 additions & 3 deletions base/gmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -443,9 +443,6 @@ end
^(x::Integer, y::BigInt ) = bigint_pow(BigInt(x), y)
^(x::Bool , y::BigInt ) = Base.power_by_squaring(x, y)

# override default inlining of x^2 and x^3 etc.
^{p}(x::BigInt, ::Type{Val{p}}) = x^p
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we still call GMP in this case?


function powermod(x::BigInt, p::BigInt, m::BigInt)
r = BigInt()
ccall((:__gmpz_powm, :libgmp), Void,
Expand Down
107 changes: 107 additions & 0 deletions base/intfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,113 @@ const HWNumber = Union{HWReal, Complex{<:HWReal}, Rational{<:HWReal}}
@inline internal_pow(x::HWNumber, ::Type{Val{2}}) = x*x
@inline internal_pow(x::HWNumber, ::Type{Val{3}}) = x*x*x

# This table represents the optimal "power tree"
# based on Knuth's "TAOCP vol 2: Seminumerical Algorithms",
# third edition, section 4.6.3, Fig. 15. It was
# transcribed into this array form in the gcc source
# code (tree-ssa-math-opts.c), but since this is just
# a table of mathematical facts it should not be copyrightable.
#
# To compute x^q in a minimal number of multiplications
# for 1 ≤ q ≤ 255, you compute x^r * x^s for
# r = power_tree[i] and s = i-q, recursively
# (memoizing each power computation), and we implicitly
# define power_tree[0] = 0.
const power_tree = [
1, 1, 2, 2, 3, 3, 4, # 1 - 7
4, 6, 5, 6, 6, 10, 7, 9, # 8 - 15
8, 16, 9, 16, 10, 12, 11, 13, # 16 - 23
12, 17, 13, 18, 14, 24, 15, 26, # 24 - 31
16, 17, 17, 19, 18, 33, 19, 26, # 32 - 39
20, 25, 21, 40, 22, 27, 23, 44, # 40 - 47
24, 32, 25, 34, 26, 29, 27, 44, # 48 - 55
28, 31, 29, 34, 30, 60, 31, 36, # 56 - 63
32, 64, 33, 34, 34, 46, 35, 37, # 64 - 71
36, 65, 37, 50, 38, 48, 39, 69, # 72 - 79
40, 49, 41, 43, 42, 51, 43, 58, # 80 - 87
44, 64, 45, 47, 46, 59, 47, 76, # 88 - 95
48, 65, 49, 66, 50, 67, 51, 66, # 96 - 103
52, 70, 53, 74, 54, 104, 55, 74, # 104 - 111
56, 64, 57, 69, 58, 78, 59, 68, # 112 - 119
60, 61, 61, 80, 62, 75, 63, 68, # 120 - 127
64, 65, 65, 128, 66, 129, 67, 90, # 128 - 135
68, 73, 69, 131, 70, 94, 71, 88, # 136 - 143
72, 128, 73, 98, 74, 132, 75, 121, # 144 - 151
76, 102, 77, 124, 78, 132, 79, 106, # 152 - 159
80, 97, 81, 160, 82, 99, 83, 134, # 160 - 167
84, 86, 85, 95, 86, 160, 87, 100, # 168 - 175
88, 113, 89, 98, 90, 107, 91, 122, # 176 - 183
92, 111, 93, 102, 94, 126, 95, 150, # 184 - 191
96, 128, 97, 130, 98, 133, 99, 195, # 192 - 199
100, 128, 101, 123, 102, 164, 103, 138, # 200 - 207
104, 145, 105, 146, 106, 109, 107, 149, # 208 - 215
108, 200, 109, 146, 110, 170, 111, 157, # 216 - 223
112, 128, 113, 130, 114, 182, 115, 132, # 224 - 231
116, 200, 117, 132, 118, 158, 119, 206, # 232 - 239
120, 240, 121, 162, 122, 147, 123, 152, # 240 - 247
124, 166, 125, 214, 126, 138, 127, 153, # 248 - 255
]

# return the inlined/unrolled expression to compute x^p
# by repeated multiplications (using an optimal addition
# chain for |p|<256 and power-by-squaring thereafter.
function inlined_pow(x::Symbol, p::Int)
p == 0 && return :(one($x))
ex = Union{Expr,Symbol}[] # expressions to compute intermediate powers
if p < 0
x′ = gensym()
push!(ex, :($x′ = inv($x)))
Copy link
Member

@StefanKarpinski StefanKarpinski Feb 23, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this likely to be more accurate if the inversion happens at the end?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe, for integers, but in that case it's also likelier to overflow. For floating-point values, I don't see why it would be more accurate to put the inv at the end.

x = x′
p = -p
end
pows_computed = Dict(1 => x) # powers q => variable storing x^q
pows_to_compute = [p]
while !isempty(pows_to_compute)
q = pop!(pows_to_compute)
if !haskey(pows_computed, q)
if q ≤ length(power_tree) # use optimal addition chain
q1 = power_tree[q]
q2 = q-q1
assert(q1 > 0 && q2 > 0)
has1 = haskey(pows_computed, q1)
has2 = haskey(pows_computed, q2)
if has1 && has2
xq = gensym()
push!(ex, :($xq = $(pows_computed[q1]) * $(pows_computed[q2])))
pows_computed[q] = xq
else
push!(pows_to_compute, q) # try again after computing q1, q2
has1 || push!(pows_to_compute, q1)
has2 || push!(pows_to_compute, q2)
end
else # q too big, use power-by-squaring algorithm
q1 = q >> 1
if haskey(pows_computed, q1)
xq = gensym()
xq1 = pows_computed[q1]
push!(ex, iseven(q) ? :($xq = $xq1 * $xq1) : :($xq = $xq1 * $xq1 * $x))
pows_computed[q] = xq
else
push!(pows_to_compute, q) # try again after computing q1
push!(pows_to_compute, q1)
end
end
end
end
push!(ex, pows_computed[p]) # the variable for the final result x^p
return Expr(:block, ex...)
end
inlined_pow(x::Symbol, p::Integer) = inlined_pow(x, Int(p))

# for cases where we use power_by_squaring, inline
# the unrolled expression for literal p
# (this also allows integer^-p to give a floating-point result
# in a type-stable fashion).
@inline @generated internal_pow{p}(x::HWNumber, ::Type{Val{p}}) = inlined_pow(:x, p)

# Float32/Float64 may not inline without @fastmath, for accuracy
internal_pow{p}(x::Union{Float32,Float64}, ::Type{Val{p}}) = x^p

# b^p mod m

"""
Expand Down