From 75393f6618782c87d4b321bb587b375c0d52326a Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 1 Oct 2024 10:42:30 -0400 Subject: [PATCH] make faster BigFloats (#55906) We can coalesce the two required allocations for the MPFR BigFloat API design into one allocation, hopefully giving an easy performance boost. It would have been slightly easier and more efficient if MPFR BigFloat was already a VLA instead of containing a pointer here, but that does not prevent the optimization. --- base/Base.jl | 1 - base/mpfr.jl | 161 ++++++++++++++++-------- base/{rawbigints.jl => rawbigfloats.jl} | 68 ++++------ stdlib/Random/src/generation.jl | 2 +- test/dict.jl | 2 +- test/mpfr.jl | 6 +- 6 files changed, 138 insertions(+), 102 deletions(-) rename base/{rawbigints.jl => rawbigfloats.jl} (58%) diff --git a/base/Base.jl b/base/Base.jl index 10a8dd1532f92..23633f0b5138b 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -306,7 +306,6 @@ end include("hashing.jl") include("rounding.jl") include("div.jl") -include("rawbigints.jl") include("float.jl") include("twiceprecision.jl") include("complex.jl") diff --git a/base/mpfr.jl b/base/mpfr.jl index d393469aa26a1..9d1a0843ebe06 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -18,12 +18,10 @@ import setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero, isone, big, _string_n, decompose, minmax, _precision_with_base_2, sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand, - uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask, - RawBigIntRoundingIncrementHelper, truncated, RawBigInt - + uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask using .Base.Libc -import ..Rounding: +import ..Rounding: Rounding, rounding_raw, setrounding_raw, rounds_to_nearest, rounds_away_from_zero, tie_breaker_is_to_even, correct_rounding_requires_increment @@ -39,7 +37,6 @@ else const libmpfr = "libmpfr.so.6" end - version() = 
VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Ptr{Cchar}, ()))) patches() = split(unsafe_string(ccall((:mpfr_get_patches,libmpfr), Ptr{Cchar}, ())),' ') @@ -120,44 +117,116 @@ const mpfr_special_exponent_zero = typemin(Clong) + true const mpfr_special_exponent_nan = mpfr_special_exponent_zero + true const mpfr_special_exponent_inf = mpfr_special_exponent_nan + true +struct BigFloatLayout + prec::Clong + sign::Cint + exp::Clong + d::Ptr{Limb} + # possible padding + p::Limb # Tuple{Vararg{Limb}} +end +const offset_prec = fieldoffset(BigFloatLayout, 1) % Int +const offset_sign = fieldoffset(BigFloatLayout, 2) % Int +const offset_exp = fieldoffset(BigFloatLayout, 3) % Int +const offset_d = fieldoffset(BigFloatLayout, 4) % Int +const offset_p_limbs = ((fieldoffset(BigFloatLayout, 5) % Int + sizeof(Limb) - 1) ÷ sizeof(Limb)) +const offset_p = offset_p_limbs * sizeof(Limb) + """ BigFloat <: AbstractFloat Arbitrary precision floating point number type. """ -mutable struct BigFloat <: AbstractFloat - prec::Clong - sign::Cint - exp::Clong - d::Ptr{Limb} - # _d::Buffer{Limb} # Julia gc handle for memory @ d - _d::String # Julia gc handle for memory @ d (optimized) +struct BigFloat <: AbstractFloat + d::Memory{Limb} # Not recommended for general use: # used internally by, e.g. 
deepcopy - global function _BigFloat(prec::Clong, sign::Cint, exp::Clong, d::String) - # ccall-based version, inlined below - #z = new(zero(Clong), zero(Cint), zero(Clong), C_NULL, d) - #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), d, prec) # currently seems to be a no-op in mpfr - #NAN_KIND = Cint(0) - #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, d) - #return z - return new(prec, sign, exp, pointer(d), d) - end + global _BigFloat(d::Memory{Limb}) = new(d) function BigFloat(; precision::Integer=_precision_with_base_2(BigFloat)) precision < 1 && throw(DomainError(precision, "`precision` cannot be less than 1.")) nb = ccall((:mpfr_custom_get_size,libmpfr), Csize_t, (Clong,), precision) - nb = (nb + Core.sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this - #d = Vector{Limb}(undef, nb) - d = _string_n(nb * Core.sizeof(Limb)) - EXP_NAN = mpfr_special_exponent_nan - return _BigFloat(Clong(precision), one(Cint), EXP_NAN, d) # +NAN + nl = (nb + offset_p + sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this + d = Memory{Limb}(undef, nl % Int) + # ccall-based version, inlined below + z = _BigFloat(d) # initialize to +NAN + #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), BigFloatData(d), prec) # currently seems to be a no-op in mpfr + #NAN_KIND = Cint(0) + #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, BigFloatData(d)) + z.prec = Clong(precision) + z.sign = one(Cint) + z.exp = mpfr_special_exponent_nan + return z end end -# The rounding mode here shouldn't matter. -significand_limb_count(x::BigFloat) = div(sizeof(x._d), sizeof(Limb), RoundToZero) +""" +Segment of raw words of bits interpreted as a big integer. Less +significant words come first. Each word is in machine-native bit-order. 
+""" +struct BigFloatData{Limb} + d::Memory{Limb} +end + +# BigFloat interface +@inline function Base.getproperty(x::BigFloat, s::Symbol) + d = getfield(x, :d) + p = Base.unsafe_convert(Ptr{Limb}, d) + if s === :prec + return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_prec) + elseif s === :sign + return GC.@preserve d unsafe_load(Ptr{Cint}(p) + offset_sign) + elseif s === :exp + return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_exp) + elseif s === :d + return BigFloatData(d) + else + return throw(FieldError(typeof(x), s)) + end +end + +@inline function Base.setproperty!(x::BigFloat, s::Symbol, v) + d = getfield(x, :d) + p = Base.unsafe_convert(Ptr{Limb}, d) + if s === :prec + return GC.@preserve d unsafe_store!(Ptr{Clong}(p) + offset_prec, v) + elseif s === :sign + return GC.@preserve d unsafe_store!(Ptr{Cint}(p) + offset_sign, v) + elseif s === :exp + return GC.@preserve d unsafe_store!(Ptr{Clong}(p) + offset_exp, v) + #elseif s === :d # not mutable + else + return throw(FieldError(x, s)) + end +end + +# Ref interface: make sure the conversion to C is done properly +Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = error("not compatible with mpfr") +Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat}) = error("not compatible with mpfr") +Base.cconvert(::Type{Ref{BigFloat}}, x::BigFloat) = x.d # BigFloatData is the Ref type for BigFloat +function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::BigFloatData) + d = getfield(x, :d) + p = Base.unsafe_convert(Ptr{Limb}, d) + GC.@preserve d unsafe_store!(Ptr{Ptr{Limb}}(p) + offset_d, p + offset_p, :monotonic) # :monotonic ensure that TSAN knows that this isn't a data race + return Ptr{BigFloat}(p) +end +Base.unsafe_convert(::Type{Ptr{Limb}}, fd::BigFloatData) = Base.unsafe_convert(Ptr{Limb}, getfield(fd, :d)) + offset_p +function Base.setindex!(fd::BigFloatData, v, i) + d = getfield(fd, :d) + @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i)) + @inbounds d[i + 
offset_p_limbs] = v + return fd +end +function Base.getindex(fd::BigFloatData, i) + d = getfield(fd, :d) + @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i)) + @inbounds d[i + offset_p_limbs] +end +Base.length(fd::BigFloatData) = length(getfield(fd, :d)) - offset_p_limbs +Base.copyto!(fd::BigFloatData, limbs) = copyto!(getfield(fd, :d), offset_p_limbs + 1, limbs) # for Random + +include("rawbigfloats.jl") rounding_raw(::Type{BigFloat}) = something(Base.ScopedValues.get(CURRENT_ROUNDING_MODE), ROUNDING_MODE[]) setrounding_raw(::Type{BigFloat}, r::MPFRRoundingMode) = ROUNDING_MODE[]=r @@ -165,24 +234,12 @@ function setrounding_raw(f::Function, ::Type{BigFloat}, r::MPFRRoundingMode) Base.ScopedValues.@with(CURRENT_ROUNDING_MODE => r, f()) end - rounding(::Type{BigFloat}) = convert(RoundingMode, rounding_raw(BigFloat)) setrounding(::Type{BigFloat}, r::RoundingMode) = setrounding_raw(BigFloat, convert(MPFRRoundingMode, r)) setrounding(f::Function, ::Type{BigFloat}, r::RoundingMode) = setrounding_raw(f, BigFloat, convert(MPFRRoundingMode, r)) -# overload the definition of unsafe_convert to ensure that `x.d` is assigned -# it may have been dropped in the event that the BigFloat was serialized -Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = x -@inline function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat}) - x = x[] - if x.d == C_NULL - x.d = pointer(x._d) - end - return convert(Ptr{BigFloat}, Base.pointer_from_objref(x)) -end - """ BigFloat(x::Union{Real, AbstractString} [, rounding::RoundingMode=rounding(BigFloat)]; [precision::Integer=precision(BigFloat)]) @@ -283,17 +340,18 @@ function BigFloat(x::Float64, r::MPFRRoundingMode=rounding_raw(BigFloat); precis nlimbs = (precision + 8*Core.sizeof(Limb) - 1) ÷ (8*Core.sizeof(Limb)) # Limb is a CLong which is a UInt32 on windows (thank M$) which makes this more complicated and slower. 
+ zd = z.d if Limb === UInt64 for i in 1:nlimbs-1 - unsafe_store!(z.d, 0x0, i) + @inbounds setindex!(zd, 0x0, i) end - unsafe_store!(z.d, val, nlimbs) + @inbounds setindex!(zd, val, nlimbs) else for i in 1:nlimbs-2 - unsafe_store!(z.d, 0x0, i) + @inbounds setindex!(zd, 0x0, i) end - unsafe_store!(z.d, val % UInt32, nlimbs-1) - unsafe_store!(z.d, (val >> 32) % UInt32, nlimbs) + @inbounds setindex!(zd, val % UInt32, nlimbs-1) + @inbounds setindex!(zd, (val >> 32) % UInt32, nlimbs) end z end @@ -440,12 +498,12 @@ function to_ieee754(::Type{T}, x::BigFloat, rm) where {T<:AbstractFloat} ret_u = if is_regular & !rounds_to_inf & !rounds_to_zero if !exp_is_huge_p # significand - v = RawBigInt{Limb}(x._d, significand_limb_count(x)) + v = x.d::BigFloatData len = max(ieee_precision + min(exp_diff, 0), 0)::Int signif = truncated(U, v, len) & significand_mask(T) # round up if necessary - rh = RawBigIntRoundingIncrementHelper(v, len) + rh = BigFloatDataRoundingIncrementHelper(v, len) incr = correct_rounding_requires_increment(rh, rm, sb) # exponent @@ -1193,10 +1251,8 @@ set_emin!(x) = check_exponent_err(ccall((:mpfr_set_emin, libmpfr), Cint, (Clong, function Base.deepcopy_internal(x::BigFloat, stackdict::IdDict) get!(stackdict, x) do - # d = copy(x._d) - d = x._d - d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String - y = _BigFloat(x.prec, x.sign, x.exp, d′) + d′ = copy(getfield(x, :d)) + y = _BigFloat(d′) #ccall((:mpfr_custom_move,libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary return y end::BigFloat @@ -1210,7 +1266,8 @@ function decompose(x::BigFloat)::Tuple{BigInt, Int, Int} s.size = cld(x.prec, 8*sizeof(Limb)) # limbs b = s.size * sizeof(Limb) # bytes ccall((:__gmpz_realloc2, libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits - memcpy(s.d, x.d, b) + xd = x.d + GC.@preserve xd memcpy(s.d, Base.unsafe_convert(Ptr{Limb}, xd), b) s, x.exp - 8b, x.sign end diff --git a/base/rawbigints.jl b/base/rawbigfloats.jl similarity 
index 58% rename from base/rawbigints.jl rename to base/rawbigfloats.jl index a9bb18e163e2d..4377edfc463d8 100644 --- a/base/rawbigints.jl +++ b/base/rawbigfloats.jl @@ -1,41 +1,21 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -""" -Segment of raw words of bits interpreted as a big integer. Less -significant words come first. Each word is in machine-native bit-order. -""" -struct RawBigInt{T<:Unsigned} - d::String - word_count::Int - - function RawBigInt{T}(d::String, word_count::Int) where {T<:Unsigned} - new{T}(d, word_count) - end -end +# Some operations on BigFloat can be done more directly by treating the data portion ("BigFloatData") as a BigInt -elem_count(x::RawBigInt, ::Val{:words}) = x.word_count +elem_count(x::BigFloatData, ::Val{:words}) = length(x) elem_count(x::Unsigned, ::Val{:bits}) = sizeof(x) * 8 -word_length(::RawBigInt{T}) where {T} = elem_count(zero(T), Val(:bits)) -elem_count(x::RawBigInt{T}, ::Val{:bits}) where {T} = word_length(x) * elem_count(x, Val(:words)) +word_length(::BigFloatData{T}) where {T} = elem_count(zero(T), Val(:bits)) +elem_count(x::BigFloatData{T}, ::Val{:bits}) where {T} = word_length(x) * elem_count(x, Val(:words)) reversed_index(n::Int, i::Int) = n - i - 1 reversed_index(x, i::Int, v::Val) = reversed_index(elem_count(x, v), i)::Int -split_bit_index(x::RawBigInt, i::Int) = divrem(i, word_length(x), RoundToZero) - -function get_elem_words_raw(x::RawBigInt{T}, i::Int) where {T} - @boundscheck if (i < 0) || (elem_count(x, Val(:words)) ≤ i) - throw(BoundsError(x, i)) - end - d = x.d - j = i + 1 - (GC.@preserve d unsafe_load(Ptr{T}(pointer(d)), j))::T -end +split_bit_index(x::BigFloatData, i::Int) = divrem(i, word_length(x), RoundToZero) """ `i` is the zero-based index of the wanted word in `x`, starting from the less significant words. 
""" -function get_elem(x::RawBigInt, i::Int, ::Val{:words}, ::Val{:ascending}) - @inbounds @inline get_elem_words_raw(x, i) +function get_elem(x::BigFloatData{T}, i::Int, ::Val{:words}, ::Val{:ascending}) where {T} + @inbounds return x[i + 1]::T end function get_elem(x, i::Int, v::Val, ::Val{:descending}) @@ -43,9 +23,9 @@ function get_elem(x, i::Int, v::Val, ::Val{:descending}) get_elem(x, j, v, Val(:ascending)) end -word_is_nonzero(x::RawBigInt, i::Int, v::Val) = !iszero(get_elem(x, i, Val(:words), v)) +word_is_nonzero(x::BigFloatData, i::Int, v::Val) = !iszero(get_elem(x, i, Val(:words), v)) -word_is_nonzero(x::RawBigInt, v::Val) = let x = x +word_is_nonzero(x::BigFloatData, v::Val) = let x = x i -> word_is_nonzero(x, i, v) end @@ -53,7 +33,7 @@ end Returns a `Bool` indicating whether the `len` least significant words of `x` are nonzero. """ -function tail_is_nonzero(x::RawBigInt, len::Int, ::Val{:words}) +function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:words}) any(word_is_nonzero(x, Val(:ascending)), 0:(len - 1)) end @@ -61,7 +41,7 @@ end Returns a `Bool` indicating whether the `len` least significant bits of the `i`-th (zero-based index) word of `x` are nonzero. """ -function tail_is_nonzero(x::RawBigInt, len::Int, i::Int, ::Val{:word}) +function tail_is_nonzero(x::BigFloatData, len::Int, i::Int, ::Val{:word}) !iszero(len) && !iszero(get_elem(x, i, Val(:words), Val(:ascending)) << (word_length(x) - len)) end @@ -70,7 +50,7 @@ end Returns a `Bool` indicating whether the `len` least significant bits of `x` are nonzero. """ -function tail_is_nonzero(x::RawBigInt, len::Int, ::Val{:bits}) +function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:bits}) if 0 < len word_count, bit_count_in_word = split_bit_index(x, len) tail_is_nonzero(x, bit_count_in_word, word_count, Val(:word)) || @@ -90,7 +70,7 @@ end """ Returns a `Bool` that is the `i`-th (zero-based index) bit of `x`. 
""" -function get_elem(x::RawBigInt, i::Int, ::Val{:bits}, v::Val{:ascending}) +function get_elem(x::BigFloatData, i::Int, ::Val{:bits}, v::Val{:ascending}) vb = Val(:bits) if 0 ≤ i < elem_count(x, vb) word_index, bit_index_in_word = split_bit_index(x, i) @@ -106,7 +86,7 @@ Returns an integer of type `R`, consisting of the `len` most significant bits of `x`. If there are less than `len` bits in `x`, the least significant bits are zeroed. """ -function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer} +function truncated(::Type{R}, x::BigFloatData, len::Int) where {R<:Integer} ret = zero(R) if 0 < len word_count, bit_count_in_word = split_bit_index(x, len) @@ -116,7 +96,7 @@ function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer} for w ∈ 0:(word_count - 1) ret <<= k - if w < lenx + if w < lenx # if the output type is larger, truncate turns into zero-extend word = get_elem(x, w, vals...) ret |= R(word) end @@ -124,7 +104,7 @@ function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer} if !iszero(bit_count_in_word) ret <<= bit_count_in_word - if word_count < lenx + if word_count < lenx # if the output type is larger, truncate turns into zero-extend wrd = get_elem(x, word_count, vals...) ret |= R(wrd >>> (k - bit_count_in_word)) end @@ -133,14 +113,14 @@ function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer} ret::R end -struct RawBigIntRoundingIncrementHelper{T<:Unsigned} - n::RawBigInt{T} +struct BigFloatDataRoundingIncrementHelper{T<:Unsigned} + n::BigFloatData{T} trunc_len::Int final_bit::Bool round_bit::Bool - function RawBigIntRoundingIncrementHelper{T}(n::RawBigInt{T}, len::Int) where {T<:Unsigned} + function BigFloatDataRoundingIncrementHelper{T}(n::BigFloatData{T}, len::Int) where {T<:Unsigned} vals = (Val(:bits), Val(:descending)) f = get_elem(n, len - 1, vals...) r = get_elem(n, len , vals...) 
@@ -148,15 +128,15 @@ struct RawBigIntRoundingIncrementHelper{T<:Unsigned} end end -function RawBigIntRoundingIncrementHelper(n::RawBigInt{T}, len::Int) where {T<:Unsigned} - RawBigIntRoundingIncrementHelper{T}(n, len) +function BigFloatDataRoundingIncrementHelper(n::BigFloatData{T}, len::Int) where {T<:Unsigned} + BigFloatDataRoundingIncrementHelper{T}(n, len) end -(h::RawBigIntRoundingIncrementHelper)(::Rounding.FinalBit) = h.final_bit +(h::BigFloatDataRoundingIncrementHelper)(::Rounding.FinalBit) = h.final_bit -(h::RawBigIntRoundingIncrementHelper)(::Rounding.RoundBit) = h.round_bit +(h::BigFloatDataRoundingIncrementHelper)(::Rounding.RoundBit) = h.round_bit -function (h::RawBigIntRoundingIncrementHelper)(::Rounding.StickyBit) +function (h::BigFloatDataRoundingIncrementHelper)(::Rounding.StickyBit) v = Val(:bits) n = h.n tail_is_nonzero(n, elem_count(n, v) - h.trunc_len - 1, v) diff --git a/stdlib/Random/src/generation.jl b/stdlib/Random/src/generation.jl index d8bb48d2764d2..b605dff9e5d80 100644 --- a/stdlib/Random/src/generation.jl +++ b/stdlib/Random/src/generation.jl @@ -66,7 +66,7 @@ function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat) limbs[end] |= Limb_high_bit end z.sign = 1 - GC.@preserve limbs unsafe_copyto!(z.d, pointer(limbs), sp.nlimbs) + copyto!(z.d, limbs) randbool end diff --git a/test/dict.jl b/test/dict.jl index 13c60d5a6a053..909afb3607907 100644 --- a/test/dict.jl +++ b/test/dict.jl @@ -1049,7 +1049,7 @@ Dict(1 => rand(2,3), 'c' => "asdf") # just make sure this does not trigger a dep # issue #26939 d26939 = WeakKeyDict() - (@noinline d -> d[big"1.0" + 1.1] = 1)(d26939) + (@noinline d -> d[big"1" + 1] = 1)(d26939) GC.gc() # primarily to make sure this doesn't segfault @test count(d26939) == 0 @test length(d26939.ht) == 1 diff --git a/test/mpfr.jl b/test/mpfr.jl index 63da732df1c09..c212bdfc92821 100644 --- a/test/mpfr.jl +++ b/test/mpfr.jl @@ -1089,11 +1089,11 @@ end end end -@testset "RawBigInt truncation OOB read" begin 
+@testset "BigFloatData truncation OOB read" begin @testset "T: $T" for T ∈ (UInt8, UInt16, UInt32, UInt64, UInt128) - v = Base.RawBigInt{T}("a"^sizeof(T), 1) + v = Base.MPFR.BigFloatData{T}(fill(typemax(T), 1 + Base.MPFR.offset_p_limbs)) @testset "bit_count: $bit_count" for bit_count ∈ (0:10:80) - @test Base.truncated(UInt128, v, bit_count) isa Any + @test Base.MPFR.truncated(UInt128, v, bit_count) isa Any end end end