From 9f915633bbda66013aa2eb133eb63afb05e1b0e2 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Tue, 2 Jul 2024 16:11:17 -0400 Subject: [PATCH] Improve type-stability of TOML parser This changes the output of the TOML parser to provide specialize `Vector{T}` less aggressively, so that combinatorially expensive types like `Vector{Vector{Float64}}` or `Vector{Union{Float64,Int64}}` are instead returned as `Vector{Any}` Vectors of homogeneous leaf types, like `Vector{Float64}` are still supported as before. This change makes the TOML parser fully type-stable, except for its dynamic usage of Dates. Co-authored-by: Gabriel Baraldi --- base/toml_parser.jl | 76 +++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/base/toml_parser.jl b/base/toml_parser.jl index 10a281fd719faa..e466805a193fff 100644 --- a/base/toml_parser.jl +++ b/base/toml_parser.jl @@ -493,8 +493,10 @@ function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String}, d = d::TOMLDict key = dotted_keys[i] d = get!(TOMLDict, d, key) - if d isa Vector + if d isa Vector{Any} d = d[end] + elseif d isa Vector + return ParserError(ErrKeyAlreadyHasValue) end check && @try check_allowed_add_key(l, d, i == length(dotted_keys)) end @@ -536,7 +538,7 @@ function parse_array_table(l)::Union{Nothing, ParserError} d = @try recurse_dict!(l, l.root, @view(table_key[1:end-1]), false) k = table_key[end] old = get!(() -> [], d, k) - if old isa Vector + if old isa Vector{Any} if old in l.static_arrays return ParserError(ErrAddArrayToStaticArray) end @@ -666,41 +668,21 @@ end # Array # ######### -function push!!(v::Vector, el) - # Since these types are typically non-inferable, they are a big invalidation risk, - # and since it's used by the package-loading infrastructure the cost of invalidation - # is high. Therefore, this is written to reduce the "exposed surface area": e.g., rather - # than writing `T[el]` we write it as `push!(Vector{T}(undef, 1), el)` so that there - # is no ambiguity about what types of objects will be created. - T = eltype(v) - t = typeof(el) - if el isa T || t === T - push!(v, el::T) - return v - elseif T === Union{} - out = Vector{t}(undef, 1) - out[1] = el - return out - else - if T isa Union - newT = Any - else - newT = Union{T, typeof(el)} - end - new = Array{newT}(undef, length(v)) - copy!(new, v) - return push!(new, el) +function copyto_typed!(a::Vector{T}, b::Vector) where T + for i in 1:length(b) + a[i] = b[i]::T end + return nothing end -function parse_array(l::Parser)::Err{Vector} +function parse_array(l::Parser{Dates})::Err{Vector} where Dates skip_ws_nl(l) - array = Vector{Union{}}() + array = Vector{Any}() empty_array = accept(l, ']') while !empty_array v = @try parse_value(l) # TODO: Worth to function barrier this? - array = push!!(array, v) + array = push!(array, v) # There can be an arbitrary number of newlines and comments before a value and before the closing bracket. skip_ws_nl(l) comma = accept(l, ',') @@ -710,8 +692,40 @@ function parse_array(l::Parser)::Err{Vector} return ParserError(ErrExpectedCommaBetweenItemsArray) end end - push!(l.static_arrays, array) - return array + # check for static type throughout array + T = !isempty(array) ? typeof(array[1]) : Union{} + for el in array + if typeof(el) != T + T = Any + break + end + end + if T === Any + new = array + elseif T === String + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Bool + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Int64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === UInt64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Float64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Union{} + new = Union{}[] + elseif (T === TOMLDict) || (T == BigInt) || (T === UInt128) || (T === Int128) || (T <: Vector) || + (T === Dates.Date) || (T === Dates.Time) || (T === Dates.DateTime) + # do nothing, leave as Vector{Any} + new = array + else @assert false end + push!(l.static_arrays, new) + return new end