From 5dd7a0b1ec98ef10cb4e40d57697bcb385add549 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Wed, 5 Oct 2022 21:49:35 +0900 Subject: [PATCH] improve performance issue of `@nospecialize`-d keyword func call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit tries to fix and improve performance for calling keyword funcs whose arguments types are not fully known but `@nospecialize`-d. The final result would look like (this particular example is taken from our Julia-level compiler implementation): ```julia abstract type CallInfo end struct NoCallInfo <: CallInfo end struct NewInstruction stmt::Any type::Any info::CallInfo line::Union{Int32,Nothing} # if nothing, copy the line from previous statement in the insertion location flag::Union{UInt8,Nothing} # if nothing, IR flags will be recomputed on insertion function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo), line::Union{Int32,Nothing}, flag::Union{UInt8,Nothing}) return new(stmt, type, info, line, flag) end end @nospecialize function NewInstruction(newinst::NewInstruction; stmt=newinst.stmt, type=newinst.type, info::CallInfo=newinst.info, line::Union{Int32,Nothing}=newinst.line, flag::Union{UInt8,Nothing}=newinst.flag) return NewInstruction(stmt, type, info, line, flag) end @specialize using BenchmarkTools struct VirtualKwargs stmt::Any type::Any info::CallInfo end vkws = VirtualKwargs(nothing, Any, NoCallInfo()) newinst = NewInstruction(nothing, Any, NoCallInfo(), nothing, nothing) runner(newinst, vkws) = NewInstruction(newinst; vkws.stmt, vkws.type, vkws.info) @benchmark runner($newinst, $vkws) ``` > on master ``` BenchmarkTools.Trial: 10000 samples with 186 evaluations. Range (min … max): 559.898 ns … 4.173 μs ┊ GC (min … max): 0.00% … 85.29% Time (median): 605.608 ns ┊ GC (median): 0.00% Time (mean ± σ): 638.170 ns ± 125.080 ns ┊ GC (mean ± σ): 0.06% ± 0.85% █▇▂▆▄ ▁█▇▄▂ ▂ ██████▅██████▇▇▇██████▇▇▇▆▆▅▄▅▄▂▄▄▅▇▆▆▆▆▆▅▆▆▄▄▅▅▄▃▄▄▄▅▃▅▅▆▅▆▆ █ 560 ns Histogram: log(frequency) by time 1.23 μs < Memory estimate: 32 bytes, allocs estimate: 2. ``` > on this commit ```julia BenchmarkTools.Trial: 10000 samples with 1000 evaluations. Range (min … max): 3.080 ns … 83.177 ns ┊ GC (min … max): 0.00% … 0.00% Time (median): 3.098 ns ┊ GC (median): 0.00% Time (mean ± σ): 3.118 ns ± 0.885 ns ┊ GC (mean ± σ): 0.00% ± 0.00% ▂▅▇█▆▅▄▂ ▂▄▆▆▇████████▆▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▂▂▂▁▂▂▂▂▂▂▁▁▂▁▂▂▂▂▂▂▂▂▂ ▃ 3.08 ns Histogram: frequency by time 3.19 ns < Memory estimate: 0 bytes, allocs estimate: 0. ``` So for this particular case it achieves roughly 200x speed up. This is because this commit allows inlining of a call to keyword sorter as well as removal of `NamedTuple` call. Especially this commit is composed of the following improvements: - Add early return case for `structdiff`: This change improves the return type inference for a case when compared `NamedTuple`s are type unstable but there is no difference in their names, e.g. given two `NamedTuple{(:a,:b),T} where T<:Tuple{Any,Any}`s. And in such case the optimizer will remove `structdiff` and succeeding `pairs` calls, letting the keyword sorter to be inlined. - Tweak the core `NamedTuple{names}(args::Tuple)` constructor so that it directly forms `:splatnew` allocation rather than redirects to the general `NamedTuple` constructor, that could be confused for abstract input tuple type. - Improve `nfields_tfunc` accuracy as for abstract `NamedTuple` types. This improvement lets `inline_splatnew` to handle more abstract `NamedTuple`s, especially whose names are fully known but its fields tuple type is abstract. Those improvements are combined to allow our SROA pass to optimize away `NamedTuple` and `tuple` calls generated for keyword argument handling. E.g. the IR for the example `NewInstruction` constructor is now fairly optimized, like: ```julia julia> Base.code_ircode((NewInstruction,Any,Any,CallInfo)) do newinst, stmt, type, info NewInstruction(newinst; stmt, type, info) end |> only 2 1 ── %1 = Base.getfield(_2, :line)::Union{Nothing, Int32} │╻╷ Type##kw │ %2 = Base.getfield(_2, :flag)::Union{Nothing, UInt8} ││┃ getproperty │ %3 = (isa)(%1, Nothing)::Bool ││ │ %4 = (isa)(%2, Nothing)::Bool ││ │ %5 = (Core.Intrinsics.and_int)(%3, %4)::Bool ││ └─── goto #3 if not %5 ││ 2 ── %7 = %new(Main.NewInstruction, _3, _4, _5, nothing, nothing)::NewInstruction NewInstruction └─── goto #10 ││ 3 ── %9 = (isa)(%1, Int32)::Bool ││ │ %10 = (isa)(%2, Nothing)::Bool ││ │ %11 = (Core.Intrinsics.and_int)(%9, %10)::Bool ││ └─── goto #5 if not %11 ││ 4 ── %13 = π (%1, Int32) ││ │ %14 = %new(Main.NewInstruction, _3, _4, _5, %13, nothing)::NewInstruction│││╻ NewInstruction └─── goto #10 ││ 5 ── %16 = (isa)(%1, Nothing)::Bool ││ │ %17 = (isa)(%2, UInt8)::Bool ││ │ %18 = (Core.Intrinsics.and_int)(%16, %17)::Bool ││ └─── goto #7 if not %18 ││ 6 ── %20 = π (%2, UInt8) ││ │ %21 = %new(Main.NewInstruction, _3, _4, _5, nothing, %20)::NewInstruction│││╻ NewInstruction └─── goto #10 ││ 7 ── %23 = (isa)(%1, Int32)::Bool ││ │ %24 = (isa)(%2, UInt8)::Bool ││ │ %25 = (Core.Intrinsics.and_int)(%23, %24)::Bool ││ └─── goto #9 if not %25 ││ 8 ── %27 = π (%1, Int32) ││ │ %28 = π (%2, UInt8) ││ │ %29 = %new(Main.NewInstruction, _3, _4, _5, %27, %28)::NewInstruction │││╻ NewInstruction └─── goto #10 ││ 9 ── Core.throw(ErrorException("fatal error in type inference (type bound)"))::Union{} └─── unreachable ││ 10 ┄ %33 = φ (#2 => %7, #4 => %14, #6 => %21, #8 => %29)::NewInstruction ││ └─── goto #11 ││ 11 ─ return %33 │ => NewInstruction ``` --- base/boot.jl | 3 +- base/compiler/abstractinterpretation.jl | 6 ++-- base/compiler/ssair/passes.jl | 12 ++++++- base/compiler/tfuncs.jl | 17 ++++++++-- base/namedtuple.jl | 11 +++++-- test/compiler/inference.jl | 12 +++++++ test/compiler/inline.jl | 43 +++++++++++++++++++++++++ test/compiler/irutils.jl | 1 + 8 files changed, 95 insertions(+), 10 deletions(-) diff --git a/base/boot.jl b/base/boot.jl index 5f3b99df1c716d..38d011a0a5d05f 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -615,7 +615,8 @@ end NamedTuple() = NamedTuple{(),Tuple{}}(()) -NamedTuple{names}(args::Tuple) where {names} = NamedTuple{names,typeof(args)}(args) +eval(Core, :(NamedTuple{names}(args::Tuple) where {names} = + $(Expr(:splatnew, :(NamedTuple{names,typeof(args)}), :args)))) using .Intrinsics: sle_int, add_int diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index a5b7deacf121c3..d2298eeada0db5 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -2109,16 +2109,16 @@ function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp elseif ehead === :splatnew t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv)) nothrow = false # TODO: More precision - if length(e.args) == 2 && isconcretetype(t) && !ismutabletype(t) + if length(e.args) == 2 && isconcretedispatch(t) && !ismutabletype(t) at = abstract_eval_value(interp, e.args[2], vtypes, sv) n = fieldcount(t) if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) && let t = t, at = at; _all(i->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n); end - nothrow = isexact && isconcretedispatch(t) + nothrow = isexact t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, at.val)) elseif isa(at, PartialStruct) && at ⊑ᵢ Tuple && n == length(at.fields::Vector{Any}) && let t = t, at = at; _all(i->(at.fields::Vector{Any})[i] ⊑ᵢ fieldtype(t, i), 1:n); end - nothrow = isexact && isconcretedispatch(t) + nothrow = isexact t = PartialStruct(t, at.fields::Vector{Any}) end end diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 8ad72285fcaeb3..3d7b54eb361491 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -401,6 +401,16 @@ function lift_leaves(compact::IncrementalCompact, end lift_arg!(compact, leaf, cache_key, def, 1+field, lifted_leaves) continue + # NOTE we can enable this, but most `:splatnew` expressions are transformed into + # `:new` expressions by the inlinear + # elseif isexpr(def, :splatnew) && length(def.args) == 2 && isa(def.args[2], AnySSAValue) + # tplssa = def.args[2]::AnySSAValue + # tplexpr = compact[tplssa][:inst] + # if is_known_call(tplexpr, tuple, compact) && 1 ≤ field < length(tplexpr.args) + # lift_arg!(compact, tplssa, cache_key, tplexpr, 1+field, lifted_leaves) + # continue + # end + # return nothing elseif is_getfield_captures(def, compact) # Walk to new_opaque_closure ocleaf = def.args[2] @@ -469,7 +479,7 @@ function lift_arg!( end end lifted_leaves[cache_key] = LiftedValue(lifted) - nothing + return nothing end function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf)) diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index 9c206270976b8d..37bc4d40657911 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -406,8 +406,15 @@ function nfields_tfunc(@nospecialize(x)) x = unwrap_unionall(widenconst(x)) isconstType(x) && return Const(nfields(x.parameters[1])) if isa(x, DataType) && !isabstracttype(x) - if !(x.name === Tuple.name && isvatuple(x)) && - !(x.name === _NAMEDTUPLE_NAME && !isconcretetype(x)) + if x.name === Tuple.name + isvatuple(x) && return Int + return Const(length(x.types)) + elseif x.name === _NAMEDTUPLE_NAME + length(x.parameters) == 2 || return Int + names = x.parameters[1] + isa(names, Tuple{Vararg{Symbol}}) || return nfields_tfunc(x.parameters[2]) + return Const(length(names)) + else return Const(isdefined(x, :types) ? length(x.types) : length(x.name.names)) end end @@ -1594,6 +1601,12 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...) end if istuple return Type{<:appl} + elseif isa(appl, DataType) && appl.name === _NAMEDTUPLE_NAME && length(appl.parameters) == 2 && + (appl.parameters[1] === () || appl.parameters[2] === Tuple{}) + # if the first/second parameter of `NamedTuple` is known to be empty, + # the second/first argument should also be empty tuple type, + # so refine it here + return Const(NamedTuple{(),Tuple{}}) end ans = Type{appl} for i = length(outervars):-1:1 diff --git a/base/namedtuple.jl b/base/namedtuple.jl index 3e9f1272d588e4..b53304de7d8cc4 100644 --- a/base/namedtuple.jl +++ b/base/namedtuple.jl @@ -335,7 +335,7 @@ reverse(nt::NamedTuple) = NamedTuple{reverse(keys(nt))}(reverse(values(nt))) end """ - structdiff(a::NamedTuple{an}, b::Union{NamedTuple{bn},Type{NamedTuple{bn}}}) where {an,bn} + structdiff(a::NamedTuple, b::Union{NamedTuple,Type{NamedTuple}}) Construct a copy of named tuple `a`, except with fields that exist in `b` removed. `b` can be a named tuple, or a type of the form `NamedTuple{field_names}`. @@ -343,14 +343,19 @@ Construct a copy of named tuple `a`, except with fields that exist in `b` remove function structdiff(a::NamedTuple{an}, b::Union{NamedTuple{bn}, Type{NamedTuple{bn}}}) where {an, bn} if @generated names = diff_names(an, bn) + isempty(names) && return (;) # just a fast pass idx = Int[ fieldindex(a, names[n]) for n in 1:length(names) ] types = Tuple{Any[ fieldtype(a, idx[n]) for n in 1:length(idx) ]...} vals = Any[ :(getfield(a, $(idx[n]))) for n in 1:length(idx) ] - :( NamedTuple{$names,$types}(($(vals...),)) ) + return :( NamedTuple{$names,$types}(($(vals...),)) ) else names = diff_names(an, bn) + # N.B this early return is necessary to get a better type stability, + # and also allows us to cut off the cost from constructing + # potentially type unstable closure passed to the `map` below + isempty(names) && return (;) types = Tuple{Any[ fieldtype(typeof(a), names[n]) for n in 1:length(names) ]...} - NamedTuple{names,types}(map(Fix1(getfield, a), names)) + return NamedTuple{names,types}(map(n::Symbol->getfield(a, n), names)) end end diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index dcd57916589cf4..e174fc1470eb80 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -1526,6 +1526,11 @@ end @test nfields_tfunc(Tuple{Int, Vararg{Int}}) === Int @test nfields_tfunc(Tuple{Int, Integer}) === Const(2) @test nfields_tfunc(Union{Tuple{Int, Float64}, Tuple{Int, Int}}) === Const(2) +@test nfields_tfunc(@NamedTuple{a::Int,b::Integer}) === Const(2) +@test nfields_tfunc(NamedTuple{(:a,:b),T} where T<:Tuple{Int,Integer}) === Const(2) +@test nfields_tfunc(NamedTuple{(:a,:b)}) === Const(2) +@test nfields_tfunc(NamedTuple{names,Tuple{Any,Any}} where names) === Const(2) +@test nfields_tfunc(Union{NamedTuple{(:a,:b)},NamedTuple{(:c,:d)}}) === Const(2) using Core.Compiler: typeof_tfunc @test typeof_tfunc(Tuple{Vararg{Int}}) == Type{Tuple{Vararg{Int,N}}} where N @@ -2336,6 +2341,13 @@ end # Equivalence of Const(T.instance) and T for singleton types @test Const(nothing) ⊑ Nothing && Nothing ⊑ Const(nothing) +# `apply_type_tfunc` should always return accurate result for empty NamedTuple case +import Core: Const +import Core.Compiler: apply_type_tfunc +@test apply_type_tfunc(Const(NamedTuple), Const(()), Type{T} where T<:Tuple{}) === Const(typeof((;))) +@test apply_type_tfunc(Const(NamedTuple), Const(()), Type{T} where T<:Tuple) === Const(typeof((;))) +@test apply_type_tfunc(Const(NamedTuple), Tuple{Vararg{Symbol}}, Type{Tuple{}}) === Const(typeof((;))) + # Don't pessimize apply_type to anything worse than Type and yield Bottom for invalid Unions @test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union}}) == Type{Union{}} @test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union},Any}) == Type diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 1f594f74a24f43..f582277ec06f78 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -1770,3 +1770,46 @@ f_ifelse_3(a, b) = Core.ifelse(a, true, b) @test fully_eliminated(f_ifelse_1, Tuple{Any, Any}; retval=Core.Argument(2)) @test fully_eliminated(f_ifelse_2, Tuple{Any, Any}; retval=Core.Argument(3)) @test !fully_eliminated(f_ifelse_3, Tuple{Any, Any}) + +# inline_splatnew for abstract `NamedTuple` +@eval construct_splatnew(T, fields) = $(Expr(:splatnew, :T, :fields)) +for tt = Any[(Int,Int), (Integer,Integer), (Any,Any)] + let src = code_typed1(tt) do a, b + construct_splatnew(NamedTuple{(:a,:b),typeof((a,b))}, (a,b)) + end + @test count(issplatnew, src.code) == 0 + @test count(isnew, src.code) == 1 + end +end + +# optimize away `NamedTuple`s used for handling `@nospecialize`d keyword-argument +# https://github.com/JuliaLang/julia/pull/47059 +abstract type CallInfo end +struct NewInstruction + stmt::Any + type::Any + info::CallInfo + line::Int32 + flag::UInt8 + function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo), + line::Int32, flag::UInt8) + return new(stmt, type, info, line, flag) + end +end +@nospecialize +function NewInstruction(newinst::NewInstruction; + stmt=newinst.stmt, + type=newinst.type, + info::CallInfo=newinst.info, + line::Int32=newinst.line, + flag::UInt8=newinst.flag) + return NewInstruction(stmt, type, info, line, flag) +end +@specialize +let src = code_typed1((NewInstruction,Any,Any,CallInfo)) do newinst, stmt, type, info + NewInstruction(newinst; stmt, type, info) + end + @test count(issplatnew, src.code) == 0 + @test count(iscall((src,NamedTuple)), src.code) == 0 + @test count(isnew, src.code) == 1 +end diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl index 76f883d6cea2c3..ef8fe3efbb3150 100644 --- a/test/compiler/irutils.jl +++ b/test/compiler/irutils.jl @@ -8,6 +8,7 @@ get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code # check if `x` is a statement with a given `head` isnew(@nospecialize x) = isexpr(x, :new) +issplatnew(@nospecialize x) = isexpr(x, :splatnew) isreturn(@nospecialize x) = isa(x, ReturnNode) # check if `x` is a dynamic call of a given function