Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Report any recompilation in time macro #45015

Merged
merged 10 commits into from
Apr 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Library changes
tasks mutating the dictionary or set ([#44534]).
* Predicate function negation `!f` now returns a composed function `(!) ∘ f` instead of an anonymous function ([#44752]).
* `RoundFromZero` now works for non-`BigFloat` types ([#41246]).
* `@time` now separates out % time spent recompiling invalidated methods ([#45015]).


Standard library changes
Expand Down
50 changes: 36 additions & 14 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,21 @@ function gc_alloc_count(diff::GC_Diff)
diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
end

# cumulative total time spent on compilation, in nanoseconds
cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ())
cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())
# cumulative total time spent on compilation and recompilation, in nanoseconds
function cumulative_compile_time_ns()
comp = ccall(:jl_cumulative_compile_time_ns, UInt64, ())
recomp = ccall(:jl_cumulative_recompile_time_ns, UInt64, ())
return comp, recomp
end

function cumulative_compile_timing(b::Bool)
if b
ccall(:jl_cumulative_compile_timing_enable, Cvoid, ())
else
ccall(:jl_cumulative_compile_timing_disable, Cvoid, ())
end
return
end

# total time spend in garbage collection, in nanoseconds
gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ())
Expand Down Expand Up @@ -114,7 +126,7 @@ function format_bytes(bytes) # also used by InteractiveUtils
end
end

function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, newline=false, _lpad=true)
function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true)
timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
str = sprint() do io
_lpad && print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
Expand Down Expand Up @@ -142,15 +154,20 @@ function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, ne
end
print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
end
if recompile_time > 0
print(io, ": ", Ryu.writefixed(Float64(100*recompile_time/compile_time), 0), "% of which was recompilation")
end
parens && print(io, ")")
end
newline ? println(str) : print(str)
nothing
end

function timev_print(elapsedtime, diff::GC_Diff, compile_time, _lpad)
function timev_print(elapsedtime, diff::GC_Diff, compile_times, _lpad)
allocs = gc_alloc_count(diff)
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, true, _lpad)
compile_time = first(compile_times)
recompile_time = last(compile_times)
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad)
padded_nonzero_print(elapsedtime, "elapsed time (ns)")
padded_nonzero_print(diff.total_time, "gc time (ns)")
padded_nonzero_print(diff.allocd, "bytes allocated")
Expand Down Expand Up @@ -181,8 +198,8 @@ end

A macro to execute an expression, printing the time it took to execute, the number of
allocations, and the total number of bytes its execution caused to be allocated, before
returning the value of the expression. Any time spent garbage collecting (gc) or
compiling is shown as a percentage.
returning the value of the expression. Any time spent garbage collecting (gc), compiling
new code, or recompiling invalidated code is shown as a percentage.

Optionally provide a description string to print before the time report.

Expand All @@ -201,6 +218,9 @@ See also [`@showtime`](@ref), [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@
!!! compat "Julia 1.8"
The option to add a description was introduced in Julia 1.8.

!!! compat "Julia 1.9"
Recompilation time being shown separately from compilation time was introduced in Julia 1.9

```julia-repl
julia> x = rand(10,10);

Expand Down Expand Up @@ -238,16 +258,18 @@ macro time(msg, ex)
Experimental.@force_compile
local stats = gc_num()
local elapsedtime = time_ns()
local compile_elapsedtime = cumulative_compile_time_ns_before()
cumulative_compile_timing(true)
local compile_elapsedtimes = cumulative_compile_time_ns()
local val = @__tryfinally($(esc(ex)),
(elapsedtime = time_ns() - elapsedtime;
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime)
cumulative_compile_timing(false);
compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes)
)
local diff = GC_Diff(gc_num(), stats)
local _msg = $(esc(msg))
local has_msg = !isnothing(_msg)
has_msg && print(_msg, ": ")
time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), compile_elapsedtime, true, !has_msg)
time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg)
val
end
end
Expand Down Expand Up @@ -320,16 +342,16 @@ macro timev(msg, ex)
Experimental.@force_compile
local stats = gc_num()
local elapsedtime = time_ns()
local compile_elapsedtime = cumulative_compile_time_ns_before()
local compile_elapsedtimes = cumulative_compile_time_ns()
Comment on lines -323 to +345
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@IanButterworth: doesn't this need to call cumulative_compile_timing(true) here as well, to enable compilation timing?

Otherwise, I think this is currently wrong: i think the transformation here isn't enabling compilation time tracking, so you'll probably always see 0 compilation time, unless compilation time tracking is already enabled?

I guess we must not have any tests for this, specifically? :(

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes. My bad. Fix here. Haven't run the tests yet, might need work #46100

local val = @__tryfinally($(esc(ex)),
(elapsedtime = time_ns() - elapsedtime;
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime)
compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes)
)
local diff = GC_Diff(gc_num(), stats)
local _msg = $(esc(msg))
local has_msg = !isnothing(_msg)
has_msg && print(_msg, ": ")
timev_print(elapsedtime, diff, compile_elapsedtime, !has_msg)
timev_print(elapsedtime, diff, compile_elapsedtimes, !has_msg)
val
end
end
Expand Down
13 changes: 11 additions & 2 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
auto &context = *ctx;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
bool is_recompile = false;
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
// if we don't have any decls already, try to generate it now
Expand All @@ -330,6 +331,10 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
else if (jl_is_method(mi->def.method))
src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
}
else {
// identify whether this is an invalidated method that is being recompiled
is_recompile = jl_atomic_load_relaxed(&mi->cache) != NULL;
}
if (src == NULL && jl_is_method(mi->def.method) &&
jl_symbol_name(mi->def.method->name)[0] != '@') {
if (mi->def.method->source != jl_nothing) {
Expand All @@ -356,8 +361,12 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
else {
codeinst = NULL;
}
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) {
uint64_t t_comp = jl_hrtime() - compiler_start_time;
if (is_recompile)
jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp);
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp);
}
JL_UNLOCK(&jl_codegen_lock);
JL_GC_POP();
return codeinst;
Expand Down
14 changes: 11 additions & 3 deletions src/jlapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -471,20 +471,28 @@ JL_DLLEXPORT void (jl_cpu_wake)(void)
jl_cpu_wake();
}

JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_before(void)
JL_DLLEXPORT void jl_cumulative_compile_timing_enable(void)
{
// Increment the flag to allow reentrant callers to `@time`.
jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1);
return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
}

JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_after(void)
JL_DLLEXPORT void jl_cumulative_compile_timing_disable(void)
{
// Decrement the flag when done measuring, allowing other callers to continue measuring.
jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1);
}

JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns(void)
{
return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
}

JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void)
{
return jl_atomic_load_relaxed(&jl_cumulative_recompile_time);
}

JL_DLLEXPORT void jl_get_fenv_consts(int *ret)
{
ret[0] = FE_INEXACT;
Expand Down
1 change: 1 addition & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT
// Global *atomic* integers controlling *process-wide* measurement of compilation time.
extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled;
extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time;
extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time;

#define jl_return_address() ((uintptr_t)__builtin_return_address(0))

Expand Down
1 change: 1 addition & 0 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0;
JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0;
JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0;

// return calling thread's ID
// Also update the suspended_threads list in signals-mach when changing the
Expand Down
27 changes: 13 additions & 14 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -334,24 +334,23 @@ function timev_macro_scope()
end
@test timev_macro_scope() == 1

before = Base.cumulative_compile_time_ns_before();
before_comp, before_recomp = Base.cumulative_compile_time_ns() # no need to turn timing on, @time will do that

# exercise concurrent calls to `@time` for reentrant compilation time measurement.
t1 = @async @time begin
sleep(2)
@eval module M ; f(x,y) = x+y ; end
@eval M.f(2,3)
end
t2 = @async begin
sleep(1)
@time 2 + 2
@sync begin
t1 = @async @time begin
sleep(2)
@eval module M ; f(x,y) = x+y ; end
@eval M.f(2,3)
end
t2 = @async begin
sleep(1)
@time 2 + 2
end
end

after = Base.cumulative_compile_time_ns_after();
@test after >= before;

# wait for completion of these tasks before restoring stdout, to suppress their @time prints.
wait(t1); wait(t2)
after_comp, after_recomp = Base.cumulative_compile_time_ns() # no need to turn timing off, @time will do that
@test after_comp >= before_comp;

end # redirect_stdout

Expand Down