Skip to content

Commit

Permalink
Add fallback if we have made a weird GC decision. (#50682)
Browse files Browse the repository at this point in the history
If something odd happens during GC (e.g. the PC goes to sleep), or there is
a very big transient, the heuristics might make a bad decision. This PR
implements a fallback: if we try to make our target more than double the one
we had before, we fall back to a more conservative method. This fixes the new
issue @vtjnash found in #40644
for me.

(cherry picked from commit ab94fad)
  • Loading branch information
gbaraldi authored and KristofferC committed Aug 10, 2023
1 parent aed6f5a commit 8cfb350
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 28 deletions.
2 changes: 1 addition & 1 deletion Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1503,7 +1503,7 @@ endef
WINE ?= wine

ifeq ($(BINARY),32)
HEAPLIM := --heap-size-hint=500M
HEAPLIM := --heap-size-hint=1000M
else
HEAPLIM :=
endif
Expand Down
6 changes: 3 additions & 3 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -1223,18 +1223,18 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect
if (!gc_logging_enabled) {
return;
}
jl_safe_printf("GC: pause %.2fms. collected %fMB. %s %s\n",
jl_safe_printf("\nGC: pause %.2fms. collected %fMB. %s %s\n",
pause/1e6, freed/(double)(1<<20),
full ? "full" : "incr",
recollect ? "recollect" : ""
);

jl_safe_printf("Heap stats: bytes_mapped %.2f MB, bytes_resident %.2f MB, heap_size %.2f MB, heap_target %.2f MB, live_bytes %.2f MB\n, Fragmentation %.3f",
jl_safe_printf("Heap stats: bytes_mapped %.2f MB, bytes_resident %.2f MB,\nheap_size %.2f MB, heap_target %.2f MB, Fragmentation %.3f\n",
jl_atomic_load_relaxed(&gc_heap_stats.bytes_mapped)/(double)(1<<20),
jl_atomic_load_relaxed(&gc_heap_stats.bytes_resident)/(double)(1<<20),
// live_bytes/(double)(1<<20), live bytes tracking is not accurate.
jl_atomic_load_relaxed(&gc_heap_stats.heap_size)/(double)(1<<20),
jl_atomic_load_relaxed(&gc_heap_stats.heap_target)/(double)(1<<20),
live_bytes/(double)(1<<20),
(double)live_bytes/(double)jl_atomic_load_relaxed(&gc_heap_stats.heap_size)
);
// Should fragmentation use bytes_resident instead of heap_size?
Expand Down
79 changes: 63 additions & 16 deletions src/gc.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include "gc.h"
#include "julia.h"
#include "julia_gcext.h"
#include "julia_assert.h"
#ifdef __GLIBC__
Expand Down Expand Up @@ -696,8 +697,8 @@ static uint64_t old_heap_size = 0;
static uint64_t old_alloc_diff = 0;
static uint64_t old_freed_diff = 0;
static uint64_t gc_end_time = 0;


static int thrash_counter = 0;
static int thrashing = 0;
// global variables for GC stats

// Resetting the object to a young object, this is used when marking the
Expand Down Expand Up @@ -1163,7 +1164,10 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc - free_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size));
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
}
}
}
Expand Down Expand Up @@ -3251,9 +3255,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
// If the live data outgrows the suggested max_total_memory
// we keep going with minimum intervals and full gcs until
// we either free some space or get an OOM error.
if (live_bytes > max_total_memory) {
sweep_full = 1;
}
if (gc_sweep_always_full) {
sweep_full = 1;
}
Expand Down Expand Up @@ -3302,7 +3303,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
gc_num.last_full_sweep = gc_end_time;
}

int thrashing = 0; // maybe we should report this to the user or error out?
size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size);
double target_allocs = 0.0;
double min_interval = default_collect_interval;
Expand All @@ -3313,24 +3313,32 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
double collect_smooth_factor = 0.5;
double tuning_factor = 0.03;
double alloc_mem = jl_gc_smooth(old_alloc_diff, alloc_diff, alloc_smooth_factor);
double alloc_time = jl_gc_smooth(old_mut_time, mutator_time, alloc_smooth_factor);
double alloc_time = jl_gc_smooth(old_mut_time, mutator_time + sweep_time, alloc_smooth_factor); // Charge sweeping to the mutator
double gc_mem = jl_gc_smooth(old_freed_diff, freed_diff, collect_smooth_factor);
double gc_time = jl_gc_smooth(old_pause_time, pause, collect_smooth_factor);
double gc_time = jl_gc_smooth(old_pause_time, pause - sweep_time, collect_smooth_factor);
old_alloc_diff = alloc_diff;
old_mut_time = mutator_time;
old_freed_diff = freed_diff;
old_pause_time = pause;
old_heap_size = heap_size;
thrashing = gc_time > mutator_time * 98 ? 1 : 0;
old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC
if (gc_time > alloc_time * 95 && !(thrash_counter < 4))
thrash_counter += 1;
else if (thrash_counter > 0)
thrash_counter -= 1;
if (alloc_mem != 0 && alloc_time != 0 && gc_mem != 0 && gc_time != 0 ) {
double alloc_rate = alloc_mem/alloc_time;
double gc_rate = gc_mem/gc_time;
target_allocs = sqrt(((double)heap_size/min_interval * alloc_rate)/(gc_rate * tuning_factor)); // work on multiples of min interval
}
}
if (target_allocs == 0.0 || thrashing) // If we are thrashing go back to default
target_allocs = 2*sqrt((double)heap_size/min_interval);
if (thrashing == 0 && thrash_counter >= 3)
thrashing = 1;
else if (thrashing == 1 && thrash_counter <= 2)
thrashing = 0; // maybe we should report this to the user or error out?

int bad_result = (target_allocs*min_interval + heap_size) > 2 * jl_atomic_load_relaxed(&gc_heap_stats.heap_target); // Don't follow through on a bad decision
if (target_allocs == 0.0 || thrashing || bad_result) // If we are thrashing go back to default
target_allocs = 2*sqrt((double)heap_size/min_interval);
uint64_t target_heap = (uint64_t)target_allocs*min_interval + heap_size;
if (target_heap > max_total_memory && !thrashing) // Allow it to go over if we are thrashing if we die we die
target_heap = max_total_memory;
Expand Down Expand Up @@ -3594,10 +3602,10 @@ void jl_gc_init(void)
total_mem = uv_get_total_memory();
uint64_t constrained_mem = uv_get_constrained_memory();
if (constrained_mem > 0 && constrained_mem < total_mem)
total_mem = constrained_mem;
jl_gc_set_max_memory(constrained_mem - 250*1024*1024); // LLVM + other libraries need some amount of memory
#endif
if (jl_options.heap_size_hint)
jl_gc_set_max_memory(jl_options.heap_size_hint);
jl_gc_set_max_memory(jl_options.heap_size_hint - 250*1024*1024);

t_start = jl_hrtime();
}
Expand Down Expand Up @@ -3700,7 +3708,26 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old));
jl_atomic_store_relaxed(&ptls->gc_num.realloc,
jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, sz-old);

int64_t diff = sz - old;
if (diff < 0) {
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
if (free_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + (-diff));
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + (-diff)));
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
}
}
else {
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
}
}
return realloc(p, sz);
}
Expand Down Expand Up @@ -3817,7 +3844,27 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz));
jl_atomic_store_relaxed(&ptls->gc_num.realloc,
jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, allocsz-oldsz);

int64_t diff = allocsz - oldsz;
if (diff < 0) {
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
if (free_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + (-diff));
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + (-diff)));
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
}
}
else {
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
}

int last_errno = errno;
#ifdef _OS_WINDOWS_
DWORD last_error = GetLastError();
Expand Down
4 changes: 2 additions & 2 deletions test/cmdlineargs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,6 @@ end
@test lines[3] == "foo"
@test lines[4] == "bar"
end
#heap-size-hint
@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "524288000"
#heap-size-hint, we reserve 250 MB for non GC memory (llvm, etc.)
@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "$((500-250)*1024*1024)"
end
7 changes: 1 addition & 6 deletions test/testenv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,7 @@ if !@isdefined(testenv_defined)
function addprocs_with_testenv(X; rr_allowed=true, kwargs...)
exename = rr_allowed ? `$rr_exename $test_exename` : test_exename
if X isa Integer
if Sys.iswindows()
heap_size=round(Int,(Sys.free_memory()/(1024^2)/(X+1)))
heap_size -= 300 # I don't know anymore
else
heap_size=round(Int,(Sys.total_memory()/(1024^2)/(X+1)))
end
heap_size=round(Int,(Sys.free_memory()/(1024^2)/(X+1)))
push!(test_exeflags.exec, "--heap-size-hint=$(heap_size)M")
end
addprocs(X; exename=exename, exeflags=test_exeflags, kwargs...)
Expand Down

0 comments on commit 8cfb350

Please sign in to comment.