pythonGH-117108: Change the size of the GC increment to about 1% of the total heap size. (pythonGH-117120)
markshannon authored Mar 22, 2024
1 parent e2e0b4b commit e28477f
Showing 6 changed files with 47 additions and 28 deletions.
3 changes: 2 additions & 1 deletion Include/internal/pycore_gc.h
@@ -282,6 +282,7 @@ struct _gc_runtime_state {
/* a list of callbacks to be invoked when collection is performed */
PyObject *callbacks;

Py_ssize_t heap_size;
Py_ssize_t work_to_do;
/* Which of the old spaces is the visited space */
int visited_space;
@@ -321,7 +322,7 @@ extern void _PyGC_Unfreeze(PyInterpreterState *interp);
/* Number of frozen objects */
extern Py_ssize_t _PyGC_GetFreezeCount(PyInterpreterState *interp);

extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation);
extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, int generation);
extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs);

// Functions to clear types free lists
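
The new heap_size field is a running count of GC-tracked objects; the Python/gc.c hunks below increment it in _PyObject_GC_Link and decrement it in PyObject_GC_Del. As a rough user-level analogue (an illustration only, not the counter itself), the number of objects the collector currently tracks can be sampled from Python:

import gc

# Approximate stand-in for the new heap_size counter: the number of
# container objects currently tracked by the cycle collector.
print(len(gc.get_objects()))
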
35 changes: 25 additions & 10 deletions Lib/test/test_gc.py
@@ -1058,7 +1058,19 @@ class Z:
callback.assert_not_called()
gc.enable()


class IncrementalGCTests(unittest.TestCase):

def setUp(self):
# Reenable GC as it is disabled module-wide
gc.enable()

def tearDown(self):
gc.disable()

@unittest.skipIf(Py_GIL_DISABLED, "Free threading does not support incremental GC")
# Use small increments to emulate longer running process in a shorter time
@gc_threshold(200, 10)
def test_incremental_gc_handles_fast_cycle_creation(self):

class LinkedList:
@@ -1080,28 +1092,31 @@ def make_ll(depth):
head = LinkedList(head, head.prev)
return head

head = make_ll(10000)
count = 10000
head = make_ll(1000)
count = 1000

# We expect the counts to go negative eventually
# as there will some objects we aren't counting,
# e.g. the gc stats dicts. The test merely checks
# that the counts don't grow.
# There will be some objects we aren't counting,
# e.g. the gc stats dicts. This test checks
# that the counts don't grow, so we try to
# correct for the uncounted objects
# This is just an estimate.
CORRECTION = 20

enabled = gc.isenabled()
gc.enable()
olds = []
for i in range(1000):
newhead = make_ll(200)
count += 200
for i in range(20_000):
newhead = make_ll(20)
count += 20
newhead.surprise = head
olds.append(newhead)
if len(olds) == 50:
if len(olds) == 20:
stats = gc.get_stats()
young = stats[0]
incremental = stats[1]
old = stats[2]
collected = young['collected'] + incremental['collected'] + old['collected']
count += CORRECTION
live = count - collected
self.assertLess(live, 25000)
del olds[:]
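
The reworked test drives many small collections and sums the 'collected' counters from gc.get_stats(), treating its three entries as the young, incremental, and old collections. A minimal standalone use of that API (output varies with interpreter state) looks like:

import gc

gc.collect()
for generation, stats in enumerate(gc.get_stats()):
    print(generation, stats["collections"], stats["collected"], stats["uncollectable"])
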
3 changes: 3 additions & 0 deletions (new NEWS entry)
@@ -0,0 +1,3 @@
The cycle GC now chooses the size of increments based on the total heap
size, instead of the rate of object creation. This ensures that it can keep
up with growing heaps.
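
In rough terms, each collection step now adds heap_size / SCAN_RATE_DIVISOR / scale_factor units of work, where scale_factor is the first old-generation threshold (10 by default), so the default increment is about 1% of the heap. A simplified Python model of that integer arithmetic (not the C implementation) would be:

SCAN_RATE_DIVISOR = 10

def work_added(heap_size, old_threshold=10):
    # With the default threshold of 10 this is heap_size / 100, i.e. ~1% of the heap.
    scale_factor = max(old_threshold, 1)
    return heap_size // SCAN_RATE_DIVISOR // scale_factor

print(work_added(1_000_000))  # -> 10000 for a heap of one million tracked objects
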
2 changes: 1 addition & 1 deletion Modules/gcmodule.c
@@ -326,7 +326,7 @@ gc_get_objects_impl(PyObject *module, Py_ssize_t generation)
}

PyInterpreterState *interp = _PyInterpreterState_GET();
return _PyGC_GetObjects(interp, generation);
return _PyGC_GetObjects(interp, (int)generation);
}

/*[clinic input]
30 changes: 15 additions & 15 deletions Python/gc.c
@@ -182,6 +182,7 @@ _PyGC_Init(PyInterpreterState *interp)
if (gcstate->callbacks == NULL) {
return _PyStatus_NO_MEMORY();
}
gcstate->heap_size = 0;

return _PyStatus_OK();
}
@@ -1232,7 +1233,7 @@ gc_collect_region(PyThreadState *tstate,
struct gc_collection_stats *stats);

static inline Py_ssize_t
gc_list_set_space(PyGC_Head *list, uintptr_t space)
gc_list_set_space(PyGC_Head *list, int space)
{
Py_ssize_t size = 0;
PyGC_Head *gc;
@@ -1258,9 +1259,9 @@ gc_list_set_space(PyGC_Head *list, uintptr_t space)
* N == 1.4 (1 + 4/threshold)
*/

/* Multiply by 4 so that the default incremental threshold of 10
* scans objects at 20% the rate of object creation */
#define SCAN_RATE_MULTIPLIER 2
/* Divide by 10, so that the default incremental threshold of 10
* scans objects at 1% of the heap size */
#define SCAN_RATE_DIVISOR 10

static void
add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
@@ -1313,7 +1314,7 @@ gc_collect_young(PyThreadState *tstate,
if (scale_factor < 1) {
scale_factor = 1;
}
gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor;
gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
add_stats(gcstate, 0, stats);
}

@@ -1384,12 +1385,12 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat
static void
completed_cycle(GCState *gcstate)
{
#ifdef Py_DEBUG
PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
assert(gc_list_is_empty(not_visited));
#endif
gcstate->visited_space = flip_old_space(gcstate->visited_space);
if (gcstate->work_to_do > 0) {
gcstate->work_to_do = 0;
}
gcstate->work_to_do = 0;
}

static void
Expand All @@ -1404,13 +1405,13 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
if (scale_factor < 1) {
scale_factor = 1;
}
Py_ssize_t increment_size = 0;
gc_list_merge(&gcstate->young.head, &increment);
gcstate->young.count = 0;
if (gcstate->visited_space) {
/* objects in visited space have bit set, so we set it here */
gc_list_set_space(&increment, 1);
}
Py_ssize_t increment_size = 0;
while (increment_size < gcstate->work_to_do) {
if (gc_list_is_empty(not_visited)) {
break;
@@ -1425,14 +1426,11 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
PyGC_Head survivors;
gc_list_init(&survivors);
gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats);
Py_ssize_t survivor_count = gc_list_size(&survivors);
gc_list_merge(&survivors, visited);
assert(gc_list_is_empty(&increment));
gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor;
gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
gcstate->work_to_do -= increment_size;
if (gcstate->work_to_do < 0) {
gcstate->work_to_do = 0;
}

validate_old(gcstate);
add_stats(gcstate, 1, stats);
if (gc_list_is_empty(not_visited)) {
@@ -1678,7 +1676,7 @@ _PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs)
}

PyObject *
_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation)
_PyGC_GetObjects(PyInterpreterState *interp, int generation)
{
assert(generation >= -1 && generation < NUM_GENERATIONS);
GCState *gcstate = &interp->gc;
@@ -1974,6 +1972,7 @@ _PyObject_GC_Link(PyObject *op)
gc->_gc_next = 0;
gc->_gc_prev = 0;
gcstate->young.count++; /* number of allocated GC objects */
gcstate->heap_size++;
if (gcstate->young.count > gcstate->young.threshold &&
gcstate->enabled &&
gcstate->young.threshold &&
@@ -2095,6 +2094,7 @@ PyObject_GC_Del(void *op)
if (gcstate->young.count > 0) {
gcstate->young.count--;
}
gcstate->heap_size--;
PyObject_Free(((char *)op)-presize);
}

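
To summarize the gc_collect_increment changes above: the young objects are merged into the increment, more objects are pulled from the not-visited space until the work budget is spent, the region is collected, and the budget is then topped up from the heap size rather than from the survivor count. A simplified Python model of that budgeting (objects abstracted to plain counts, the actual collection elided) might look like:

SCAN_RATE_DIVISOR = 10

def run_increment(work_to_do, heap_size, pending, scale_factor=10):
    # Move at most work_to_do objects from the pending (not-visited) space
    # into this increment; a real increment also always includes the young objects.
    increment_size = min(max(work_to_do, 0), pending)
    pending -= increment_size
    # ... the increment would be collected here ...
    # Replenish the budget in proportion to the whole heap, then charge this increment.
    work_to_do += heap_size // SCAN_RATE_DIVISOR // scale_factor
    work_to_do -= increment_size
    return work_to_do, pending
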
2 changes: 1 addition & 1 deletion Python/gc_free_threading.c
@@ -1305,7 +1305,7 @@ visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area,
}

PyObject *
_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation)
_PyGC_GetObjects(PyInterpreterState *interp, int generation)
{
PyObject *result = PyList_New(0);
if (!result) {
