From 8bef34f625e21886b1c64544c060e19ee2e229bf Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 26 Mar 2024 11:11:42 +0000 Subject: [PATCH] GH-117108: Set the "old space bit" to "visited" for all young objects (#117213) Change old space bit of young objects from 0 to gcstate->visited_space. This ensures that any object created *and* collected during cycle GC has the bit set correctly. --- Include/internal/pycore_gc.h | 14 ++++- Include/internal/pycore_object.h | 4 +- Lib/test/test_gc.py | 24 +-------- ...-03-25-12-51-12.gh-issue-117108.tNqDEo.rst | 3 ++ Python/gc.c | 54 +++++++++++++------ 5 files changed, 56 insertions(+), 43 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-03-25-12-51-12.gh-issue-117108.tNqDEo.rst diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index e729616936f03b..c4482c4ffcfa60 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -113,7 +113,19 @@ static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) { /* Bit 1 is set when the object is in generation which is GCed currently. */ #define _PyGC_PREV_MASK_COLLECTING 2 -/* Bit 0 is set if the object belongs to old space 1 */ +/* Bit 0 in _gc_next is the old space bit. + * It is set as follows: + * Young: gcstate->visited_space + * old[0]: 0 + * old[1]: 1 + * permanent: 0 + * + * During a collection all objects handled should have the bit set to + * gcstate->visited_space, as objects are moved from the young gen + * and the increment into old[gcstate->visited_space]. + * When object are moved from the pending space, old[gcstate->visited_space^1] + * into the increment, the old space bit is flipped. +*/ #define _PyGC_NEXT_MASK_OLD_SPACE_1 1 #define _PyGC_PREV_SHIFT 2 diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 13fe543133f11e..0b17ddf0c973ef 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -318,8 +318,8 @@ static inline void _PyObject_GC_TRACK( PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); _PyGCHead_SET_NEXT(last, gc); _PyGCHead_SET_PREV(gc, last); - _PyGCHead_SET_NEXT(gc, generation0); - assert((gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1) == 0); + /* Young objects will be moved into the visited space during GC, so set the bit here */ + gc->_gc_next = ((uintptr_t)generation0) | interp->gc.visited_space; generation0->_gc_prev = (uintptr_t)gc; #endif } diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 57acbac5859e7f..3bf5c9ed41ee44 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -823,32 +823,10 @@ def test_get_objects_generations(self): self.assertTrue( any(l is element for element in gc.get_objects(generation=0)) ) - self.assertFalse( - any(l is element for element in gc.get_objects(generation=1)) - ) - self.assertFalse( - any(l is element for element in gc.get_objects(generation=2)) - ) - gc.collect(generation=0) - self.assertFalse( - any(l is element for element in gc.get_objects(generation=0)) - ) - self.assertTrue( - any(l is element for element in gc.get_objects(generation=1)) - ) - self.assertFalse( - any(l is element for element in gc.get_objects(generation=2)) - ) - gc.collect(generation=2) + gc.collect() self.assertFalse( any(l is element for element in gc.get_objects(generation=0)) ) - self.assertFalse( - any(l is element for element in gc.get_objects(generation=1)) - ) - self.assertTrue( - any(l is element for element in gc.get_objects(generation=2)) - ) del l gc.collect() diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-03-25-12-51-12.gh-issue-117108.tNqDEo.rst b/Misc/NEWS.d/next/Core and Builtins/2024-03-25-12-51-12.gh-issue-117108.tNqDEo.rst new file mode 100644 index 00000000000000..a28c83ee6efe40 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-03-25-12-51-12.gh-issue-117108.tNqDEo.rst @@ -0,0 +1,3 @@ +Change the old space bit of objects in the young generation from 0 to +gcstate->visited, so that any objects created during GC will have the old +bit set correctly if they get moved into the old generation. diff --git a/Python/gc.c b/Python/gc.c index 6d86059174a8cd..36e20d05c205a5 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -455,10 +455,20 @@ validate_consistent_old_space(PyGC_Head *head) assert(prev == GC_PREV(head)); } +static void +gc_list_validate_space(PyGC_Head *head, int space) { + PyGC_Head *gc = GC_NEXT(head); + while (gc != head) { + assert(gc_old_space(gc) == space); + gc = GC_NEXT(gc); + } +} + #else #define validate_list(x, y) do{}while(0) #define validate_old(g) do{}while(0) #define validate_consistent_old_space(l) do{}while(0) +#define gc_list_validate_space(l, s) do{}while(0) #endif /*** end of list stuff ***/ @@ -949,6 +959,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) /* Invoke the callbacks we decided to honor. It's safe to invoke them * because they can't reference unreachable objects. */ + int visited_space = get_gc_state()->visited_space; while (! gc_list_is_empty(&wrcb_to_call)) { PyObject *temp; PyObject *callback; @@ -983,6 +994,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) Py_DECREF(op); if (wrcb_to_call._gc_next == (uintptr_t)gc) { /* object is still alive -- move it */ + gc_set_old_space(gc, visited_space); gc_list_move(gc, old); } else { @@ -1389,6 +1401,14 @@ completed_cycle(GCState *gcstate) assert(gc_list_is_empty(not_visited)); #endif gcstate->visited_space = flip_old_space(gcstate->visited_space); + /* Make sure all young objects have old space bit set correctly */ + PyGC_Head *young = &gcstate->young.head; + PyGC_Head *gc = GC_NEXT(young); + while (gc != young) { + PyGC_Head *next = GC_NEXT(gc); + gc_set_old_space(gc, gcstate->visited_space); + gc = next; + } gcstate->work_to_do = 0; } @@ -1406,10 +1426,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) } gc_list_merge(&gcstate->young.head, &increment); gcstate->young.count = 0; - if (gcstate->visited_space) { - /* objects in visited space have bit set, so we set it here */ - gc_list_set_space(&increment, 1); - } + gc_list_validate_space(&increment, gcstate->visited_space); Py_ssize_t increment_size = 0; while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { @@ -1421,9 +1438,11 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_set_old_space(gc, gcstate->visited_space); increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); } + gc_list_validate_space(&increment, gcstate->visited_space); PyGC_Head survivors; gc_list_init(&survivors); gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats); + gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; @@ -1444,23 +1463,18 @@ gc_collect_full(PyThreadState *tstate, GCState *gcstate = &tstate->interp->gc; validate_old(gcstate); PyGC_Head *young = &gcstate->young.head; - PyGC_Head *old0 = &gcstate->old[0].head; - PyGC_Head *old1 = &gcstate->old[1].head; - /* merge all generations into old0 */ - gc_list_merge(young, old0); + PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; + PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + /* merge all generations into visited */ + gc_list_validate_space(young, gcstate->visited_space); + gc_list_set_space(pending, gcstate->visited_space); + gc_list_merge(young, pending); gcstate->young.count = 0; - PyGC_Head *gc = GC_NEXT(old1); - while (gc != old1) { - PyGC_Head *next = GC_NEXT(gc); - gc_set_old_space(gc, 0); - gc = next; - } - gc_list_merge(old1, old0); + gc_list_merge(pending, visited); - gc_collect_region(tstate, old0, old0, + gc_collect_region(tstate, visited, visited, UNTRACK_TUPLES | UNTRACK_DICTS, stats); - gcstate->visited_space = 1; gcstate->young.count = 0; gcstate->old[0].count = 0; gcstate->old[1].count = 0; @@ -1527,6 +1541,7 @@ gc_collect_region(PyThreadState *tstate, /* Clear weakrefs and invoke callbacks as necessary. */ stats->collected += handle_weakrefs(&unreachable, to); + gc_list_validate_space(to, gcstate->visited_space); validate_list(to, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); @@ -1560,6 +1575,7 @@ gc_collect_region(PyThreadState *tstate, * this if they insist on creating this type of structure. */ handle_legacy_finalizers(tstate, gcstate, &finalizers, to); + gc_list_validate_space(to, gcstate->visited_space); validate_list(to, collecting_clear_unreachable_clear); } @@ -1708,6 +1724,10 @@ void _PyGC_Freeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; + /* The permanent_generation has its old space bit set to zero */ + if (gcstate->visited_space) { + gc_list_set_space(&gcstate->young.head, 0); + } gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head); gcstate->young.count = 0; PyGC_Head*old0 = &gcstate->old[0].head;