From 44d7dffcc515b4bc2f9496fdee3c9ff391a7d6c0 Mon Sep 17 00:00:00 2001
From: Sam Gross
Date: Thu, 8 Aug 2024 17:36:25 +0000
Subject: [PATCH] gh-117139: Garbage collector support for deferred
 refcounting

* The free-threaded GC now visits interpreter stacks to keep objects that
  use deferred reference counting alive.

* Interpreter frames are zero initialized in the free-threaded GC so that
  the GC doesn't see garbage data. This is a temporary measure until stack
  spilling around escaping calls is implemented.

Co-authored-by: Ken Jin
---
 Include/internal/pycore_frame.h    | 24 ++++++++
 Include/internal/pycore_gc.h       |  3 +
 Include/internal/pycore_stackref.h |  6 +-
 Python/frame.c                     | 10 +---
 Python/gc.c                        | 11 ++++
 Python/gc_free_threading.c         | 90 +++++++++++++++++++++++++++---
 6 files changed, 123 insertions(+), 21 deletions(-)

diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h
index d3a5be000fbce7..d3d745574667aa 100644
--- a/Include/internal/pycore_frame.h
+++ b/Include/internal/pycore_frame.h
@@ -128,6 +128,13 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *
     // Don't leave a dangling pointer to the old frame when creating generators
     // and coroutines:
     dest->previous = NULL;
+
+#ifdef Py_GIL_DISABLED
+    PyCodeObject *co = (PyCodeObject *)dest->f_executable;
+    for (int i = stacktop; i < co->co_nlocalsplus + co->co_stacksize; i++) {
+        dest->localsplus[i] = PyStackRef_NULL;
+    }
+#endif
 }
 
 /* Consumes reference to func and locals.
@@ -153,6 +160,16 @@ _PyFrame_Initialize(
     for (int i = null_locals_from; i < code->co_nlocalsplus; i++) {
         frame->localsplus[i] = PyStackRef_NULL;
     }
+
+#ifdef Py_GIL_DISABLED
+    // With the GIL disabled, the GC walks the entire stack. Since stacktop
+    // is not always in sync with the real stack pointer, we have no choice
+    // but to traverse the entire stack.
+    // This just ensures we don't pass invalid stack values to the GC.
+    for (int i = code->co_nlocalsplus; i < code->co_nlocalsplus + code->co_stacksize; i++) {
+        frame->localsplus[i] = PyStackRef_NULL;
+    }
+#endif
 }
 
 /* Gets the pointer to the locals array
@@ -314,6 +331,13 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
     frame->instr_ptr = _PyCode_CODE(code);
     frame->owner = FRAME_OWNED_BY_THREAD;
     frame->return_offset = 0;
+
+#ifdef Py_GIL_DISABLED
+    assert(code->co_nlocalsplus == 0);
+    for (int i = 0; i < code->co_stacksize; i++) {
+        frame->localsplus[i] = PyStackRef_NULL;
+    }
+#endif
     return frame;
 }
 
diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index 5dd5b0c78d42fa..c66a280242a3ba 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -382,6 +382,9 @@ extern void _Py_ScheduleGC(PyThreadState *tstate);
 
 extern void _Py_RunGC(PyThreadState *tstate);
 
+// Visit the locals and evaluation stack of an interpreter frame.
+extern int _PyGC_VisitFrameStack(struct _PyInterpreterFrame *frame, visitproc visit, void *arg);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h
index 1b35a3e3269257..8c9bb1ae8c4908 100644
--- a/Include/internal/pycore_stackref.h
+++ b/Include/internal/pycore_stackref.h
@@ -150,8 +150,7 @@ PyStackRef_FromPyObjectNew(PyObject *obj)
     // Make sure we don't take an already tagged value.
     assert(((uintptr_t)obj & Py_TAG_BITS) == 0);
     assert(obj != NULL);
-    // TODO (gh-117139): Add deferred objects later.
-    if (_Py_IsImmortal(obj)) {
+    if (_Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj)) {
         return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_DEFERRED };
     }
     else {
@@ -220,7 +219,8 @@ PyStackRef_DUP(_PyStackRef stackref)
 {
     if (PyStackRef_IsDeferred(stackref)) {
         assert(PyStackRef_IsNull(stackref) ||
-            _Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref)));
+            _Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref)) ||
+            _PyObject_HasDeferredRefcount(PyStackRef_AsPyObjectBorrow(stackref)));
         return stackref;
     }
     Py_INCREF(PyStackRef_AsPyObjectBorrow(stackref));
diff --git a/Python/frame.c b/Python/frame.c
index 25fa2824630f9b..3192968a0fb1b5 100644
--- a/Python/frame.c
+++ b/Python/frame.c
@@ -15,15 +15,7 @@ _PyFrame_Traverse(_PyInterpreterFrame *frame, visitproc visit, void *arg)
     Py_VISIT(frame->f_locals);
     Py_VISIT(frame->f_funcobj);
     Py_VISIT(_PyFrame_GetCode(frame));
-    /* locals */
-    _PyStackRef *locals = _PyFrame_GetLocalsArray(frame);
-    _PyStackRef *sp = frame->stackpointer;
-    /* locals and stack */
-    while (sp > locals) {
-        sp--;
-        Py_VISIT(PyStackRef_AsPyObjectBorrow(*sp));
-    }
-    return 0;
+    return _PyGC_VisitFrameStack(frame, visit, arg);
 }
 
 PyFrameObject *
diff --git a/Python/gc.c b/Python/gc.c
index 38a0da91a97510..291c1079f5f9c3 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -534,6 +534,17 @@ visit_decref(PyObject *op, void *parent)
     return 0;
 }
 
+int
+_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
+{
+    _PyStackRef *i = frame->localsplus;
+    /* locals and stack */
+    for (; i < frame->stackpointer; i++) {
+        Py_VISIT(PyStackRef_AsPyObjectBorrow(*i));
+    }
+    return 0;
+}
+
 /* Subtract internal references from gc_refs. After this, gc_refs is >= 0
  * for all objects in containers, and is GC_REACHABLE for all tracked gc
  * objects not in containers. The ones with gc_refs > 0 are directly
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index 543bee24652dc9..ea402270659689 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -164,15 +164,31 @@ gc_decref(PyObject *op)
 static void
 disable_deferred_refcounting(PyObject *op)
 {
-    if (_PyObject_HasDeferredRefcount(op)) {
-        op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
-        op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
-
-        if (PyType_Check(op)) {
-            // Disable thread-local refcounting for heap types
-            PyTypeObject *type = (PyTypeObject *)op;
-            if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
-                _PyType_ReleaseId((PyHeapTypeObject *)op);
+    if (!_PyObject_HasDeferredRefcount(op)) {
+        return;
+    }
+
+    op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
+    op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
+
+    if (PyType_Check(op)) {
+        // Disable thread-local refcounting for heap types
+        PyTypeObject *type = (PyTypeObject *)op;
+        if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
+            _PyType_ReleaseId((PyHeapTypeObject *)op);
+        }
+    }
+    else if (PyGen_CheckExact(op) || PyCoro_CheckExact(op) || PyAsyncGen_CheckExact(op)) {
+        // Convert any deferred (non-refcounted) references in the frame's
+        // locals and stack to strong references, so that the generator or
+        // coroutine is not freed before the objects it references.
+        PyGenObject *gen = (PyGenObject *)op;
+        struct _PyInterpreterFrame *frame = &gen->gi_iframe;
+        assert(frame->stackpointer != NULL);
+        for (_PyStackRef *ref = frame->localsplus; ref < frame->stackpointer; ref++) {
+            if (!PyStackRef_IsNull(*ref) && PyStackRef_IsDeferred(*ref)) {
+                // Convert a deferred reference to a strong reference.
+                *ref = PyStackRef_FromPyObjectSteal(PyStackRef_AsPyObjectSteal(*ref));
             }
         }
     }
@@ -313,6 +329,41 @@ gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor,
     return err;
 }
 
+static inline void
+gc_visit_stackref(_PyStackRef stackref)
+{
+    // Note: we MUST check that the reference is deferred before checking
+    // anything else. Non-deferred references may already be dead, so
+    // reading them could touch invalid memory.
+    if (PyStackRef_IsDeferred(stackref) && !PyStackRef_IsNull(stackref)) {
+        PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref);
+        if (_PyObject_GC_IS_TRACKED(obj)) {
+            gc_add_refs(obj, 1);
+        }
+    }
+}
+
+// Add 1 to the gc_refs for every deferred reference on each thread's stack.
+static void
+gc_visit_thread_stacks(PyInterpreterState *interp)
+{
+    HEAD_LOCK(&_PyRuntime);
+    for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
+        _PyInterpreterFrame *f = p->current_frame;
+        while (f != NULL) {
+            if (f->f_executable != NULL && PyCode_Check(f->f_executable)) {
+                PyCodeObject *co = (PyCodeObject *)f->f_executable;
+                int max_stack = co->co_nlocalsplus + co->co_stacksize;
+                for (int i = 0; i < max_stack; i++) {
+                    gc_visit_stackref(f->localsplus[i]);
+                }
+            }
+            f = f->previous;
+        }
+    }
+    HEAD_UNLOCK(&_PyRuntime);
+}
+
 static void
 merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
 {
@@ -617,6 +668,9 @@ deduce_unreachable_heap(PyInterpreterState *interp,
     gc_visit_heaps(interp, &validate_gc_objects, &state->base);
 #endif
 
+    // Visit the thread stacks to account for any deferred references.
+    gc_visit_thread_stacks(interp);
+
     // Transitively mark reachable objects by clearing the
     // _PyGC_BITS_UNREACHABLE flag.
     if (gc_visit_heaps(interp, &mark_heap_visitor, &state->base) < 0) {
@@ -897,6 +951,24 @@ visit_decref_unreachable(PyObject *op, void *data)
     return 0;
 }
 
+int
+_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
+{
+    _PyStackRef *ref = frame->localsplus;
+    /* locals and stack */
+    for (; ref < frame->stackpointer; ref++) {
+        // This is a bit tricky! We want to ignore deferred references when
+        // computing the incoming references, but otherwise treat them like
+        // regular references.
+        if (PyStackRef_IsDeferred(*ref) &&
+            (visit == visit_decref || visit == visit_decref_unreachable)) {
+            continue;
+        }
+        Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
+    }
+    return 0;
+}
+
 // Handle objects that may have resurrected after a call to 'finalize_garbage'.
 static int
 handle_resurrected_objects(struct collection_state *state)
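-- 
Note (illustrative, not part of the patch above): the standalone sketch below
models the core idea the patch relies on, using simplified stand-ins. The
names StackRef, TAG_DEFERRED, Obj, and the helper functions are inventions of
this sketch, not CPython APIs. It shows why the free-threaded
_PyGC_VisitFrameStack skips deferred references for decref-style visitors:
a deferred stack slot is a tagged, uncounted borrow, so subtracting it would
corrupt the GC's incoming-reference computation.

/* sketch.c -- toy model of deferred stack references (assumed names). */
#include <stdint.h>
#include <stdio.h>

typedef struct { uintptr_t bits; } StackRef;   /* cf. _PyStackRef   */
#define TAG_DEFERRED ((uintptr_t)1)            /* cf. Py_TAG_DEFERRED */

typedef struct { const char *name; int gc_refs; } Obj;

static StackRef ref_new(Obj *o, int deferred)
{
    /* Tag the pointer's low bit when the reference is deferred. */
    return (StackRef){ (uintptr_t)o | (deferred ? TAG_DEFERRED : 0) };
}

static int ref_is_deferred(StackRef r) { return (r.bits & TAG_DEFERRED) != 0; }
static Obj *ref_borrow(StackRef r)     { return (Obj *)(r.bits & ~TAG_DEFERRED); }

/* Visitor in the style of visit_decref: subtract one incoming reference. */
static void visit_decref_sketch(Obj *o) { o->gc_refs--; }

/* Walk [base, top) like _PyGC_VisitFrameStack; skip deferred slots for
 * decref-style visitors so they are not counted as incoming references. */
static void visit_frame_stack(StackRef *base, StackRef *top,
                              void (*visit)(Obj *), int skip_deferred)
{
    for (StackRef *r = base; r < top; r++) {
        if (skip_deferred && ref_is_deferred(*r)) {
            continue;
        }
        visit(ref_borrow(*r));
    }
}

int main(void)
{
    Obj a = { "a", 2 };                 /* two counted references elsewhere */
    Obj b = { "b", 2 };
    StackRef stack[2] = {
        ref_new(&a, 0),                 /* ordinary (counted) reference     */
        ref_new(&b, 1),                 /* deferred (uncounted) reference   */
    };

    visit_frame_stack(stack, stack + 2, visit_decref_sketch, 1);

    /* a's slot held a real reference and is subtracted; b's deferred slot
     * is skipped, leaving its count untouched. */
    printf("a.gc_refs=%d b.gc_refs=%d\n", a.gc_refs, b.gc_refs);
    return 0;
}

Compiled and run, this prints "a.gc_refs=1 b.gc_refs=2". The patch's
gc_visit_thread_stacks then plays the complementary role: it adds 1 to
gc_refs for each live deferred slot, keeping those objects alive even though
their stack references were never counted.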