From 5bb5f69e4953017493ec6afa954da207af6fbad1 Mon Sep 17 00:00:00 2001 From: Jeremy Maitin-Shepard Date: Wed, 14 Jun 2023 16:43:34 -0700 Subject: [PATCH] gh-87135: Hang non-main threads that attempt to acquire the GIL during finalization --- Doc/c-api/init.rst | 68 ++++++++++++++----- Include/pythread.h | 32 ++++++++- Lib/test/test_threading.py | 64 +++++++++++++++++ ...2-08-05-19-41-20.gh-issue-87135.SCNBYj.rst | 15 ++++ .../2021-09-22-14-19-02.bpo-42969.8Z2mth.rst | 3 + Modules/_testcapimodule.c | 28 ++++++++ Python/ceval_gil.c | 11 +-- Python/thread_nt.h | 8 +++ Python/thread_pthread.h | 8 +++ 9 files changed, 215 insertions(+), 22 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2022-08-05-19-41-20.gh-issue-87135.SCNBYj.rst create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-09-22-14-19-02.bpo-42969.8Z2mth.rst diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 1dab0af2659b4ec..c2a62764ae4ac6a 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -401,7 +401,11 @@ Initializing and finalizing the interpreter freed. Some memory allocated by extension modules may not be freed. Some extensions may not work properly if their initialization routine is called more than once; this can happen if an application calls :c:func:`Py_Initialize` and - :c:func:`Py_FinalizeEx` more than once. + :c:func:`Py_FinalizeEx` more than once. :c:func:`Py_FinalizeEx` must not be + called recursively from within itself. Therefore, it must not be called by any + code that may be run as part of the interpreter shutdown process, such as + :py:mod:`atexit` handlers, object finalizers, or any code that may be run while + flushing the stdout and stderr files. .. audit-event:: cpython._PySys_ClearAuditHooks "" c.Py_FinalizeEx @@ -804,6 +808,29 @@ thread, where the CPython global runtime was originally initialized. The only exception is if :c:func:`exec` will be called immediately after. +.. _cautions-regarding-runtime-finalization: + +Cautions regarding runtime finalization +--------------------------------------- + +In the late stage of :term:`interpreter shutdown`, after attempting to wait for +non-daemon threads to exit (though this can be interrupted by +:class:`KeyboardInterrupt`) and running the :mod:`atexit` functions, the runtime +is marked as *finalizing*: :c:func:`_Py_IsFinalizing` and +:func:`sys.is_finalizing` return true. At this point, only the *finalization +thread* that initiated finalization (typically the main thread) is allowed to +acquire the :term:`GIL`. + +If any thread, other than the finalization thread, attempts to acquire the GIL +during finalization, either explicitly via :c:func:`PyGILState_Ensure`, +:c:macro:`Py_END_ALLOW_THREADS`, :c:func:`PyEval_AcquireThread`, or +:c:func:`PyEval_AcquireLock`, or implicitly when the interpreter attempts to +reacquire it after having yielded it, the thread enters a permanently blocked +state where it remains until the program exits. In most cases this is harmless, +but this can result in deadlock if a later stage of finalization attempts to +acquire a lock owned by the blocked thread, or otherwise waits on the blocked +thread. + High-level API -------------- @@ -847,11 +874,14 @@ code, or when embedding the Python interpreter: ensues. .. note:: - Calling this function from a thread when the runtime is finalizing - will terminate the thread, even if the thread was not created by Python. - You can use :c:func:`_Py_IsFinalizing` or :func:`sys.is_finalizing` to - check if the interpreter is in process of being finalized before calling - this function to avoid unwanted termination. + Calling this function from a thread when the runtime is finalizing will + hang the thread until the program exits, even if the thread was not + created by Python. Refer to + :ref:`cautions-regarding-runtime-finalization` for more details. + + .. versionchanged:: 3.12 + Hangs the current thread, rather than terminating it, if called while the + interpreter is finalizing. .. c:function:: PyThreadState* PyThreadState_Get() @@ -893,11 +923,14 @@ with sub-interpreters: to call arbitrary Python code. Failure is a fatal error. .. note:: - Calling this function from a thread when the runtime is finalizing - will terminate the thread, even if the thread was not created by Python. - You can use :c:func:`_Py_IsFinalizing` or :func:`sys.is_finalizing` to - check if the interpreter is in process of being finalized before calling - this function to avoid unwanted termination. + Calling this function from a thread when the runtime is finalizing will + hang the thread until the program exits, even if the thread was not + created by Python. Refer to + :ref:`cautions-regarding-runtime-finalization` for more details. + + .. versionchanged:: 3.12 + Hangs the current thread, rather than terminating it, if called while the + interpreter is finalizing. .. c:function:: void PyGILState_Release(PyGILState_STATE) @@ -1175,17 +1208,20 @@ All of the following functions must be called after :c:func:`Py_Initialize`. If this thread already has the lock, deadlock ensues. .. note:: - Calling this function from a thread when the runtime is finalizing - will terminate the thread, even if the thread was not created by Python. - You can use :c:func:`_Py_IsFinalizing` or :func:`sys.is_finalizing` to - check if the interpreter is in process of being finalized before calling - this function to avoid unwanted termination. + Calling this function from a thread when the runtime is finalizing will + hang the thread until the program exits, even if the thread was not + created by Python. Refer to + :ref:`cautions-regarding-runtime-finalization` for more details. .. versionchanged:: 3.8 Updated to be consistent with :c:func:`PyEval_RestoreThread`, :c:func:`Py_END_ALLOW_THREADS`, and :c:func:`PyGILState_Ensure`, and terminate the current thread if called while the interpreter is finalizing. + .. versionchanged:: 3.12 + Hangs the current thread, rather than terminating it, if called while the + interpreter is finalizing. + :c:func:`PyEval_RestoreThread` is a higher-level function which is always available (even when threads have not been initialized). diff --git a/Include/pythread.h b/Include/pythread.h index 63714437c496b74..2374fe47ec3f0b6 100644 --- a/Include/pythread.h +++ b/Include/pythread.h @@ -17,7 +17,37 @@ typedef enum PyLockStatus { PyAPI_FUNC(void) PyThread_init_thread(void); PyAPI_FUNC(unsigned long) PyThread_start_new_thread(void (*)(void *), void *); -PyAPI_FUNC(void) _Py_NO_RETURN PyThread_exit_thread(void); +/* Terminates the current thread. + * + * WARNING: This function is only safe to call if all functions in the full call + * stack are written to safely allow it. Additionally, the behavior is + * platform-dependent. This function should be avoided, and is no longer called + * by Python itself. It is retained only for compatibility with existing C + * extension code. + * + * With pthreads, calls `pthread_exit` which attempts to unwind the stack and + * call C++ destructors. If a `noexcept` function is reached, the program is + * terminated. + * + * On Windows, calls `_endthreadex` which kills the thread without calling C++ + * destructors. + * + * In either case there is a risk of invalid references remaining to data on the + * thread stack. + */ +Py_DEPRECATED(3.12) PyAPI_FUNC(void) _Py_NO_RETURN PyThread_exit_thread(void); + +#ifndef Py_LIMITED_API +/* Hangs the thread indefinitely without exiting it. + * + * bpo-42969: There is no safe way to exit a thread other than returning + * normally from its start function. This is used during finalization in lieu + * of actually exiting the thread. Since the program is expected to terminate + * soon anyway, it does not matter if the thread stack stays around until then. + */ +PyAPI_FUNC(void) _Py_NO_RETURN _PyThread_hang_thread(void); +#endif /* !Py_LIMITED_API */ + PyAPI_FUNC(unsigned long) PyThread_get_thread_ident(void); #if (defined(__APPLE__) || defined(__linux__) || defined(_WIN32) \ diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 9e4972ecb640df8..68622707046f47e 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1037,6 +1037,70 @@ def exit_handler(): self.assertEqual(out, b'') self.assertIn(b"can't create new thread at interpreter shutdown", err) + @cpython_only + def test_finalize_daemon_thread_hang(self): + # bpo-42969: tests that daemon threads hang during finalization + script = textwrap.dedent(''' + import os + import sys + import threading + import time + import _testcapi + + lock = threading.Lock() + lock.acquire() + thread_started_event = threading.Event() + def thread_func(): + try: + thread_started_event.set() + _testcapi.finalize_thread_hang(lock.acquire) + finally: + # Control must not reach here. + os._exit(2) + + t = threading.Thread(target=thread_func) + t.daemon = True + t.start() + thread_started_event.wait() + # Sleep to ensure daemon thread is blocked on `lock.acquire` + # + # Note: This test is designed so that in the unlikely case that + # `0.1` seconds is not sufficient time for the thread to become + # blocked on `lock.acquire`, the test will still pass, it just + # won't be properly testing the thread behavior during + # finalization. + time.sleep(0.1) + + def run_during_finalization(): + # Wake up daemon thread + lock.release() + # Sleep to give the daemon thread time to crash if it is going + # to. + # + # Note: If due to an exceptionally slow execution this delay is + # insufficient, the test will still pass but will simply be + # ineffective as a test. + time.sleep(0.1) + # If control reaches here, the test succeeded. + os._exit(0) + + # Replace sys.stderr.flush as a way to run code during finalization + orig_flush = sys.stderr.flush + def do_flush(*args, **kwargs): + orig_flush(*args, **kwargs) + if not sys.is_finalizing: + return + sys.stderr.flush = orig_flush + run_during_finalization() + + sys.stderr.flush = do_flush + + # If the follow exit code is retained, `run_during_finalization` + # did not run. + sys.exit(1) + ''') + assert_python_ok("-c", script) + class ThreadJoinOnShutdown(BaseTestCase): def _run_and_join(self, script): diff --git a/Misc/NEWS.d/next/C API/2022-08-05-19-41-20.gh-issue-87135.SCNBYj.rst b/Misc/NEWS.d/next/C API/2022-08-05-19-41-20.gh-issue-87135.SCNBYj.rst new file mode 100644 index 000000000000000..6387d69bc267c67 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2022-08-05-19-41-20.gh-issue-87135.SCNBYj.rst @@ -0,0 +1,15 @@ +Attempting to acquire the GIL after runtime finalization has begun in a +different thread now causes the thread to hang rather than terminate, which +avoids potential crashes or memory corruption caused by attempting to +terminate a thread that is running code not specifically designed to support +termination. In most cases this hanging is harmless since the process will +soon exit anyway. + +The ``PyThread_exit_thread`` function is now deprecated. Its behavior is +inconsistent across platforms, and it can only be used safely in the +unlikely case that every function in the entire call stack has been designed +to support the platform-dependent termination mechanism. It is recommended +that users of this function change their design to not require thread +termination. In the unlikely case that thread termination is needed and can +be done safely, users may migrate to calling platform-specific APIs such as +``pthread_exit`` (POSIX) or ``_endthreadex`` (Windows) directly. diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-09-22-14-19-02.bpo-42969.8Z2mth.rst b/Misc/NEWS.d/next/Core and Builtins/2021-09-22-14-19-02.bpo-42969.8Z2mth.rst new file mode 100644 index 000000000000000..e5e03baa6ac0df0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-09-22-14-19-02.bpo-42969.8Z2mth.rst @@ -0,0 +1,3 @@ +Daemon threads and other threads not created by Python are now paused rather +than unsafely terminated if they attempt to acquire the GIL during Python +finalization. diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 35687b49f24a0d1..fb21bd157c964c5 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3324,6 +3324,33 @@ test_atexit(PyObject *self, PyObject *Py_UNUSED(args)) Py_RETURN_NONE; } +// Used by `finalize_thread_hang`. +#ifdef _POSIX_THREADS +static void finalize_thread_hang_cleanup_callback(void *Py_UNUSED(arg)) { + // Should not reach here. + assert(0 && "pthread thread termination was triggered unexpectedly"); +} +#endif + +// Tests that finalization does not trigger pthread cleanup. +// +// Must be called with a single nullary callable function that should block +// (with GIL released) until finalization is in progress. +static PyObject * +finalize_thread_hang(PyObject *self, PyObject *arg) +{ +#ifdef _POSIX_THREADS + pthread_cleanup_push(finalize_thread_hang_cleanup_callback, NULL); +#endif + PyObject_CallNoArgs(arg); + // Should not reach here. + Py_FatalError("thread unexpectedly did not hang"); +#ifdef _POSIX_THREADS + pthread_cleanup_pop(0); +#endif + Py_RETURN_NONE; +} + static PyMethodDef TestMethods[] = { {"set_errno", set_errno, METH_VARARGS}, @@ -3468,6 +3495,7 @@ static PyMethodDef TestMethods[] = { {"function_get_kw_defaults", function_get_kw_defaults, METH_O, NULL}, {"function_set_kw_defaults", function_set_kw_defaults, METH_VARARGS, NULL}, {"test_atexit", test_atexit, METH_NOARGS}, + {"finalize_thread_hang", finalize_thread_hang, METH_O, NULL}, {NULL, NULL} /* sentinel */ }; diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index bb1279f46cf9f7f..68f5b0ca47c44ae 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -382,7 +382,7 @@ take_gil(PyThreadState *tstate) This code path can be reached by a daemon thread after Py_Finalize() completes. In this case, tstate is a dangling pointer: points to PyThreadState freed memory. */ - PyThread_exit_thread(); + _PyThread_hang_thread(); } assert(is_tstate_valid(tstate)); @@ -424,7 +424,9 @@ take_gil(PyThreadState *tstate) if (drop_requested) { RESET_GIL_DROP_REQUEST(interp); } - PyThread_exit_thread(); + // gh-87135: hang the thread as *thread_exit() is not a safe + // API. It lacks stack unwind and local variable destruction. + _PyThread_hang_thread(); } assert(is_tstate_valid(tstate)); @@ -455,7 +457,7 @@ take_gil(PyThreadState *tstate) if (tstate_must_exit(tstate)) { /* bpo-36475: If Py_Finalize() has been called and tstate is not - the thread which called Py_Finalize(), exit immediately the + the thread which called Py_Finalize(), bpo-42969: hang the thread. This code path can be reached by a daemon thread which was waiting @@ -463,7 +465,7 @@ take_gil(PyThreadState *tstate) wait_for_thread_shutdown() from Py_Finalize(). */ MUTEX_UNLOCK(gil->mutex); drop_gil(ceval, tstate); - PyThread_exit_thread(); + _PyThread_hang_thread(); } assert(is_tstate_valid(tstate)); @@ -1123,4 +1125,3 @@ _Py_HandlePending(PyThreadState *tstate) return 0; } - diff --git a/Python/thread_nt.h b/Python/thread_nt.h index 26f441bd6d3c569..8343ce9307f7a30 100644 --- a/Python/thread_nt.h +++ b/Python/thread_nt.h @@ -257,6 +257,14 @@ PyThread_exit_thread(void) _endthreadex(0); } +void _Py_NO_RETURN +_PyThread_hang_thread(void) +{ + while (1) { + SleepEx(INFINITE, TRUE); + } +} + /* * Lock support. It has to be implemented as semaphores. * I [Dag] tried to implement it with mutex but I could find a way to diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h index 76d6f3bcdf9c407..bcf953f34e87488 100644 --- a/Python/thread_pthread.h +++ b/Python/thread_pthread.h @@ -359,6 +359,14 @@ PyThread_exit_thread(void) pthread_exit(0); } +void _Py_NO_RETURN +_PyThread_hang_thread(void) +{ + while (1) { + pause(); + } +} + #ifdef USE_SEMAPHORES /*