From 81adac25d413540643353a3eee02680151d1451e Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 20 Jun 2024 11:29:08 -0400 Subject: [PATCH] gh-117511: Make PyMutex public in the non-limited API (#117731) --- Doc/c-api/init.rst | 43 ++++++++++++ Doc/whatsnew/3.13.rst | 5 ++ Include/Python.h | 1 + Include/cpython/lock.h | 63 +++++++++++++++++ Include/cpython/weakrefobject.h | 2 +- Include/internal/pycore_critical_section.h | 6 +- Include/internal/pycore_lock.h | 68 +------------------ Include/internal/pycore_warnings.h | 2 +- Include/lock.h | 16 +++++ Include/object.h | 6 +- Makefile.pre.in | 2 + ...-04-10-16-48-04.gh-issue-117511.RZtBRK.rst | 1 + Modules/_testinternalcapi/test_lock.c | 16 ++--- Objects/object.c | 2 +- PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 ++ Python/critical_section.c | 2 +- Python/lock.c | 52 +++++++------- 18 files changed, 185 insertions(+), 110 deletions(-) create mode 100644 Include/cpython/lock.h create mode 100644 Include/lock.h create mode 100644 Misc/NEWS.d/next/C API/2024-04-10-16-48-04.gh-issue-117511.RZtBRK.rst diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 9e118d4f36145f8..58c79031de53207 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -55,6 +55,11 @@ The following functions can be safely called before Python is initialized: * :c:func:`PyMem_RawCalloc` * :c:func:`PyMem_RawFree` +* Synchronization: + + * :c:func:`PyMutex_Lock` + * :c:func:`PyMutex_Unlock` + .. note:: The following functions **should not be called** before @@ -2152,3 +2157,41 @@ be used in new code. .. c:function:: void PyThread_delete_key_value(int key) .. c:function:: void PyThread_ReInitTLS() +Synchronization Primitives +========================== + +The C-API provides a basic mutual exclusion lock. + +.. c:type:: PyMutex + + A mutual exclusion lock. The :c:type:`!PyMutex` should be initialized to + zero to represent the unlocked state. For example:: + + PyMutex mutex = {0}; + + Instances of :c:type:`!PyMutex` should not be copied or moved. Both the + contents and address of a :c:type:`!PyMutex` are meaningful, and it must + remain at a fixed, writable location in memory. + + .. note:: + + A :c:type:`!PyMutex` currently occupies one byte, but the size should be + considered unstable. The size may change in future Python releases + without a deprecation period. + + .. versionadded:: 3.13 + +.. c:function:: void PyMutex_Lock(PyMutex *m) + + Lock mutex *m*. If another thread has already locked it, the calling + thread will block until the mutex is unlocked. While blocked, the thread + will temporarily release the :term:`GIL` if it is held. + + .. versionadded:: 3.13 + +.. c:function:: void PyMutex_Unlock(PyMutex *m) + + Unlock mutex *m*. The mutex must be locked --- otherwise, the function will + issue a fatal error. + + .. versionadded:: 3.13 diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index fbff65ba60b5177..ad7473475e31cb2 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -2178,6 +2178,11 @@ New Features :c:func:`PyEval_GetLocals` return :term:`strong references ` rather than borrowed references. (Added as part of :pep:`667`.) +* Add :c:type:`PyMutex` API, a lightweight mutex that occupies a single byte. + The :c:func:`PyMutex_Lock` function will release the GIL (if currently held) + if the operation needs to block. + (Contributed by Sam Gross in :gh:`108724`.) + Build Changes ============= diff --git a/Include/Python.h b/Include/Python.h index a1b33f6d3c42b21..5cc4fb5bb9d3720 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -64,6 +64,7 @@ #include "pybuffer.h" #include "pystats.h" #include "pyatomic.h" +#include "lock.h" #include "object.h" #include "refcount.h" #include "objimpl.h" diff --git a/Include/cpython/lock.h b/Include/cpython/lock.h new file mode 100644 index 000000000000000..8ee03e82f74dfdc --- /dev/null +++ b/Include/cpython/lock.h @@ -0,0 +1,63 @@ +#ifndef Py_CPYTHON_LOCK_H +# error "this header file must not be included directly" +#endif + +#define _Py_UNLOCKED 0 +#define _Py_LOCKED 1 + +// A mutex that occupies one byte. The lock can be zero initialized to +// represent the unlocked state. +// +// Typical initialization: +// PyMutex m = (PyMutex){0}; +// +// Or initialize as global variables: +// static PyMutex m; +// +// Typical usage: +// PyMutex_Lock(&m); +// ... +// PyMutex_Unlock(&m); +// +// The contents of the PyMutex are not part of the public API, but are +// described to aid in understanding the implementation and debugging. Only +// the two least significant bits are used. The remaining bits are always zero: +// 0b00: unlocked +// 0b01: locked +// 0b10: unlocked and has parked threads +// 0b11: locked and has parked threads +typedef struct PyMutex { + uint8_t _bits; // (private) +} PyMutex; + +// exported function for locking the mutex +PyAPI_FUNC(void) PyMutex_Lock(PyMutex *m); + +// exported function for unlocking the mutex +PyAPI_FUNC(void) PyMutex_Unlock(PyMutex *m); + +// Locks the mutex. +// +// If the mutex is currently locked, the calling thread will be parked until +// the mutex is unlocked. If the current thread holds the GIL, then the GIL +// will be released while the thread is parked. +static inline void +_PyMutex_Lock(PyMutex *m) +{ + uint8_t expected = _Py_UNLOCKED; + if (!_Py_atomic_compare_exchange_uint8(&m->_bits, &expected, _Py_LOCKED)) { + PyMutex_Lock(m); + } +} +#define PyMutex_Lock _PyMutex_Lock + +// Unlocks the mutex. +static inline void +_PyMutex_Unlock(PyMutex *m) +{ + uint8_t expected = _Py_LOCKED; + if (!_Py_atomic_compare_exchange_uint8(&m->_bits, &expected, _Py_UNLOCKED)) { + PyMutex_Unlock(m); + } +} +#define PyMutex_Unlock _PyMutex_Unlock diff --git a/Include/cpython/weakrefobject.h b/Include/cpython/weakrefobject.h index dcca166d7357cc7..28acf7265a08563 100644 --- a/Include/cpython/weakrefobject.h +++ b/Include/cpython/weakrefobject.h @@ -36,7 +36,7 @@ struct _PyWeakReference { * Normally this can be derived from wr_object, but in some cases we need * to lock after wr_object has been set to Py_None. */ - struct _PyMutex *weakrefs_lock; + PyMutex *weakrefs_lock; #endif }; diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 7bebcdb67c71693..3e15c3aabffa974 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -202,7 +202,7 @@ _PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, static inline void _PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m) { - if (PyMutex_LockFast(&m->v)) { + if (PyMutex_LockFast(&m->_bits)) { PyThreadState *tstate = _PyThreadState_GET(); c->mutex = m; c->prev = tstate->critical_section; @@ -255,8 +255,8 @@ _PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) m2 = tmp; } - if (PyMutex_LockFast(&m1->v)) { - if (PyMutex_LockFast(&m2->v)) { + if (PyMutex_LockFast(&m1->_bits)) { + if (PyMutex_LockFast(&m2->_bits)) { PyThreadState *tstate = _PyThreadState_GET(); c->base.mutex = m1; c->mutex2 = m2; diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index 882c4888e5058c8..8aa73946e2c6451 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -13,48 +13,10 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif - -// A mutex that occupies one byte. The lock can be zero initialized. -// -// Only the two least significant bits are used. The remaining bits should be -// zero: -// 0b00: unlocked -// 0b01: locked -// 0b10: unlocked and has parked threads -// 0b11: locked and has parked threads -// -// Typical initialization: -// PyMutex m = (PyMutex){0}; -// -// Or initialize as global variables: -// static PyMutex m; -// -// Typical usage: -// PyMutex_Lock(&m); -// ... -// PyMutex_Unlock(&m); - -// NOTE: In Py_GIL_DISABLED builds, `struct _PyMutex` is defined in Include/object.h. -// The Py_GIL_DISABLED builds need the definition in Include/object.h for the -// `ob_mutex` field in PyObject. For the default (non-free-threaded) build, -// we define the struct here to avoid exposing it in the public API. -#ifndef Py_GIL_DISABLED -struct _PyMutex { uint8_t v; }; -#endif - -typedef struct _PyMutex PyMutex; - -#define _Py_UNLOCKED 0 -#define _Py_LOCKED 1 +//_Py_UNLOCKED is defined as 0 and _Py_LOCKED as 1 in Include/cpython/lock.h #define _Py_HAS_PARKED 2 #define _Py_ONCE_INITIALIZED 4 -// (private) slow path for locking the mutex -PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m); - -// (private) slow path for unlocking the mutex -PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m); - static inline int PyMutex_LockFast(uint8_t *lock_bits) { @@ -62,35 +24,11 @@ PyMutex_LockFast(uint8_t *lock_bits) return _Py_atomic_compare_exchange_uint8(lock_bits, &expected, _Py_LOCKED); } -// Locks the mutex. -// -// If the mutex is currently locked, the calling thread will be parked until -// the mutex is unlocked. If the current thread holds the GIL, then the GIL -// will be released while the thread is parked. -static inline void -PyMutex_Lock(PyMutex *m) -{ - uint8_t expected = _Py_UNLOCKED; - if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_LOCKED)) { - _PyMutex_LockSlow(m); - } -} - -// Unlocks the mutex. -static inline void -PyMutex_Unlock(PyMutex *m) -{ - uint8_t expected = _Py_LOCKED; - if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_UNLOCKED)) { - _PyMutex_UnlockSlow(m); - } -} - // Checks if the mutex is currently locked. static inline int PyMutex_IsLocked(PyMutex *m) { - return (_Py_atomic_load_uint8(&m->v) & _Py_LOCKED) != 0; + return (_Py_atomic_load_uint8(&m->_bits) & _Py_LOCKED) != 0; } // Re-initializes the mutex after a fork to the unlocked state. @@ -121,7 +59,7 @@ static inline void PyMutex_LockFlags(PyMutex *m, _PyLockFlags flags) { uint8_t expected = _Py_UNLOCKED; - if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_LOCKED)) { + if (!_Py_atomic_compare_exchange_uint8(&m->_bits, &expected, _Py_LOCKED)) { _PyMutex_LockTimed(m, -1, flags); } } diff --git a/Include/internal/pycore_warnings.h b/Include/internal/pycore_warnings.h index 114796df42b2b6a..f9f6559312f4ef6 100644 --- a/Include/internal/pycore_warnings.h +++ b/Include/internal/pycore_warnings.h @@ -14,7 +14,7 @@ struct _warnings_runtime_state { PyObject *filters; /* List */ PyObject *once_registry; /* Dict */ PyObject *default_action; /* String */ - struct _PyMutex mutex; + PyMutex mutex; long filters_version; }; diff --git a/Include/lock.h b/Include/lock.h new file mode 100644 index 000000000000000..782b9dbc70d0569 --- /dev/null +++ b/Include/lock.h @@ -0,0 +1,16 @@ +#ifndef Py_LOCK_H +#define Py_LOCK_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_LOCK_H +# include "cpython/lock.h" +# undef Py_CPYTHON_LOCK_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_LOCK_H */ diff --git a/Include/object.h b/Include/object.h index ed39e7dc877810f..a1e5b33b0fdaaef 100644 --- a/Include/object.h +++ b/Include/object.h @@ -137,17 +137,13 @@ struct _object { // fields have been merged. #define _Py_UNOWNED_TID 0 -// NOTE: In non-free-threaded builds, `struct _PyMutex` is defined in -// pycore_lock.h. See pycore_lock.h for more details. -struct _PyMutex { uint8_t v; }; - struct _object { // ob_tid stores the thread id (or zero). It is also used by the GC and the // trashcan mechanism as a linked list pointer and by the GC to store the // computed "gc_refs" refcount. uintptr_t ob_tid; uint16_t _padding; - struct _PyMutex ob_mutex; // per-object lock + PyMutex ob_mutex; // per-object lock uint8_t ob_gc_bits; // gc-related state uint32_t ob_ref_local; // local reference count Py_ssize_t ob_ref_shared; // shared (atomic) reference count diff --git a/Makefile.pre.in b/Makefile.pre.in index 22dba279faa9358..e140018407a19b6 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1019,6 +1019,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/intrcheck.h \ $(srcdir)/Include/iterobject.h \ $(srcdir)/Include/listobject.h \ + $(srcdir)/Include/lock.h \ $(srcdir)/Include/longobject.h \ $(srcdir)/Include/marshal.h \ $(srcdir)/Include/memoryobject.h \ @@ -1092,6 +1093,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/cpython/import.h \ $(srcdir)/Include/cpython/initconfig.h \ $(srcdir)/Include/cpython/listobject.h \ + $(srcdir)/Include/cpython/lock.h \ $(srcdir)/Include/cpython/longintrepr.h \ $(srcdir)/Include/cpython/longobject.h \ $(srcdir)/Include/cpython/memoryobject.h \ diff --git a/Misc/NEWS.d/next/C API/2024-04-10-16-48-04.gh-issue-117511.RZtBRK.rst b/Misc/NEWS.d/next/C API/2024-04-10-16-48-04.gh-issue-117511.RZtBRK.rst new file mode 100644 index 000000000000000..586685a3407a3db --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-04-10-16-48-04.gh-issue-117511.RZtBRK.rst @@ -0,0 +1 @@ +Make the :c:type:`PyMutex` public in the non-limited C API. diff --git a/Modules/_testinternalcapi/test_lock.c b/Modules/_testinternalcapi/test_lock.c index 1544fe1363c7c5b..8d678412fe71791 100644 --- a/Modules/_testinternalcapi/test_lock.c +++ b/Modules/_testinternalcapi/test_lock.c @@ -36,9 +36,9 @@ test_lock_basic(PyObject *self, PyObject *obj) // uncontended lock and unlock PyMutex_Lock(&m); - assert(m.v == 1); + assert(m._bits == 1); PyMutex_Unlock(&m); - assert(m.v == 0); + assert(m._bits == 0); Py_RETURN_NONE; } @@ -57,10 +57,10 @@ lock_thread(void *arg) _Py_atomic_store_int(&test_data->started, 1); PyMutex_Lock(m); - assert(m->v == 1); + assert(m->_bits == 1); PyMutex_Unlock(m); - assert(m->v == 0); + assert(m->_bits == 0); _PyEvent_Notify(&test_data->done); } @@ -73,7 +73,7 @@ test_lock_two_threads(PyObject *self, PyObject *obj) memset(&test_data, 0, sizeof(test_data)); PyMutex_Lock(&test_data.m); - assert(test_data.m.v == 1); + assert(test_data.m._bits == 1); PyThread_start_new_thread(lock_thread, &test_data); @@ -82,17 +82,17 @@ test_lock_two_threads(PyObject *self, PyObject *obj) uint8_t v; do { pysleep(10); // allow some time for the other thread to try to lock - v = _Py_atomic_load_uint8_relaxed(&test_data.m.v); + v = _Py_atomic_load_uint8_relaxed(&test_data.m._bits); assert(v == 1 || v == 3); iters++; } while (v != 3 && iters < 200); // both the "locked" and the "has parked" bits should be set - assert(test_data.m.v == 3); + assert(test_data.m._bits == 3); PyMutex_Unlock(&test_data.m); PyEvent_Wait(&test_data.done); - assert(test_data.m.v == 0); + assert(test_data.m._bits == 0); Py_RETURN_NONE; } diff --git a/Objects/object.c b/Objects/object.c index 16f940f46f137e6..dc6402f0a99cd05 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2372,7 +2372,7 @@ new_reference(PyObject *op) #else op->ob_tid = _Py_ThreadId(); op->_padding = 0; - op->ob_mutex = (struct _PyMutex){ 0 }; + op->ob_mutex = (PyMutex){ 0 }; op->ob_gc_bits = 0; op->ob_ref_local = 1; op->ob_ref_shared = 0; diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 96960f0579a9368..bfc2baf74c954c5 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -156,6 +156,7 @@ + @@ -310,6 +311,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 2e4bd786be47e91..2686b92c6375cf6 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -114,6 +114,9 @@ Include + + Include + Include @@ -396,6 +399,9 @@ Include\cpython + + Include + Include diff --git a/Python/critical_section.c b/Python/critical_section.c index 2214d80eeb297b8..ac679ac3f8a0197 100644 --- a/Python/critical_section.c +++ b/Python/critical_section.c @@ -14,7 +14,7 @@ _PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m) c->prev = (uintptr_t)tstate->critical_section; tstate->critical_section = (uintptr_t)c; - _PyMutex_LockSlow(m); + PyMutex_Lock(m); c->mutex = m; } diff --git a/Python/lock.c b/Python/lock.c index 555f4c25b9b2142..7c6a5175e88ff11 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -47,18 +47,12 @@ _Py_yield(void) #endif } -void -_PyMutex_LockSlow(PyMutex *m) -{ - _PyMutex_LockTimed(m, -1, _PY_LOCK_DETACH); -} - PyLockStatus _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) { - uint8_t v = _Py_atomic_load_uint8_relaxed(&m->v); + uint8_t v = _Py_atomic_load_uint8_relaxed(&m->_bits); if ((v & _Py_LOCKED) == 0) { - if (_Py_atomic_compare_exchange_uint8(&m->v, &v, v|_Py_LOCKED)) { + if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, v|_Py_LOCKED)) { return PY_LOCK_ACQUIRED; } } @@ -83,7 +77,7 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) for (;;) { if ((v & _Py_LOCKED) == 0) { // The lock is unlocked. Try to grab it. - if (_Py_atomic_compare_exchange_uint8(&m->v, &v, v|_Py_LOCKED)) { + if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, v|_Py_LOCKED)) { return PY_LOCK_ACQUIRED; } continue; @@ -104,17 +98,17 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) if (!(v & _Py_HAS_PARKED)) { // We are the first waiter. Set the _Py_HAS_PARKED flag. newv = v | _Py_HAS_PARKED; - if (!_Py_atomic_compare_exchange_uint8(&m->v, &v, newv)) { + if (!_Py_atomic_compare_exchange_uint8(&m->_bits, &v, newv)) { continue; } } - int ret = _PyParkingLot_Park(&m->v, &newv, sizeof(newv), timeout, + int ret = _PyParkingLot_Park(&m->_bits, &newv, sizeof(newv), timeout, &entry, (flags & _PY_LOCK_DETACH) != 0); if (ret == Py_PARK_OK) { if (entry.handed_off) { // We own the lock now. - assert(_Py_atomic_load_uint8_relaxed(&m->v) & _Py_LOCKED); + assert(_Py_atomic_load_uint8_relaxed(&m->_bits) & _Py_LOCKED); return PY_LOCK_ACQUIRED; } } @@ -136,7 +130,7 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) } } - v = _Py_atomic_load_uint8_relaxed(&m->v); + v = _Py_atomic_load_uint8_relaxed(&m->_bits); } } @@ -158,13 +152,13 @@ mutex_unpark(PyMutex *m, struct mutex_entry *entry, int has_more_waiters) v |= _Py_HAS_PARKED; } } - _Py_atomic_store_uint8(&m->v, v); + _Py_atomic_store_uint8(&m->_bits, v); } int _PyMutex_TryUnlock(PyMutex *m) { - uint8_t v = _Py_atomic_load_uint8(&m->v); + uint8_t v = _Py_atomic_load_uint8(&m->_bits); for (;;) { if ((v & _Py_LOCKED) == 0) { // error: the mutex is not locked @@ -172,24 +166,16 @@ _PyMutex_TryUnlock(PyMutex *m) } else if ((v & _Py_HAS_PARKED)) { // wake up a single thread - _PyParkingLot_Unpark(&m->v, (_Py_unpark_fn_t *)mutex_unpark, m); + _PyParkingLot_Unpark(&m->_bits, (_Py_unpark_fn_t *)mutex_unpark, m); return 0; } - else if (_Py_atomic_compare_exchange_uint8(&m->v, &v, _Py_UNLOCKED)) { + else if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, _Py_UNLOCKED)) { // fast-path: no waiters return 0; } } } -void -_PyMutex_UnlockSlow(PyMutex *m) -{ - if (_PyMutex_TryUnlock(m) < 0) { - Py_FatalError("unlocking mutex that is not locked"); - } -} - // _PyRawMutex stores a linked list of `struct raw_mutex_entry`, one for each // thread waiting on the mutex, directly in the mutex itself. struct raw_mutex_entry { @@ -584,3 +570,19 @@ uint32_t _PySeqLock_AfterFork(_PySeqLock *seqlock) return 0; } + +#undef PyMutex_Lock +void +PyMutex_Lock(PyMutex *m) +{ + _PyMutex_LockTimed(m, -1, _PY_LOCK_DETACH); +} + +#undef PyMutex_Unlock +void +PyMutex_Unlock(PyMutex *m) +{ + if (_PyMutex_TryUnlock(m) < 0) { + Py_FatalError("unlocking mutex that is not locked"); + } +}