From bee112a94d688c8048ddeddaa7bbd5150aecad11 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 16 Oct 2024 14:53:21 +0300 Subject: [PATCH 001/106] gh-124872: Replace enter/exit events with "switched" (#125532) Users want to know when the current context switches to a different context object. Right now this happens when and only when a context is entered or exited, so the enter and exit events are synonymous with "switched". However, if the changes proposed for gh-99633 are implemented, the current context will also switch for reasons other than context enter or exit. Since users actually care about context switches and not enter or exit, replace the enter and exit events with a single switched event. The former exit event was emitted just before exiting the context. The new switched event is emitted after the context is exited to match the semantics users expect of an event with a past-tense name. If users need the ability to clean up before the switch takes effect, another event type can be added in the future. It is not added here because YAGNI. I skipped 0 in the enum as a matter of practice. Skipping 0 makes it easier to troubleshoot when code forgets to set zeroed memory, and it aligns with best practices for other tools (e.g., https://protobuf.dev/programming-guides/dos-donts/#unspecified-enum). 
Co-authored-by: Richard Hansen Co-authored-by: Victor Stinner --- Doc/c-api/contextvars.rst | 14 ++--- Include/cpython/context.h | 17 ++---- Lib/test/test_capi/test_watchers.py | 87 +++++++++++++++------------- Modules/_testcapi/watchers.c | 79 +++++++++++++------------ Python/context.c | 31 ++++++---- Tools/c-analyzer/cpython/ignored.tsv | 4 +- 6 files changed, 118 insertions(+), 114 deletions(-) diff --git a/Doc/c-api/contextvars.rst b/Doc/c-api/contextvars.rst index 8eba54a80dc80d..b7c6550ff34aac 100644 --- a/Doc/c-api/contextvars.rst +++ b/Doc/c-api/contextvars.rst @@ -123,16 +123,10 @@ Context object management functions: Enumeration of possible context object watcher events: - - ``Py_CONTEXT_EVENT_ENTER``: A context has been entered, causing the - :term:`current context` to switch to it. The object passed to the watch - callback is the now-current :class:`contextvars.Context` object. Each - enter event will eventually have a corresponding exit event for the same - context object after any subsequently entered contexts have themselves been - exited. - - ``Py_CONTEXT_EVENT_EXIT``: A context is about to be exited, which will - cause the :term:`current context` to switch back to what it was before the - context was entered. The object passed to the watch callback is the - still-current :class:`contextvars.Context` object. + - ``Py_CONTEXT_SWITCHED``: The :term:`current context` has switched to a + different context. The object passed to the watch callback is the + now-current :class:`contextvars.Context` object, or None if no context is + current. .. versionadded:: 3.14 diff --git a/Include/cpython/context.h b/Include/cpython/context.h index 3c9be7873b9399..3a7a4b459c09ad 100644 --- a/Include/cpython/context.h +++ b/Include/cpython/context.h @@ -29,20 +29,11 @@ PyAPI_FUNC(int) PyContext_Exit(PyObject *); typedef enum { /* - * A context has been entered, causing the "current context" to switch to - * it. 
The object passed to the watch callback is the now-current - * contextvars.Context object. Each enter event will eventually have a - * corresponding exit event for the same context object after any - * subsequently entered contexts have themselves been exited. + * The current context has switched to a different context. The object + * passed to the watch callback is the now-current contextvars.Context + * object, or None if no context is current. */ - Py_CONTEXT_EVENT_ENTER, - /* - * A context is about to be exited, which will cause the "current context" - * to switch back to what it was before the context was entered. The - * object passed to the watch callback is the still-current - * contextvars.Context object. - */ - Py_CONTEXT_EVENT_EXIT, + Py_CONTEXT_SWITCHED = 1, } PyContextEvent; /* diff --git a/Lib/test/test_capi/test_watchers.py b/Lib/test/test_capi/test_watchers.py index f21d2627c6094b..4bb764bf9d0963 100644 --- a/Lib/test/test_capi/test_watchers.py +++ b/Lib/test/test_capi/test_watchers.py @@ -577,68 +577,66 @@ class TestContextObjectWatchers(unittest.TestCase): def context_watcher(self, which_watcher): wid = _testcapi.add_context_watcher(which_watcher) try: - yield wid + switches = _testcapi.get_context_switches(which_watcher) + except ValueError: + switches = None + try: + yield switches finally: _testcapi.clear_context_watcher(wid) - def assert_event_counts(self, exp_enter_0, exp_exit_0, - exp_enter_1, exp_exit_1): - self.assertEqual( - exp_enter_0, _testcapi.get_context_watcher_num_enter_events(0)) - self.assertEqual( - exp_exit_0, _testcapi.get_context_watcher_num_exit_events(0)) - self.assertEqual( - exp_enter_1, _testcapi.get_context_watcher_num_enter_events(1)) - self.assertEqual( - exp_exit_1, _testcapi.get_context_watcher_num_exit_events(1)) + def assert_event_counts(self, want_0, want_1): + self.assertEqual(len(_testcapi.get_context_switches(0)), want_0) + self.assertEqual(len(_testcapi.get_context_switches(1)), want_1) def 
test_context_object_events_dispatched(self): # verify that all counts are zero before any watchers are registered - self.assert_event_counts(0, 0, 0, 0) + self.assert_event_counts(0, 0) # verify that all counts remain zero when a context object is # entered and exited with no watchers registered ctx = contextvars.copy_context() - ctx.run(self.assert_event_counts, 0, 0, 0, 0) - self.assert_event_counts(0, 0, 0, 0) + ctx.run(self.assert_event_counts, 0, 0) + self.assert_event_counts(0, 0) # verify counts are as expected when first watcher is registered with self.context_watcher(0): - self.assert_event_counts(0, 0, 0, 0) - ctx.run(self.assert_event_counts, 1, 0, 0, 0) - self.assert_event_counts(1, 1, 0, 0) + self.assert_event_counts(0, 0) + ctx.run(self.assert_event_counts, 1, 0) + self.assert_event_counts(2, 0) # again with second watcher registered with self.context_watcher(1): - self.assert_event_counts(1, 1, 0, 0) - ctx.run(self.assert_event_counts, 2, 1, 1, 0) - self.assert_event_counts(2, 2, 1, 1) + self.assert_event_counts(2, 0) + ctx.run(self.assert_event_counts, 3, 1) + self.assert_event_counts(4, 2) # verify counts are reset and don't change after both watchers are cleared - ctx.run(self.assert_event_counts, 0, 0, 0, 0) - self.assert_event_counts(0, 0, 0, 0) + ctx.run(self.assert_event_counts, 0, 0) + self.assert_event_counts(0, 0) - def test_enter_error(self): - with self.context_watcher(2): - with catch_unraisable_exception() as cm: - ctx = contextvars.copy_context() - ctx.run(int, 0) - self.assertEqual( - cm.unraisable.err_msg, - "Exception ignored in " - f"Py_CONTEXT_EVENT_EXIT watcher callback for {ctx!r}" - ) - self.assertEqual(str(cm.unraisable.exc_value), "boom!") + def test_callback_error(self): + ctx_outer = contextvars.copy_context() + ctx_inner = contextvars.copy_context() + unraisables = [] - def test_exit_error(self): - ctx = contextvars.copy_context() - def _in_context(stack): - stack.enter_context(self.context_watcher(2)) + def _in_outer(): + 
with self.context_watcher(2): + with catch_unraisable_exception() as cm: + ctx_inner.run(lambda: unraisables.append(cm.unraisable)) + unraisables.append(cm.unraisable) - with catch_unraisable_exception() as cm: - with ExitStack() as stack: - ctx.run(_in_context, stack) - self.assertEqual(str(cm.unraisable.exc_value), "boom!") + try: + ctx_outer.run(_in_outer) + self.assertEqual([x.err_msg for x in unraisables], + ["Exception ignored in Py_CONTEXT_SWITCHED " + f"watcher callback for {ctx!r}" + for ctx in [ctx_inner, ctx_outer]]) + self.assertEqual([str(x.exc_value) for x in unraisables], + ["boom!", "boom!"]) + finally: + # Break reference cycle + unraisables = None def test_clear_out_of_range_watcher_id(self): with self.assertRaisesRegex(ValueError, r"Invalid context watcher ID -1"): @@ -654,5 +652,12 @@ def test_allocate_too_many_watchers(self): with self.assertRaisesRegex(RuntimeError, r"no more context watcher IDs available"): _testcapi.allocate_too_many_context_watchers() + def test_exit_base_context(self): + ctx = contextvars.Context() + _testcapi.clear_context_stack() + with self.context_watcher(0) as switches: + ctx.run(lambda: None) + self.assertEqual(switches, [ctx, None]) + if __name__ == "__main__": unittest.main() diff --git a/Modules/_testcapi/watchers.c b/Modules/_testcapi/watchers.c index b4233d07134aea..321d3aeffb6ad1 100644 --- a/Modules/_testcapi/watchers.c +++ b/Modules/_testcapi/watchers.c @@ -626,16 +626,12 @@ allocate_too_many_func_watchers(PyObject *self, PyObject *args) // Test contexct object watchers #define NUM_CONTEXT_WATCHERS 2 static int context_watcher_ids[NUM_CONTEXT_WATCHERS] = {-1, -1}; -static int num_context_object_enter_events[NUM_CONTEXT_WATCHERS] = {0, 0}; -static int num_context_object_exit_events[NUM_CONTEXT_WATCHERS] = {0, 0}; +static PyObject *context_switches[NUM_CONTEXT_WATCHERS]; static int handle_context_watcher_event(int which_watcher, PyContextEvent event, PyObject *ctx) { - if (event == Py_CONTEXT_EVENT_ENTER) { - 
num_context_object_enter_events[which_watcher]++; - } - else if (event == Py_CONTEXT_EVENT_EXIT) { - num_context_object_exit_events[which_watcher]++; + if (event == Py_CONTEXT_SWITCHED) { + PyList_Append(context_switches[which_watcher], ctx); } else { return -1; @@ -667,31 +663,28 @@ error_context_event_handler(PyContextEvent event, PyObject *ctx) { static PyObject * add_context_watcher(PyObject *self, PyObject *which_watcher) { - int watcher_id; + static const PyContext_WatchCallback callbacks[] = { + &first_context_watcher_callback, + &second_context_watcher_callback, + &error_context_event_handler, + }; assert(PyLong_Check(which_watcher)); long which_l = PyLong_AsLong(which_watcher); - if (which_l == 0) { - watcher_id = PyContext_AddWatcher(first_context_watcher_callback); - context_watcher_ids[0] = watcher_id; - num_context_object_enter_events[0] = 0; - num_context_object_exit_events[0] = 0; - } - else if (which_l == 1) { - watcher_id = PyContext_AddWatcher(second_context_watcher_callback); - context_watcher_ids[1] = watcher_id; - num_context_object_enter_events[1] = 0; - num_context_object_exit_events[1] = 0; - } - else if (which_l == 2) { - watcher_id = PyContext_AddWatcher(error_context_event_handler); - } - else { + if (which_l < 0 || which_l >= (long)Py_ARRAY_LENGTH(callbacks)) { PyErr_Format(PyExc_ValueError, "invalid watcher %d", which_l); return NULL; } + int watcher_id = PyContext_AddWatcher(callbacks[which_l]); if (watcher_id < 0) { return NULL; } + if (which_l >= 0 && which_l < NUM_CONTEXT_WATCHERS) { + context_watcher_ids[which_l] = watcher_id; + Py_XSETREF(context_switches[which_l], PyList_New(0)); + if (context_switches[which_l] == NULL) { + return NULL; + } + } return PyLong_FromLong(watcher_id); } @@ -708,8 +701,7 @@ clear_context_watcher(PyObject *self, PyObject *watcher_id) for (int i = 0; i < NUM_CONTEXT_WATCHERS; i++) { if (watcher_id_l == context_watcher_ids[i]) { context_watcher_ids[i] = -1; - num_context_object_enter_events[i] = 0; - 
num_context_object_exit_events[i] = 0; + Py_CLEAR(context_switches[i]); } } } @@ -717,21 +709,34 @@ clear_context_watcher(PyObject *self, PyObject *watcher_id) } static PyObject * -get_context_watcher_num_enter_events(PyObject *self, PyObject *watcher_id) +clear_context_stack(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) { - assert(PyLong_Check(watcher_id)); - long watcher_id_l = PyLong_AsLong(watcher_id); - assert(watcher_id_l >= 0 && watcher_id_l < NUM_CONTEXT_WATCHERS); - return PyLong_FromLong(num_context_object_enter_events[watcher_id_l]); + PyThreadState *tstate = PyThreadState_Get(); + if (tstate->context == NULL) { + Py_RETURN_NONE; + } + if (((PyContext *)tstate->context)->ctx_prev != NULL) { + PyErr_SetString(PyExc_RuntimeError, + "must first exit all non-base contexts"); + return NULL; + } + Py_CLEAR(tstate->context); + Py_RETURN_NONE; } static PyObject * -get_context_watcher_num_exit_events(PyObject *self, PyObject *watcher_id) +get_context_switches(PyObject *Py_UNUSED(self), PyObject *watcher_id) { assert(PyLong_Check(watcher_id)); long watcher_id_l = PyLong_AsLong(watcher_id); - assert(watcher_id_l >= 0 && watcher_id_l < NUM_CONTEXT_WATCHERS); - return PyLong_FromLong(num_context_object_exit_events[watcher_id_l]); + if (watcher_id_l < 0 || watcher_id_l >= NUM_CONTEXT_WATCHERS) { + PyErr_Format(PyExc_ValueError, "invalid watcher %ld", watcher_id_l); + return NULL; + } + if (context_switches[watcher_id_l] == NULL) { + return PyList_New(0); + } + return Py_NewRef(context_switches[watcher_id_l]); } static PyObject * @@ -835,10 +840,8 @@ static PyMethodDef test_methods[] = { // Code object watchers. 
{"add_context_watcher", add_context_watcher, METH_O, NULL}, {"clear_context_watcher", clear_context_watcher, METH_O, NULL}, - {"get_context_watcher_num_enter_events", - get_context_watcher_num_enter_events, METH_O, NULL}, - {"get_context_watcher_num_exit_events", - get_context_watcher_num_exit_events, METH_O, NULL}, + {"clear_context_stack", clear_context_stack, METH_NOARGS, NULL}, + {"get_context_switches", get_context_switches, METH_O, NULL}, {"allocate_too_many_context_watchers", (PyCFunction) allocate_too_many_context_watchers, METH_NOARGS, NULL}, {NULL}, diff --git a/Python/context.c b/Python/context.c index 8bc487a33c890b..95aa82206270f9 100644 --- a/Python/context.c +++ b/Python/context.c @@ -102,10 +102,8 @@ PyContext_CopyCurrent(void) static const char * context_event_name(PyContextEvent event) { switch (event) { - case Py_CONTEXT_EVENT_ENTER: - return "Py_CONTEXT_EVENT_ENTER"; - case Py_CONTEXT_EVENT_EXIT: - return "Py_CONTEXT_EVENT_EXIT"; + case Py_CONTEXT_SWITCHED: + return "Py_CONTEXT_SWITCHED"; default: return "?"; } @@ -115,6 +113,13 @@ context_event_name(PyContextEvent event) { static void notify_context_watchers(PyThreadState *ts, PyContextEvent event, PyObject *ctx) { + if (ctx == NULL) { + // This will happen after exiting the last context in the stack, which + // can occur if context_get was never called before entering a context + // (e.g., called `contextvars.Context().run()` on a fresh thread, as + // PyContext_Enter doesn't call context_get). + ctx = Py_None; + } assert(Py_REFCNT(ctx) > 0); PyInterpreterState *interp = ts->interp; assert(interp->_initialized); @@ -175,6 +180,16 @@ PyContext_ClearWatcher(int watcher_id) } +static inline void +context_switched(PyThreadState *ts) +{ + ts->context_ver++; + // ts->context is used instead of context_get() because context_get() might + // throw if ts->context is NULL. 
+ notify_context_watchers(ts, Py_CONTEXT_SWITCHED, ts->context); +} + + static int _PyContext_Enter(PyThreadState *ts, PyObject *octx) { @@ -191,9 +206,7 @@ _PyContext_Enter(PyThreadState *ts, PyObject *octx) ctx->ctx_entered = 1; ts->context = Py_NewRef(ctx); - ts->context_ver++; - - notify_context_watchers(ts, Py_CONTEXT_EVENT_ENTER, octx); + context_switched(ts); return 0; } @@ -227,13 +240,11 @@ _PyContext_Exit(PyThreadState *ts, PyObject *octx) return -1; } - notify_context_watchers(ts, Py_CONTEXT_EVENT_EXIT, octx); Py_SETREF(ts->context, (PyObject *)ctx->ctx_prev); - ts->context_ver++; ctx->ctx_prev = NULL; ctx->ctx_entered = 0; - + context_switched(ts); return 0; } diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index e6c599a2ac4a46..2605825d3d0078 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -455,8 +455,8 @@ Modules/_testcapi/watchers.c - pyfunc_watchers - Modules/_testcapi/watchers.c - func_watcher_ids - Modules/_testcapi/watchers.c - func_watcher_callbacks - Modules/_testcapi/watchers.c - context_watcher_ids - -Modules/_testcapi/watchers.c - num_context_object_enter_events - -Modules/_testcapi/watchers.c - num_context_object_exit_events - +Modules/_testcapi/watchers.c - context_switches - +Modules/_testcapi/watchers.c add_context_watcher callbacks - Modules/_testcapimodule.c - BasicStaticTypes - Modules/_testcapimodule.c - num_basic_static_types_used - Modules/_testcapimodule.c - ContainerNoGC_members - From 51410d8bdcfe0fd215f94a098dc6cd0919c648a1 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Wed, 16 Oct 2024 08:51:15 -0400 Subject: [PATCH 002/106] gh-125217: Turn off optimization around _PyEval_EvalFrameDefault to avoid MSVC crash (#125477) --- Python/ceval.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index f4e0add3034707..43776e773e0deb 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -761,6
+761,16 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch) * so consume 3 units of C stack */ #define PY_EVAL_C_STACK_UNITS 2 +#if defined(_MSC_VER) && defined(_Py_USING_PGO) && defined(_Py_JIT) +/* _PyEval_EvalFrameDefault is too large to optimize for speed with + PGO on MSVC when the JIT is enabled. Disable that optimization + around this function only. If this is fixed upstream, we should + gate this on the version of MSVC. + */ +# pragma optimize("t", off) +/* This setting is reversed below following _PyEval_EvalFrameDefault */ +#endif + PyObject* _Py_HOT_FUNCTION _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag) { @@ -1136,6 +1146,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int } +#if defined(_MSC_VER) && defined(_Py_USING_PGO) && defined(_Py_JIT) +# pragma optimize("", on) +#endif + #if defined(__GNUC__) # pragma GCC diagnostic pop #elif defined(_MSC_VER) /* MS_WINDOWS */ From feda9aa73ab95d17a291db22c416146f8e70edeb Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Wed, 16 Oct 2024 14:13:07 +0100 Subject: [PATCH 003/106] gh-125444: Fix illegal instruction for older Arm architectures (#125574) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Arm v5 it is not possible to get the thread ID via the c13 register, hence the illegal instruction. The c13 register has provided the thread ID since the Arm v6K architecture variant. Other variants of Arm v6 (T2, Z and base) don’t provide the thread ID via c13. For the sake of simplicity we group v5 and v6 together and use that instruction on Arm v7 and later only. 
--- Include/internal/mimalloc/mimalloc/prim.h | 4 ++-- Include/object.h | 2 +- .../2024-10-16-12-12-39.gh-issue-125444.9tG2X6.rst | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-16-12-12-39.gh-issue-125444.9tG2X6.rst diff --git a/Include/internal/mimalloc/mimalloc/prim.h b/Include/internal/mimalloc/mimalloc/prim.h index 8a60d528458e6c..322ab29e6b41c2 100644 --- a/Include/internal/mimalloc/mimalloc/prim.h +++ b/Include/internal/mimalloc/mimalloc/prim.h @@ -151,9 +151,9 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { // If you test on another platform and it works please send a PR :-) // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. #elif defined(__GNUC__) && ( \ - (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \ || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ - || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \ || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ ) diff --git a/Include/object.h b/Include/object.h index 5be4dedadc20eb..7e1b0966fc5e34 100644 --- a/Include/object.h +++ b/Include/object.h @@ -192,7 +192,7 @@ _Py_ThreadId(void) __asm__("movq %%gs:0, %0" : "=r" (tid)); // x86_64 macOSX uses GS #elif defined(__x86_64__) __asm__("movq %%fs:0, %0" : "=r" (tid)); // x86_64 Linux, BSD uses FS -#elif defined(__arm__) +#elif defined(__arm__) && __ARM_ARCH >= 7 __asm__ 
("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tid)); #elif defined(__aarch64__) && defined(__APPLE__) __asm__ ("mrs %0, tpidrro_el0" : "=r" (tid)); diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-16-12-12-39.gh-issue-125444.9tG2X6.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-16-12-12-39.gh-issue-125444.9tG2X6.rst new file mode 100644 index 00000000000000..13c1e745edf8d5 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-16-12-12-39.gh-issue-125444.9tG2X6.rst @@ -0,0 +1 @@ +Fix illegal instruction for older Arm architectures. Patch by Diego Russo, testing by Ross Burton. From e4d90be84536746a966478acc4c0cf43a201f492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Wed, 16 Oct 2024 15:24:41 +0200 Subject: [PATCH 004/106] gh-125584: Require network resource in ``test_urllib2.HandlerTests.test_ftp_error`` (#125586) --- Lib/test/test_urllib2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 19179fdc9508ca..b90ccc2f125b93 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -794,6 +794,7 @@ def connect_ftp(self, user, passwd, host, port, dirs, self.assertEqual(headers.get("Content-type"), mimetype) self.assertEqual(int(headers["Content-length"]), len(data)) + @support.requires_resource("network") def test_ftp_error(self): class ErrorFTPHandler(urllib.request.FTPHandler): def __init__(self, exception): From d83fcf8371f2f33c7797bc8f5423a8bca8c46e5c Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 16 Oct 2024 17:27:19 +0300 Subject: [PATCH 005/106] CI: Bump Python to 3.13 and mypy to 1.12 in mypy workflow (#125592) * Bump mypy to 1.12 & Python to 3.13 * Remove unnecessary `type: ignore` --- .github/workflows/mypy.yml | 2 +- Tools/clinic/libclinic/converter.py | 4 +--- Tools/requirements-dev.txt | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 
1b2d998182e0f7..e5b05302b5ac27 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -53,7 +53,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.13" cache: pip cache-dependency-path: Tools/requirements-dev.txt - run: pip install -r Tools/requirements-dev.txt diff --git a/Tools/clinic/libclinic/converter.py b/Tools/clinic/libclinic/converter.py index 2abf06dc4e89a2..86853bb4fba253 100644 --- a/Tools/clinic/libclinic/converter.py +++ b/Tools/clinic/libclinic/converter.py @@ -545,9 +545,7 @@ def closure(f: CConverterClassT) -> CConverterClassT: if not kwargs: added_f = f else: - # type ignore due to a mypy regression :( - # https://github.com/python/mypy/issues/17646 - added_f = functools.partial(f, **kwargs) # type: ignore[misc] + added_f = functools.partial(f, **kwargs) if format_unit: legacy_converters[format_unit] = added_f return f diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index 408a9ea6607f9e..57f0b982b00f5d 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -1,6 +1,6 @@ # Requirements file for external linters and checks we run on # Tools/clinic, Tools/cases_generator/, and Tools/peg_generator/ in CI -mypy==1.11.2 +mypy==1.12 # needed for peg_generator: types-psutil==6.0.0.20240901 From 760872efecb95017db8e38a8eda614bf23d2a22c Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 16 Oct 2024 11:39:17 -0400 Subject: [PATCH 006/106] gh-125451: Fix deadlock in ProcessPoolExecutor shutdown (#125492) There was a deadlock when `ProcessPoolExecutor` shuts down at the same time that a queueing thread handles an error processing a task. Don't use `_shutdown_lock` to protect the `_ThreadWakeup` pipes -- use an internal lock instead. 
This fixes the ordering deadlock where the `ExecutorManagerThread` holds the `_shutdown_lock` and joins the queueing thread, while the queueing thread is attempting to acquire the `_shutdown_lock` while closing the `_ThreadWakeup`. --- Lib/concurrent/futures/process.py | 50 ++++++++----------- .../test_concurrent_futures/test_shutdown.py | 3 -- ...-10-14-17-29-34.gh-issue-125451.fmP3T9.rst | 2 + 3 files changed, 23 insertions(+), 32 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-14-17-29-34.gh-issue-125451.fmP3T9.rst diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py index 7092b4757b5429..42eee72bc1457f 100644 --- a/Lib/concurrent/futures/process.py +++ b/Lib/concurrent/futures/process.py @@ -68,27 +68,31 @@ class _ThreadWakeup: def __init__(self): self._closed = False + self._lock = threading.Lock() self._reader, self._writer = mp.Pipe(duplex=False) def close(self): - # Please note that we do not take the shutdown lock when + # Please note that we do not take the self._lock when # calling clear() (to avoid deadlocking) so this method can # only be called safely from the same thread as all calls to - # clear() even if you hold the shutdown lock. Otherwise we + # clear() even if you hold the lock. Otherwise we # might try to read from the closed pipe. 
- if not self._closed: - self._closed = True - self._writer.close() - self._reader.close() + with self._lock: + if not self._closed: + self._closed = True + self._writer.close() + self._reader.close() def wakeup(self): - if not self._closed: - self._writer.send_bytes(b"") + with self._lock: + if not self._closed: + self._writer.send_bytes(b"") def clear(self): - if not self._closed: - while self._reader.poll(): - self._reader.recv_bytes() + if self._closed: + raise RuntimeError('operation on closed _ThreadWakeup') + while self._reader.poll(): + self._reader.recv_bytes() def _python_exit(): @@ -167,10 +171,8 @@ def __init__(self, work_id, fn, args, kwargs): class _SafeQueue(Queue): """Safe Queue set exception to the future object linked to a job""" - def __init__(self, max_size=0, *, ctx, pending_work_items, shutdown_lock, - thread_wakeup): + def __init__(self, max_size=0, *, ctx, pending_work_items, thread_wakeup): self.pending_work_items = pending_work_items - self.shutdown_lock = shutdown_lock self.thread_wakeup = thread_wakeup super().__init__(max_size, ctx=ctx) @@ -179,8 +181,7 @@ def _on_queue_feeder_error(self, e, obj): tb = format_exception(type(e), e, e.__traceback__) e.__cause__ = _RemoteTraceback('\n"""\n{}"""'.format(''.join(tb))) work_item = self.pending_work_items.pop(obj.work_id, None) - with self.shutdown_lock: - self.thread_wakeup.wakeup() + self.thread_wakeup.wakeup() # work_item can be None if another process terminated. In this # case, the executor_manager_thread fails all work_items # with BrokenProcessPool @@ -296,12 +297,10 @@ def __init__(self, executor): # if there is no pending work item. 
def weakref_cb(_, thread_wakeup=self.thread_wakeup, - shutdown_lock=self.shutdown_lock, mp_util_debug=mp.util.debug): mp_util_debug('Executor collected: triggering callback for' ' QueueManager wakeup') - with shutdown_lock: - thread_wakeup.wakeup() + thread_wakeup.wakeup() self.executor_reference = weakref.ref(executor, weakref_cb) @@ -429,11 +428,6 @@ def wait_result_broken_or_wakeup(self): elif wakeup_reader in ready: is_broken = False - # No need to hold the _shutdown_lock here because: - # 1. we're the only thread to use the wakeup reader - # 2. we're also the only thread to call thread_wakeup.close() - # 3. we want to avoid a possible deadlock when both reader and writer - # would block (gh-105829) self.thread_wakeup.clear() return result_item, is_broken, cause @@ -721,10 +715,9 @@ def __init__(self, max_workers=None, mp_context=None, # as it could result in a deadlock if a worker process dies with the # _result_queue write lock still acquired. # - # _shutdown_lock must be locked to access _ThreadWakeup.close() and - # .wakeup(). Care must also be taken to not call clear or close from - # more than one thread since _ThreadWakeup.clear() is not protected by - # the _shutdown_lock + # Care must be taken to only call clear and close from the + # executor_manager_thread, since _ThreadWakeup.clear() is not protected + # by a lock. self._executor_manager_thread_wakeup = _ThreadWakeup() # Create communication channels for the executor @@ -735,7 +728,6 @@ def __init__(self, max_workers=None, mp_context=None, self._call_queue = _SafeQueue( max_size=queue_size, ctx=self._mp_context, pending_work_items=self._pending_work_items, - shutdown_lock=self._shutdown_lock, thread_wakeup=self._executor_manager_thread_wakeup) # Killed worker processes can produce spurious "broken pipe" # tracebacks in the queue's own worker thread. 
But we detect killed diff --git a/Lib/test/test_concurrent_futures/test_shutdown.py b/Lib/test/test_concurrent_futures/test_shutdown.py index ba3618614a9bf9..7a4065afd46fc8 100644 --- a/Lib/test/test_concurrent_futures/test_shutdown.py +++ b/Lib/test/test_concurrent_futures/test_shutdown.py @@ -253,9 +253,6 @@ def test_cancel_futures_wait_false(self): class ProcessPoolShutdownTest(ExecutorShutdownTest): - # gh-125451: 'lock' cannot be serialized, the test is broken - # and hangs randomly - @unittest.skipIf(True, "broken test") def test_processes_terminate(self): def acquire_lock(lock): lock.acquire() diff --git a/Misc/NEWS.d/next/Library/2024-10-14-17-29-34.gh-issue-125451.fmP3T9.rst b/Misc/NEWS.d/next/Library/2024-10-14-17-29-34.gh-issue-125451.fmP3T9.rst new file mode 100644 index 00000000000000..589988d4d6273f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-14-17-29-34.gh-issue-125451.fmP3T9.rst @@ -0,0 +1,2 @@ +Fix deadlock when :class:`concurrent.futures.ProcessPoolExecutor` shuts down +concurrently with an error when feeding a job to a worker process. From aab3210271136ad8e8fecd927b806602c463e1f2 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Wed, 16 Oct 2024 22:53:30 +0200 Subject: [PATCH 007/106] gh-125615: Fix grammar nit in tutorial's interactive interpreter appendix (GH-125617) Replace "without ... nor" with "with neither ... nor" --- Doc/tutorial/appendix.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/tutorial/appendix.rst b/Doc/tutorial/appendix.rst index da664f2f360ff1..6a1611afadb57c 100644 --- a/Doc/tutorial/appendix.rst +++ b/Doc/tutorial/appendix.rst @@ -20,7 +20,7 @@ This one supports color, multiline editing, history browsing, and paste mode. To disable color, see :ref:`using-on-controlling-color` for details. Function keys provide some additional functionality. :kbd:`F1` enters the interactive help browser :mod:`pydoc`. 
-:kbd:`F2` allows for browsing command-line history without output nor the +:kbd:`F2` allows for browsing command-line history with neither output nor the :term:`>>>` and :term:`...` prompts. :kbd:`F3` enters "paste mode", which makes pasting larger blocks of code easier. Press :kbd:`F3` to return to the regular prompt. From aecbc2e6f40f8066f478c2d0f3be5b550e36cfd3 Mon Sep 17 00:00:00 2001 From: Vincent Fazio Date: Wed, 16 Oct 2024 17:01:42 -0500 Subject: [PATCH 008/106] gh-115382: Fix cross compiles when host and target use same SOABI Co-authored-by: Erlend E. Aasland --- Lib/sysconfig/__init__.py | 15 ++++++++++++++- Lib/test/libregrtest/main.py | 1 + Lib/test/pythoninfo.py | 1 + ...2024-03-03-20-28-23.gh-issue-115382.97hJFE.rst | 1 + configure | 2 +- configure.ac | 2 +- 6 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-03-03-20-28-23.gh-issue-115382.97hJFE.rst diff --git a/Lib/sysconfig/__init__.py b/Lib/sysconfig/__init__.py index 80aef3447117e5..43f9276799b848 100644 --- a/Lib/sysconfig/__init__.py +++ b/Lib/sysconfig/__init__.py @@ -340,7 +340,20 @@ def _init_posix(vars): """Initialize the module as appropriate for POSIX systems.""" # _sysconfigdata is generated at build time, see _generate_posix_vars() name = _get_sysconfigdata_name() - _temp = __import__(name, globals(), locals(), ['build_time_vars'], 0) + + # For cross builds, the path to the target's sysconfigdata must be specified + # so it can be imported. It cannot be in PYTHONPATH, as foreign modules in + # sys.path can cause crashes when loaded by the host interpreter. + # Rely on truthiness as a valueless env variable is still an empty string. + # See OS X note in _generate_posix_vars re _sysconfigdata. 
+ if (path := os.environ.get('_PYTHON_SYSCONFIGDATA_PATH')): + from importlib.machinery import FileFinder, SourceFileLoader, SOURCE_SUFFIXES + from importlib.util import module_from_spec + spec = FileFinder(path, (SourceFileLoader, SOURCE_SUFFIXES)).find_spec(name) + _temp = module_from_spec(spec) + spec.loader.exec_module(_temp) + else: + _temp = __import__(name, globals(), locals(), ['build_time_vars'], 0) build_time_vars = _temp.build_time_vars vars.update(build_time_vars) diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index f693a788048694..2ef4349552bf5f 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -594,6 +594,7 @@ def _add_cross_compile_opts(self, regrtest_opts): '_PYTHON_PROJECT_BASE', '_PYTHON_HOST_PLATFORM', '_PYTHON_SYSCONFIGDATA_NAME', + "_PYTHON_SYSCONFIGDATA_PATH", 'PYTHONPATH' } old_environ = os.environ diff --git a/Lib/test/pythoninfo.py b/Lib/test/pythoninfo.py index 05a28bda2d38ba..0b2e4b1c1988c4 100644 --- a/Lib/test/pythoninfo.py +++ b/Lib/test/pythoninfo.py @@ -334,6 +334,7 @@ def format_groups(groups): "_PYTHON_HOST_PLATFORM", "_PYTHON_PROJECT_BASE", "_PYTHON_SYSCONFIGDATA_NAME", + "_PYTHON_SYSCONFIGDATA_PATH", "__PYVENV_LAUNCHER__", # Sanitizer options diff --git a/Misc/NEWS.d/next/Build/2024-03-03-20-28-23.gh-issue-115382.97hJFE.rst b/Misc/NEWS.d/next/Build/2024-03-03-20-28-23.gh-issue-115382.97hJFE.rst new file mode 100644 index 00000000000000..f8d19651fc5854 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-03-03-20-28-23.gh-issue-115382.97hJFE.rst @@ -0,0 +1 @@ +Fix cross compile failures when the host and target SOABIs match. 
diff --git a/configure b/configure index 17c70d25f9e70c..b11f41d5379958 100755 --- a/configure +++ b/configure @@ -3708,7 +3708,7 @@ fi fi ac_cv_prog_PYTHON_FOR_REGEN=$with_build_python PYTHON_FOR_FREEZE="$with_build_python" - PYTHON_FOR_BUILD='_PYTHON_PROJECT_BASE=$(abs_builddir) _PYTHON_HOST_PLATFORM=$(_PYTHON_HOST_PLATFORM) PYTHONPATH=$(shell test -f pybuilddir.txt && echo $(abs_builddir)/`cat pybuilddir.txt`:)$(srcdir)/Lib _PYTHON_SYSCONFIGDATA_NAME=_sysconfigdata_$(ABIFLAGS)_$(MACHDEP)_$(MULTIARCH) '$with_build_python + PYTHON_FOR_BUILD='_PYTHON_PROJECT_BASE=$(abs_builddir) _PYTHON_HOST_PLATFORM=$(_PYTHON_HOST_PLATFORM) PYTHONPATH=$(srcdir)/Lib _PYTHON_SYSCONFIGDATA_NAME=_sysconfigdata_$(ABIFLAGS)_$(MACHDEP)_$(MULTIARCH) _PYTHON_SYSCONFIGDATA_PATH=$(shell test -f pybuilddir.txt && echo $(abs_builddir)/`cat pybuilddir.txt`) '$with_build_python { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_build_python" >&5 printf "%s\n" "$with_build_python" >&6; } diff --git a/configure.ac b/configure.ac index 56daa8b0f79bc0..d5bc739c34c90f 100644 --- a/configure.ac +++ b/configure.ac @@ -164,7 +164,7 @@ AC_ARG_WITH([build-python], dnl Build Python interpreter is used for regeneration and freezing. 
ac_cv_prog_PYTHON_FOR_REGEN=$with_build_python PYTHON_FOR_FREEZE="$with_build_python" - PYTHON_FOR_BUILD='_PYTHON_PROJECT_BASE=$(abs_builddir) _PYTHON_HOST_PLATFORM=$(_PYTHON_HOST_PLATFORM) PYTHONPATH=$(shell test -f pybuilddir.txt && echo $(abs_builddir)/`cat pybuilddir.txt`:)$(srcdir)/Lib _PYTHON_SYSCONFIGDATA_NAME=_sysconfigdata_$(ABIFLAGS)_$(MACHDEP)_$(MULTIARCH) '$with_build_python + PYTHON_FOR_BUILD='_PYTHON_PROJECT_BASE=$(abs_builddir) _PYTHON_HOST_PLATFORM=$(_PYTHON_HOST_PLATFORM) PYTHONPATH=$(srcdir)/Lib _PYTHON_SYSCONFIGDATA_NAME=_sysconfigdata_$(ABIFLAGS)_$(MACHDEP)_$(MULTIARCH) _PYTHON_SYSCONFIGDATA_PATH=$(shell test -f pybuilddir.txt && echo $(abs_builddir)/`cat pybuilddir.txt`) '$with_build_python AC_MSG_RESULT([$with_build_python]) ], [ AS_VAR_IF([cross_compiling], [yes], From 8e7b2a1161744c7d3d90966a65ed6ae1019a65cb Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Wed, 16 Oct 2024 23:05:20 +0100 Subject: [PATCH 009/106] gh-125550: Enable py.exe to detect Store installs of 3.14 (GH-125551) --- .../2024-10-15-21-28-43.gh-issue-125550.hmGWCP.rst | 2 ++ PC/launcher2.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-10-15-21-28-43.gh-issue-125550.hmGWCP.rst diff --git a/Misc/NEWS.d/next/Windows/2024-10-15-21-28-43.gh-issue-125550.hmGWCP.rst b/Misc/NEWS.d/next/Windows/2024-10-15-21-28-43.gh-issue-125550.hmGWCP.rst new file mode 100644 index 00000000000000..c3ae00c74b3d91 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-10-15-21-28-43.gh-issue-125550.hmGWCP.rst @@ -0,0 +1,2 @@ +Enable the :ref:`launcher` to detect Python 3.14 installs from the Windows +Store. 
diff --git a/PC/launcher2.c b/PC/launcher2.c index b372044e353202..befcbe30600f2c 100644 --- a/PC/launcher2.c +++ b/PC/launcher2.c @@ -1962,6 +1962,7 @@ struct AppxSearchInfo { struct AppxSearchInfo APPX_SEARCH[] = { // Releases made through the Store + { L"PythonSoftwareFoundation.Python.3.14_qbz5n2kfra8p0", L"3.14", 10 }, { L"PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0", L"3.13", 10 }, { L"PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0", L"3.12", 10 }, { L"PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0", L"3.11", 10 }, @@ -1970,8 +1971,9 @@ struct AppxSearchInfo APPX_SEARCH[] = { { L"PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0", L"3.8", 10 }, // Side-loadable releases. Note that the publisher ID changes whenever we - // renew our code-signing certificate, so the newer ID has a higher - // priority (lower sortKey) + // change our code signing certificate subject, so the newer IDs have higher + // priorities (lower sortKey) + { L"PythonSoftwareFoundation.Python.3.14_3847v3x7pw1km", L"3.14", 11 }, { L"PythonSoftwareFoundation.Python.3.13_3847v3x7pw1km", L"3.13", 11 }, { L"PythonSoftwareFoundation.Python.3.12_3847v3x7pw1km", L"3.12", 11 }, { L"PythonSoftwareFoundation.Python.3.11_3847v3x7pw1km", L"3.11", 11 }, @@ -2054,7 +2056,8 @@ struct StoreSearchInfo { struct StoreSearchInfo STORE_SEARCH[] = { - { L"3", /* 3.12 */ L"9NCVDN91XZQP" }, + { L"3", /* 3.13 */ L"9PNRBTZXMB4Z" }, + { L"3.14", L"9NTRHQCBBPR8" }, { L"3.13", L"9PNRBTZXMB4Z" }, { L"3.12", L"9NCVDN91XZQP" }, { L"3.11", L"9NRWMJP3717K" }, From a38fef4439139743e3334c1d69f24cafdf4d71da Mon Sep 17 00:00:00 2001 From: Furkan Onder Date: Thu, 17 Oct 2024 01:42:29 +0300 Subject: [PATCH 010/106] gh-125620: Remove unnecessary import of subprocess in spawnv_passfds (#125624) Remove unnecessary import of subprocess in multiprocessing.util.spawnv_passfds. 
--- Lib/multiprocessing/util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/multiprocessing/util.py b/Lib/multiprocessing/util.py index d48ef8a86b34e1..b7192042b9cf47 100644 --- a/Lib/multiprocessing/util.py +++ b/Lib/multiprocessing/util.py @@ -438,7 +438,6 @@ def _flush_std_streams(): def spawnv_passfds(path, args, passfds): import _posixsubprocess - import subprocess passfds = tuple(sorted(map(int, passfds))) errpipe_read, errpipe_write = os.pipe() try: From a5a7f5e16d8c3938d266703ea8fba8ffee3e3ae5 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 16 Oct 2024 16:50:46 -0600 Subject: [PATCH 011/106] gh-124694: Add concurrent.futures.InterpreterPoolExecutor (gh-124548) This is an implementation of InterpreterPoolExecutor that builds on ThreadPoolExecutor. (Note that this is not tied to PEP 734, which is strictly about adding a new stdlib module.) Possible future improvements: * support passing a script for the initializer or to submit() * support passing (most) arbitrary functions without pickling * support passing closures * optionally exec functions against __main__ instead of the their original module --- Doc/library/asyncio-dev.rst | 6 +- Doc/library/asyncio-eventloop.rst | 9 +- Doc/library/asyncio-llapi-index.rst | 2 +- Doc/library/concurrent.futures.rst | 135 ++++++- Doc/whatsnew/3.14.rst | 8 + Lib/concurrent/futures/__init__.py | 12 +- Lib/concurrent/futures/interpreter.py | 241 ++++++++++++ Lib/concurrent/futures/thread.py | 90 +++-- Lib/test/test_concurrent_futures/executor.py | 4 +- .../test_interpreter_pool.py | 346 ++++++++++++++++++ Lib/test/test_concurrent_futures/util.py | 5 + ...-09-27-15-42-55.gh-issue-124694.uUy32y.rst | 6 + 12 files changed, 826 insertions(+), 38 deletions(-) create mode 100644 Lib/concurrent/futures/interpreter.py create mode 100644 Lib/test/test_concurrent_futures/test_interpreter_pool.py create mode 100644 Misc/NEWS.d/next/Library/2024-09-27-15-42-55.gh-issue-124694.uUy32y.rst diff --git 
a/Doc/library/asyncio-dev.rst b/Doc/library/asyncio-dev.rst index a9c3a0183bb72d..44b507a9811116 100644 --- a/Doc/library/asyncio-dev.rst +++ b/Doc/library/asyncio-dev.rst @@ -103,7 +103,8 @@ To handle signals the event loop must be run in the main thread. The :meth:`loop.run_in_executor` method can be used with a -:class:`concurrent.futures.ThreadPoolExecutor` to execute +:class:`concurrent.futures.ThreadPoolExecutor` or +:class:`~concurrent.futures.InterpreterPoolExecutor` to execute blocking code in a different OS thread without blocking the OS thread that the event loop runs in. @@ -128,7 +129,8 @@ if a function performs a CPU-intensive calculation for 1 second, all concurrent asyncio Tasks and IO operations would be delayed by 1 second. -An executor can be used to run a task in a different thread or even in +An executor can be used to run a task in a different thread, +including in a different interpreter, or even in a different process to avoid blocking the OS thread with the event loop. See the :meth:`loop.run_in_executor` method for more details. diff --git a/Doc/library/asyncio-eventloop.rst b/Doc/library/asyncio-eventloop.rst index 943683f6b8a7f6..14fd153f640f05 100644 --- a/Doc/library/asyncio-eventloop.rst +++ b/Doc/library/asyncio-eventloop.rst @@ -1305,6 +1305,12 @@ Executing code in thread or process pools pool, cpu_bound) print('custom process pool', result) + # 4. Run in a custom interpreter pool: + with concurrent.futures.InterpreterPoolExecutor() as pool: + result = await loop.run_in_executor( + pool, cpu_bound) + print('custom interpreter pool', result) + if __name__ == '__main__': asyncio.run(main()) @@ -1329,7 +1335,8 @@ Executing code in thread or process pools Set *executor* as the default executor used by :meth:`run_in_executor`. *executor* must be an instance of - :class:`~concurrent.futures.ThreadPoolExecutor`. + :class:`~concurrent.futures.ThreadPoolExecutor`, which includes + :class:`~concurrent.futures.InterpreterPoolExecutor`. .. 
versionchanged:: 3.11 *executor* must be an instance of diff --git a/Doc/library/asyncio-llapi-index.rst b/Doc/library/asyncio-llapi-index.rst index 3e21054aa4fe9e..f5af888f31f186 100644 --- a/Doc/library/asyncio-llapi-index.rst +++ b/Doc/library/asyncio-llapi-index.rst @@ -96,7 +96,7 @@ See also the main documentation section about the - Invoke a callback *at* the given time. -.. rubric:: Thread/Process Pool +.. rubric:: Thread/Interpreter/Process Pool .. list-table:: :widths: 50 50 :class: full-width-table diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index ce72127127c7a6..45a73705f10e92 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -15,9 +15,10 @@ The :mod:`concurrent.futures` module provides a high-level interface for asynchronously executing callables. The asynchronous execution can be performed with threads, using -:class:`ThreadPoolExecutor`, or separate processes, using -:class:`ProcessPoolExecutor`. Both implement the same interface, which is -defined by the abstract :class:`Executor` class. +:class:`ThreadPoolExecutor` or :class:`InterpreterPoolExecutor`, +or separate processes, using :class:`ProcessPoolExecutor`. +Each implements the same interface, which is defined +by the abstract :class:`Executor` class. .. include:: ../includes/wasm-notavail.rst @@ -63,7 +64,8 @@ Executor Objects setting *chunksize* to a positive integer. For very long iterables, using a large value for *chunksize* can significantly improve performance compared to the default size of 1. With - :class:`ThreadPoolExecutor`, *chunksize* has no effect. + :class:`ThreadPoolExecutor` and :class:`InterpreterPoolExecutor`, + *chunksize* has no effect. .. versionchanged:: 3.5 Added the *chunksize* argument. 
@@ -227,6 +229,111 @@ ThreadPoolExecutor Example print('%r page is %d bytes' % (url, len(data))) +InterpreterPoolExecutor +----------------------- + +The :class:`InterpreterPoolExecutor` class uses a pool of interpreters +to execute calls asynchronously. It is a :class:`ThreadPoolExecutor` +subclass, which means each worker is running in its own thread. +The difference here is that each worker has its own interpreter, +and runs each task using that interpreter. + +The biggest benefit to using interpreters instead of only threads +is true multi-core parallelism. Each interpreter has its own +:term:`Global Interpreter Lock <global interpreter lock>`, so code +running in one interpreter can run on one CPU core, while code in +another interpreter runs unblocked on a different core. + +The tradeoff is that writing concurrent code for use with multiple +interpreters can take extra effort. However, this is because it +forces you to be deliberate about how and when interpreters interact, +and to be explicit about what data is shared between interpreters. +This results in several benefits that help balance the extra effort, +including true multi-core parallelism. For example, code written +this way can make it easier to reason about concurrency. Another +major benefit is that you don't have to deal with several of the +big pain points of using threads, like race conditions. + +Each worker's interpreter is isolated from all the other interpreters. +"Isolated" means each interpreter has its own runtime state and +operates completely independently. For example, if you redirect +:data:`sys.stdout` in one interpreter, it will not be automatically +redirected in any other interpreter. If you import a module in one +interpreter, it is not automatically imported in any other. You +would need to import the module separately in each interpreter where +you need it. 
In fact, each module imported in an interpreter is +a completely separate object from the same module in a different +interpreter, including :mod:`sys`, :mod:`builtins`, +and even ``__main__``. + +Isolation means a mutable object, or other data, cannot be used +by more than one interpreter at the same time. That effectively means +interpreters cannot actually share such objects or data. Instead, +each interpreter must have its own copy, and you will have to +synchronize any changes between the copies manually. Immutable +objects and data, like the builtin singletons, strings, and tuples +of immutable objects, don't have these limitations. + +Communicating and synchronizing between interpreters is most effectively +done using dedicated tools, like those proposed in :pep:`734`. One less +efficient alternative is to serialize with :mod:`pickle` and then send +the bytes over a shared :mod:`socket <socket>` or +:func:`pipe <os.pipe>`. + +.. class:: InterpreterPoolExecutor(max_workers=None, thread_name_prefix='', initializer=None, initargs=(), shared=None) + + A :class:`ThreadPoolExecutor` subclass that executes calls asynchronously + using a pool of at most *max_workers* threads. Each thread runs + tasks in its own interpreter. The worker interpreters are isolated + from each other, which means each has its own runtime state and that + they can't share any mutable objects or other data. Each interpreter + has its own :term:`Global Interpreter Lock <global interpreter lock>`, + which means code run with this executor has true multi-core parallelism. + + The optional *initializer* and *initargs* arguments have the same + meaning as for :class:`!ThreadPoolExecutor`: the initializer is run + when each worker is created, though in this case it is run in + the worker's interpreter. The executor serializes the *initializer* + and *initargs* using :mod:`pickle` when sending them to the worker's + interpreter. + + .. note:: + Functions defined in the ``__main__`` module cannot be pickled + and thus cannot be used. 
+ + .. note:: + The executor may replace uncaught exceptions from *initializer* + with :class:`~concurrent.futures.interpreter.ExecutionFailed`. + + The optional *shared* argument is a :class:`dict` of objects that all + interpreters in the pool share. The *shared* items are added to each + interpreter's ``__main__`` module. Not all objects are shareable. + Shareable objects include the builtin singletons, :class:`str` + and :class:`bytes`, and :class:`memoryview`. See :pep:`734` + for more info. + + Other caveats from parent :class:`ThreadPoolExecutor` apply here. + +:meth:`~Executor.submit` and :meth:`~Executor.map` work like normal, +except the worker serializes the callable and arguments using +:mod:`pickle` when sending them to its interpreter. The worker +likewise serializes the return value when sending it back. + +.. note:: + Functions defined in the ``__main__`` module cannot be pickled + and thus cannot be used. + +When a worker's current task raises an uncaught exception, the worker +always tries to preserve the exception as-is. If that is successful +then it also sets the ``__cause__`` to a corresponding +:class:`~concurrent.futures.interpreter.ExecutionFailed` +instance, which contains a summary of the original exception. +In the uncommon case that the worker is not able to preserve the +original as-is then it directly preserves the corresponding +:class:`~concurrent.futures.interpreter.ExecutionFailed` +instance instead. + + ProcessPoolExecutor ------------------- @@ -574,6 +681,26 @@ Exception classes .. versionadded:: 3.7 +.. currentmodule:: concurrent.futures.interpreter + +.. exception:: BrokenInterpreterPool + + Derived from :exc:`~concurrent.futures.thread.BrokenThreadPool`, + this exception class is raised when one of the workers + of a :class:`~concurrent.futures.InterpreterPoolExecutor` + has failed initializing. + + .. versionadded:: next + +.. 
exception:: ExecutionFailed + + Raised from :class:`~concurrent.futures.InterpreterPoolExecutor` when + the given initializer fails or from + :meth:`~concurrent.futures.Executor.submit` when there's an uncaught + exception from the submitted task. + + .. versionadded:: next + .. currentmodule:: concurrent.futures.process .. exception:: BrokenProcessPool diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b106578fe9e8b0..9543af3c7ca225 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -225,6 +225,14 @@ ast * The ``repr()`` output for AST nodes now includes more information. (Contributed by Tomas R in :gh:`116022`.) +concurrent.futures +------------------ + +* Add :class:`~concurrent.futures.InterpreterPoolExecutor`, + which exposes "subinterpreters" (multiple Python interpreters in the + same process) to Python code. This is separate from the proposed API + in :pep:`734`. + (Contributed by Eric Snow in :gh:`124548`.) ctypes ------ diff --git a/Lib/concurrent/futures/__init__.py b/Lib/concurrent/futures/__init__.py index 72de617a5b6f61..7ada7431c1ab8c 100644 --- a/Lib/concurrent/futures/__init__.py +++ b/Lib/concurrent/futures/__init__.py @@ -29,6 +29,7 @@ 'Executor', 'wait', 'as_completed', + 'InterpreterPoolExecutor', 'ProcessPoolExecutor', 'ThreadPoolExecutor', ) @@ -39,7 +40,7 @@ def __dir__(): def __getattr__(name): - global ProcessPoolExecutor, ThreadPoolExecutor + global ProcessPoolExecutor, ThreadPoolExecutor, InterpreterPoolExecutor if name == 'ProcessPoolExecutor': from .process import ProcessPoolExecutor as pe @@ -51,4 +52,13 @@ def __getattr__(name): ThreadPoolExecutor = te return te + if name == 'InterpreterPoolExecutor': + try: + from .interpreter import InterpreterPoolExecutor as ie + except ModuleNotFoundError: + ie = InterpreterPoolExecutor = None + else: + InterpreterPoolExecutor = ie + return ie + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/Lib/concurrent/futures/interpreter.py 
b/Lib/concurrent/futures/interpreter.py new file mode 100644 index 00000000000000..fd7941adb766bb --- /dev/null +++ b/Lib/concurrent/futures/interpreter.py @@ -0,0 +1,241 @@ +"""Implements InterpreterPoolExecutor.""" + +import contextlib +import pickle +import textwrap +from . import thread as _thread +import _interpreters +import _interpqueues + + +class ExecutionFailed(_interpreters.InterpreterError): + """An unhandled exception happened during execution.""" + + def __init__(self, excinfo): + msg = excinfo.formatted + if not msg: + if excinfo.type and excinfo.msg: + msg = f'{excinfo.type.__name__}: {excinfo.msg}' + else: + msg = excinfo.type.__name__ or excinfo.msg + super().__init__(msg) + self.excinfo = excinfo + + def __str__(self): + try: + formatted = self.excinfo.errdisplay + except Exception: + return super().__str__() + else: + return textwrap.dedent(f""" +{super().__str__()} + +Uncaught in the interpreter: + +{formatted} + """.strip()) + + +UNBOUND = 2 # error; this should not happen. + + +class WorkerContext(_thread.WorkerContext): + + @classmethod + def prepare(cls, initializer, initargs, shared): + def resolve_task(fn, args, kwargs): + if isinstance(fn, str): + # XXX Circle back to this later. + raise TypeError('scripts not supported') + if args or kwargs: + raise ValueError(f'a script does not take args or kwargs, got {args!r} and {kwargs!r}') + data = textwrap.dedent(fn) + kind = 'script' + # Make sure the script compiles. + # Ideally we wouldn't throw away the resulting code + # object. However, there isn't much to be done until + # code objects are shareable and/or we do a better job + # of supporting code objects in _interpreters.exec(). + compile(data, '', 'exec') + else: + # Functions defined in the __main__ module can't be pickled, + # so they can't be used here. In the future, we could possibly + # borrow from multiprocessing to work around this. 
+ data = pickle.dumps((fn, args, kwargs)) + kind = 'function' + return (data, kind) + + if initializer is not None: + try: + initdata = resolve_task(initializer, initargs, {}) + except ValueError: + if isinstance(initializer, str) and initargs: + raise ValueError(f'an initializer script does not take args, got {initargs!r}') + raise # re-raise + else: + initdata = None + def create_context(): + return cls(initdata, shared) + return create_context, resolve_task + + @classmethod + @contextlib.contextmanager + def _capture_exc(cls, resultsid): + try: + yield + except BaseException as exc: + # Send the captured exception out on the results queue, + # but still leave it unhandled for the interpreter to handle. + err = pickle.dumps(exc) + _interpqueues.put(resultsid, (None, err), 1, UNBOUND) + raise # re-raise + + @classmethod + def _send_script_result(cls, resultsid): + _interpqueues.put(resultsid, (None, None), 0, UNBOUND) + + @classmethod + def _call(cls, func, args, kwargs, resultsid): + with cls._capture_exc(resultsid): + res = func(*args or (), **kwargs or {}) + # Send the result back. 
+ try: + _interpqueues.put(resultsid, (res, None), 0, UNBOUND) + except _interpreters.NotShareableError: + res = pickle.dumps(res) + _interpqueues.put(resultsid, (res, None), 1, UNBOUND) + + @classmethod + def _call_pickled(cls, pickled, resultsid): + fn, args, kwargs = pickle.loads(pickled) + cls._call(fn, args, kwargs, resultsid) + + def __init__(self, initdata, shared=None): + self.initdata = initdata + self.shared = dict(shared) if shared else None + self.interpid = None + self.resultsid = None + + def __del__(self): + if self.interpid is not None: + self.finalize() + + def _exec(self, script): + assert self.interpid is not None + excinfo = _interpreters.exec(self.interpid, script, restrict=True) + if excinfo is not None: + raise ExecutionFailed(excinfo) + + def initialize(self): + assert self.interpid is None, self.interpid + self.interpid = _interpreters.create(reqrefs=True) + try: + _interpreters.incref(self.interpid) + + maxsize = 0 + fmt = 0 + self.resultsid = _interpqueues.create(maxsize, fmt, UNBOUND) + + self._exec(f'from {__name__} import WorkerContext') + + if self.shared: + _interpreters.set___main___attrs( + self.interpid, self.shared, restrict=True) + + if self.initdata: + self.run(self.initdata) + except BaseException: + self.finalize() + raise # re-raise + + def finalize(self): + interpid = self.interpid + resultsid = self.resultsid + self.resultsid = None + self.interpid = None + if resultsid is not None: + try: + _interpqueues.destroy(resultsid) + except _interpqueues.QueueNotFoundError: + pass + if interpid is not None: + try: + _interpreters.decref(interpid) + except _interpreters.InterpreterNotFoundError: + pass + + def run(self, task): + data, kind = task + if kind == 'script': + raise NotImplementedError('script kind disabled') + script = f""" +with WorkerContext._capture_exc({self.resultsid}): +{textwrap.indent(data, ' ')} +WorkerContext._send_script_result({self.resultsid})""" + elif kind == 'function': + script = 
f'WorkerContext._call_pickled({data!r}, {self.resultsid})' + else: + raise NotImplementedError(kind) + + try: + self._exec(script) + except ExecutionFailed as exc: + exc_wrapper = exc + else: + exc_wrapper = None + + # Return the result, or raise the exception. + while True: + try: + obj = _interpqueues.get(self.resultsid) + except _interpqueues.QueueNotFoundError: + raise # re-raise + except _interpqueues.QueueError: + continue + except ModuleNotFoundError: + # interpreters.queues doesn't exist, which means + # QueueEmpty doesn't. Act as though it does. + continue + else: + break + (res, excdata), pickled, unboundop = obj + assert unboundop is None, unboundop + if excdata is not None: + assert res is None, res + assert pickled + assert exc_wrapper is not None + exc = pickle.loads(excdata) + raise exc from exc_wrapper + return pickle.loads(res) if pickled else res + + +class BrokenInterpreterPool(_thread.BrokenThreadPool): + """ + Raised when a worker thread in an InterpreterPoolExecutor failed initializing. + """ + + +class InterpreterPoolExecutor(_thread.ThreadPoolExecutor): + + BROKEN = BrokenInterpreterPool + + @classmethod + def prepare_context(cls, initializer, initargs, shared): + return WorkerContext.prepare(initializer, initargs, shared) + + def __init__(self, max_workers=None, thread_name_prefix='', + initializer=None, initargs=(), shared=None): + """Initializes a new InterpreterPoolExecutor instance. + + Args: + max_workers: The maximum number of interpreters that can be used to + execute the given calls. + thread_name_prefix: An optional name prefix to give our threads. + initializer: A callable or script used to initialize + each worker interpreter. + initargs: A tuple of arguments to pass to the initializer. + shared: A mapping of shareabled objects to be inserted into + each worker interpreter. 
+ """ + super().__init__(max_workers, thread_name_prefix, + initializer, initargs, shared=shared) diff --git a/Lib/concurrent/futures/thread.py b/Lib/concurrent/futures/thread.py index a024033f35fb54..16cc5533d429ef 100644 --- a/Lib/concurrent/futures/thread.py +++ b/Lib/concurrent/futures/thread.py @@ -43,19 +43,46 @@ def _python_exit(): after_in_parent=_global_shutdown_lock.release) +class WorkerContext: + + @classmethod + def prepare(cls, initializer, initargs): + if initializer is not None: + if not callable(initializer): + raise TypeError("initializer must be a callable") + def create_context(): + return cls(initializer, initargs) + def resolve_task(fn, args, kwargs): + return (fn, args, kwargs) + return create_context, resolve_task + + def __init__(self, initializer, initargs): + self.initializer = initializer + self.initargs = initargs + + def initialize(self): + if self.initializer is not None: + self.initializer(*self.initargs) + + def finalize(self): + pass + + def run(self, task): + fn, args, kwargs = task + return fn(*args, **kwargs) + + class _WorkItem: - def __init__(self, future, fn, args, kwargs): + def __init__(self, future, task): self.future = future - self.fn = fn - self.args = args - self.kwargs = kwargs + self.task = task - def run(self): + def run(self, ctx): if not self.future.set_running_or_notify_cancel(): return try: - result = self.fn(*self.args, **self.kwargs) + result = ctx.run(self.task) except BaseException as exc: self.future.set_exception(exc) # Break a reference cycle with the exception 'exc' @@ -66,16 +93,15 @@ def run(self): __class_getitem__ = classmethod(types.GenericAlias) -def _worker(executor_reference, work_queue, initializer, initargs): - if initializer is not None: - try: - initializer(*initargs) - except BaseException: - _base.LOGGER.critical('Exception in initializer:', exc_info=True) - executor = executor_reference() - if executor is not None: - executor._initializer_failed() - return +def _worker(executor_reference, 
ctx, work_queue): + try: + ctx.initialize() + except BaseException: + _base.LOGGER.critical('Exception in initializer:', exc_info=True) + executor = executor_reference() + if executor is not None: + executor._initializer_failed() + return try: while True: try: @@ -89,7 +115,7 @@ def _worker(executor_reference, work_queue, initializer, initargs): work_item = work_queue.get(block=True) if work_item is not None: - work_item.run() + work_item.run(ctx) # Delete references to object. See GH-60488 del work_item continue @@ -110,6 +136,8 @@ def _worker(executor_reference, work_queue, initializer, initargs): del executor except BaseException: _base.LOGGER.critical('Exception in worker', exc_info=True) + finally: + ctx.finalize() class BrokenThreadPool(_base.BrokenExecutor): @@ -120,11 +148,17 @@ class BrokenThreadPool(_base.BrokenExecutor): class ThreadPoolExecutor(_base.Executor): + BROKEN = BrokenThreadPool + # Used to assign unique thread names when thread_name_prefix is not supplied. _counter = itertools.count().__next__ + @classmethod + def prepare_context(cls, initializer, initargs): + return WorkerContext.prepare(initializer, initargs) + def __init__(self, max_workers=None, thread_name_prefix='', - initializer=None, initargs=()): + initializer=None, initargs=(), **ctxkwargs): """Initializes a new ThreadPoolExecutor instance. Args: @@ -133,6 +167,7 @@ def __init__(self, max_workers=None, thread_name_prefix='', thread_name_prefix: An optional name prefix to give our threads. initializer: A callable used to initialize worker threads. initargs: A tuple of arguments to pass to the initializer. + ctxkwargs: Additional arguments to cls.prepare_context(). 
""" if max_workers is None: # ThreadPoolExecutor is often used to: @@ -146,8 +181,9 @@ def __init__(self, max_workers=None, thread_name_prefix='', if max_workers <= 0: raise ValueError("max_workers must be greater than 0") - if initializer is not None and not callable(initializer): - raise TypeError("initializer must be a callable") + (self._create_worker_context, + self._resolve_work_item_task, + ) = type(self).prepare_context(initializer, initargs, **ctxkwargs) self._max_workers = max_workers self._work_queue = queue.SimpleQueue() @@ -158,13 +194,11 @@ def __init__(self, max_workers=None, thread_name_prefix='', self._shutdown_lock = threading.Lock() self._thread_name_prefix = (thread_name_prefix or ("ThreadPoolExecutor-%d" % self._counter())) - self._initializer = initializer - self._initargs = initargs def submit(self, fn, /, *args, **kwargs): with self._shutdown_lock, _global_shutdown_lock: if self._broken: - raise BrokenThreadPool(self._broken) + raise self.BROKEN(self._broken) if self._shutdown: raise RuntimeError('cannot schedule new futures after shutdown') @@ -173,7 +207,8 @@ def submit(self, fn, /, *args, **kwargs): 'interpreter shutdown') f = _base.Future() - w = _WorkItem(f, fn, args, kwargs) + task = self._resolve_work_item_task(fn, args, kwargs) + w = _WorkItem(f, task) self._work_queue.put(w) self._adjust_thread_count() @@ -196,9 +231,8 @@ def weakref_cb(_, q=self._work_queue): num_threads) t = threading.Thread(name=thread_name, target=_worker, args=(weakref.ref(self, weakref_cb), - self._work_queue, - self._initializer, - self._initargs)) + self._create_worker_context(), + self._work_queue)) t.start() self._threads.add(t) _threads_queues[t] = self._work_queue @@ -214,7 +248,7 @@ def _initializer_failed(self): except queue.Empty: break if work_item is not None: - work_item.future.set_exception(BrokenThreadPool(self._broken)) + work_item.future.set_exception(self.BROKEN(self._broken)) def shutdown(self, wait=True, *, cancel_futures=False): with 
self._shutdown_lock: diff --git a/Lib/test/test_concurrent_futures/executor.py b/Lib/test/test_concurrent_futures/executor.py index 4160656cb133ab..b97d9ffd94b1f8 100644 --- a/Lib/test/test_concurrent_futures/executor.py +++ b/Lib/test/test_concurrent_futures/executor.py @@ -23,6 +23,7 @@ def make_dummy_object(_): class ExecutorTest: + # Executor.shutdown() and context manager usage is tested by # ExecutorShutdownTest. def test_submit(self): @@ -52,7 +53,8 @@ def test_map_exception(self): i = self.executor.map(divmod, [1, 1, 1, 1], [2, 3, 0, 5]) self.assertEqual(i.__next__(), (0, 1)) self.assertEqual(i.__next__(), (0, 1)) - self.assertRaises(ZeroDivisionError, i.__next__) + with self.assertRaises(ZeroDivisionError): + i.__next__() @support.requires_resource('walltime') def test_map_timeout(self): diff --git a/Lib/test/test_concurrent_futures/test_interpreter_pool.py b/Lib/test/test_concurrent_futures/test_interpreter_pool.py new file mode 100644 index 00000000000000..0de03c0d669399 --- /dev/null +++ b/Lib/test/test_concurrent_futures/test_interpreter_pool.py @@ -0,0 +1,346 @@ +import asyncio +import contextlib +import io +import os +import pickle +import sys +import time +import unittest +from concurrent.futures.interpreter import ( + ExecutionFailed, BrokenInterpreterPool, +) +import _interpreters +from test import support +import test.test_asyncio.utils as testasyncio_utils +from test.support.interpreters import queues + +from .executor import ExecutorTest, mul +from .util import BaseTestCase, InterpreterPoolMixin, setup_module + + +def noop(): + pass + + +def write_msg(fd, msg): + os.write(fd, msg + b'\0') + + +def read_msg(fd): + msg = b'' + while ch := os.read(fd, 1): + if ch == b'\0': + return msg + msg += ch + + +def get_current_name(): + return __name__ + + +def fail(exctype, msg=None): + raise exctype(msg) + + +def get_current_interpid(*extra): + interpid, _ = _interpreters.get_current() + return (interpid, *extra) + + +class 
InterpretersMixin(InterpreterPoolMixin): + + def pipe(self): + r, w = os.pipe() + self.addCleanup(lambda: os.close(r)) + self.addCleanup(lambda: os.close(w)) + return r, w + + +class InterpreterPoolExecutorTest( + InterpretersMixin, ExecutorTest, BaseTestCase): + + @unittest.expectedFailure + def test_init_script(self): + msg1 = b'step: init' + msg2 = b'step: run' + r, w = self.pipe() + initscript = f""" + import os + msg = {msg2!r} + os.write({w}, {msg1!r} + b'\\0') + """ + script = f""" + os.write({w}, msg + b'\\0') + """ + os.write(w, b'\0') + + executor = self.executor_type(initializer=initscript) + before_init = os.read(r, 100) + fut = executor.submit(script) + after_init = read_msg(r) + fut.result() + after_run = read_msg(r) + + self.assertEqual(before_init, b'\0') + self.assertEqual(after_init, msg1) + self.assertEqual(after_run, msg2) + + @unittest.expectedFailure + def test_init_script_args(self): + with self.assertRaises(ValueError): + self.executor_type(initializer='pass', initargs=('spam',)) + + def test_init_func(self): + msg = b'step: init' + r, w = self.pipe() + os.write(w, b'\0') + + executor = self.executor_type( + initializer=write_msg, initargs=(w, msg)) + before = os.read(r, 100) + executor.submit(mul, 10, 10) + after = read_msg(r) + + self.assertEqual(before, b'\0') + self.assertEqual(after, msg) + + def test_init_closure(self): + count = 0 + def init1(): + assert count == 0, count + def init2(): + nonlocal count + count += 1 + + with self.assertRaises(pickle.PicklingError): + self.executor_type(initializer=init1) + with self.assertRaises(pickle.PicklingError): + self.executor_type(initializer=init2) + + def test_init_instance_method(self): + class Spam: + def initializer(self): + raise NotImplementedError + spam = Spam() + + with self.assertRaises(pickle.PicklingError): + self.executor_type(initializer=spam.initializer) + + def test_init_shared(self): + msg = b'eggs' + r, w = self.pipe() + script = f"""if True: + import os + if __name__ != 
'__main__': + import __main__ + spam = __main__.spam + os.write({w}, spam + b'\\0') + """ + + executor = self.executor_type(shared={'spam': msg}) + fut = executor.submit(exec, script) + fut.result() + after = read_msg(r) + + self.assertEqual(after, msg) + + @unittest.expectedFailure + def test_init_exception_in_script(self): + executor = self.executor_type(initializer='raise Exception("spam")') + with executor: + with contextlib.redirect_stderr(io.StringIO()) as stderr: + fut = executor.submit('pass') + with self.assertRaises(BrokenInterpreterPool): + fut.result() + stderr = stderr.getvalue() + self.assertIn('ExecutionFailed: Exception: spam', stderr) + self.assertIn('Uncaught in the interpreter:', stderr) + self.assertIn('The above exception was the direct cause of the following exception:', + stderr) + + def test_init_exception_in_func(self): + executor = self.executor_type(initializer=fail, + initargs=(Exception, 'spam')) + with executor: + with contextlib.redirect_stderr(io.StringIO()) as stderr: + fut = executor.submit(noop) + with self.assertRaises(BrokenInterpreterPool): + fut.result() + stderr = stderr.getvalue() + self.assertIn('ExecutionFailed: Exception: spam', stderr) + self.assertIn('Uncaught in the interpreter:', stderr) + self.assertIn('The above exception was the direct cause of the following exception:', + stderr) + + @unittest.expectedFailure + def test_submit_script(self): + msg = b'spam' + r, w = self.pipe() + script = f""" + import os + os.write({w}, __name__.encode('utf-8') + b'\\0') + """ + executor = self.executor_type() + + fut = executor.submit(script) + res = fut.result() + after = read_msg(r) + + self.assertEqual(after, b'__main__') + self.assertIs(res, None) + + def test_submit_closure(self): + spam = True + def task1(): + return spam + def task2(): + nonlocal spam + spam += 1 + return spam + + executor = self.executor_type() + with self.assertRaises(pickle.PicklingError): + executor.submit(task1) + with 
self.assertRaises(pickle.PicklingError): + executor.submit(task2) + + def test_submit_local_instance(self): + class Spam: + def __init__(self): + self.value = True + + executor = self.executor_type() + with self.assertRaises(pickle.PicklingError): + executor.submit(Spam) + + def test_submit_instance_method(self): + class Spam: + def run(self): + return True + spam = Spam() + + executor = self.executor_type() + with self.assertRaises(pickle.PicklingError): + executor.submit(spam.run) + + def test_submit_func_globals(self): + executor = self.executor_type() + fut = executor.submit(get_current_name) + name = fut.result() + + self.assertEqual(name, __name__) + self.assertNotEqual(name, '__main__') + + @unittest.expectedFailure + def test_submit_exception_in_script(self): + fut = self.executor.submit('raise Exception("spam")') + with self.assertRaises(Exception) as captured: + fut.result() + self.assertIs(type(captured.exception), Exception) + self.assertEqual(str(captured.exception), 'spam') + cause = captured.exception.__cause__ + self.assertIs(type(cause), ExecutionFailed) + for attr in ('__name__', '__qualname__', '__module__'): + self.assertEqual(getattr(cause.excinfo.type, attr), + getattr(Exception, attr)) + self.assertEqual(cause.excinfo.msg, 'spam') + + def test_submit_exception_in_func(self): + fut = self.executor.submit(fail, Exception, 'spam') + with self.assertRaises(Exception) as captured: + fut.result() + self.assertIs(type(captured.exception), Exception) + self.assertEqual(str(captured.exception), 'spam') + cause = captured.exception.__cause__ + self.assertIs(type(cause), ExecutionFailed) + for attr in ('__name__', '__qualname__', '__module__'): + self.assertEqual(getattr(cause.excinfo.type, attr), + getattr(Exception, attr)) + self.assertEqual(cause.excinfo.msg, 'spam') + + def test_saturation(self): + blocker = queues.create() + executor = self.executor_type(4, shared=dict(blocker=blocker)) + + for i in range(15 * executor._max_workers): + 
executor.submit(exec, 'import __main__; __main__.blocker.get()') + #executor.submit('blocker.get()') + self.assertEqual(len(executor._threads), executor._max_workers) + for i in range(15 * executor._max_workers): + blocker.put_nowait(None) + executor.shutdown(wait=True) + + @support.requires_gil_enabled("gh-117344: test is flaky without the GIL") + def test_idle_thread_reuse(self): + executor = self.executor_type() + executor.submit(mul, 21, 2).result() + executor.submit(mul, 6, 7).result() + executor.submit(mul, 3, 14).result() + self.assertEqual(len(executor._threads), 1) + executor.shutdown(wait=True) + + +class AsyncioTest(InterpretersMixin, testasyncio_utils.TestCase): + + def setUp(self): + super().setUp() + self.loop = asyncio.new_event_loop() + self.set_event_loop(self.loop) + + self.executor = self.executor_type() + self.addCleanup(lambda: self.executor.shutdown()) + + def tearDown(self): + if not self.loop.is_closed(): + testasyncio_utils.run_briefly(self.loop) + + self.doCleanups() + support.gc_collect() + super().tearDown() + + def test_run_in_executor(self): + unexpected, _ = _interpreters.get_current() + + func = get_current_interpid + fut = self.loop.run_in_executor(self.executor, func, 'yo') + interpid, res = self.loop.run_until_complete(fut) + + self.assertEqual(res, 'yo') + self.assertNotEqual(interpid, unexpected) + + def test_run_in_executor_cancel(self): + executor = self.executor_type() + + called = False + + def patched_call_soon(*args): + nonlocal called + called = True + + func = time.sleep + fut = self.loop.run_in_executor(self.executor, func, 0.05) + fut.cancel() + self.loop.run_until_complete( + self.loop.shutdown_default_executor()) + self.loop.close() + self.loop.call_soon = patched_call_soon + self.loop.call_soon_threadsafe = patched_call_soon + time.sleep(0.4) + self.assertFalse(called) + + def test_default_executor(self): + unexpected, _ = _interpreters.get_current() + + self.loop.set_default_executor(self.executor) + fut = 
self.loop.run_in_executor(None, get_current_interpid) + interpid, = self.loop.run_until_complete(fut) + + self.assertNotEqual(interpid, unexpected) + + +def setUpModule(): + setup_module() + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_concurrent_futures/util.py b/Lib/test/test_concurrent_futures/util.py index 3b8ec3e205d5aa..52baab51340fc9 100644 --- a/Lib/test/test_concurrent_futures/util.py +++ b/Lib/test/test_concurrent_futures/util.py @@ -74,6 +74,10 @@ class ThreadPoolMixin(ExecutorMixin): executor_type = futures.ThreadPoolExecutor +class InterpreterPoolMixin(ExecutorMixin): + executor_type = futures.InterpreterPoolExecutor + + class ProcessPoolForkMixin(ExecutorMixin): executor_type = futures.ProcessPoolExecutor ctx = "fork" @@ -120,6 +124,7 @@ def get_context(self): def create_executor_tests(remote_globals, mixin, bases=(BaseTestCase,), executor_mixins=(ThreadPoolMixin, + InterpreterPoolMixin, ProcessPoolForkMixin, ProcessPoolForkserverMixin, ProcessPoolSpawnMixin)): diff --git a/Misc/NEWS.d/next/Library/2024-09-27-15-42-55.gh-issue-124694.uUy32y.rst b/Misc/NEWS.d/next/Library/2024-09-27-15-42-55.gh-issue-124694.uUy32y.rst new file mode 100644 index 00000000000000..1aa1a463b0c63a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-27-15-42-55.gh-issue-124694.uUy32y.rst @@ -0,0 +1,6 @@ +We've added :class:`concurrent.futures.InterpreterPoolExecutor`, which +allows you to run code in multiple isolated interpreters. This allows you +to circumvent the limitations of CPU-bound threads (due to the GIL). Patch +by Eric Snow. + +This addition is unrelated to :pep:`734`. 
From 624be8699aec22bef137041478078c6fafaf032e Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 17 Oct 2024 00:07:37 -0700 Subject: [PATCH 012/106] GH-99749: Add optional feature to suggest correct names (ArgumentParser) (GH-124456) --- Doc/library/argparse.rst | 28 ++++- Lib/argparse.py | 35 ++++-- Lib/test/test_argparse.py | 103 +++++++++++++++--- ...4-09-24-18-49-16.gh-issue-99749.gBDJX7.rst | 1 + 4 files changed, 144 insertions(+), 23 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-09-24-18-49-16.gh-issue-99749.gBDJX7.rst diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 51ab8e29ff96d5..ee8562b81770b6 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -61,7 +61,8 @@ ArgumentParser objects formatter_class=argparse.HelpFormatter, \ prefix_chars='-', fromfile_prefix_chars=None, \ argument_default=None, conflict_handler='error', \ - add_help=True, allow_abbrev=True, exit_on_error=True) + add_help=True, allow_abbrev=True, exit_on_error=True, \ + suggest_on_error=False) Create a new :class:`ArgumentParser` object. All parameters should be passed as keyword arguments. Each parameter has its own more detailed description @@ -103,6 +104,10 @@ ArgumentParser objects * exit_on_error_ - Determines whether or not ArgumentParser exits with error info when an error occurs. (default: ``True``) + * suggest_on_error_ - Enables suggestions for mistyped argument choices + and subparser names (default: ``False``) + + .. versionchanged:: 3.5 *allow_abbrev* parameter was added. @@ -559,6 +564,27 @@ If the user would like to catch errors manually, the feature can be enabled by s .. versionadded:: 3.9 +suggest_on_error +^^^^^^^^^^^^^^^^ + +By default, when a user passes an invalid argument choice or subparser name, +:class:`ArgumentParser` will exit with error info and list the permissible +argument choices (if specified) or subparser names as part of the error message. 
+ +If the user would like to enable suggestions for mistyped argument choices and +subparser names, the feature can be enabled by setting ``suggest_on_error`` to +``True``. Note that this only applies for arguments when the choices specified +are strings:: + + >>> parser = argparse.ArgumentParser(description='Process some integers.', suggest_on_error=True) + >>> parser.add_argument('--action', choices=['sum', 'max']) + >>> parser.add_argument('integers', metavar='N', type=int, nargs='+', + ... help='an integer for the accumulator') + >>> parser.parse_args(['--action', 'sumn', '1', '2', '3']) + tester.py: error: argument --action: invalid choice: 'sumn', maybe you meant 'sum'? (choose from sum, max) + +.. versionadded:: 3.14 + The add_argument() method ------------------------- diff --git a/Lib/argparse.py b/Lib/argparse.py index fa9f5211257e96..ece6f2e880d5cb 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1773,6 +1773,8 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer): - allow_abbrev -- Allow long options to be abbreviated unambiguously - exit_on_error -- Determines whether or not ArgumentParser exits with error info when an error occurs + - suggest_on_error -- Enables suggestions for mistyped argument choices + and subparser names.
(default: ``False``) """ def __init__(self, @@ -1788,7 +1790,8 @@ def __init__(self, conflict_handler='error', add_help=True, allow_abbrev=True, - exit_on_error=True): + exit_on_error=True, + suggest_on_error=False): superinit = super(ArgumentParser, self).__init__ superinit(description=description, @@ -1804,6 +1807,7 @@ def __init__(self, self.add_help = add_help self.allow_abbrev = allow_abbrev self.exit_on_error = exit_on_error + self.suggest_on_error = suggest_on_error add_group = self.add_argument_group self._positionals = add_group(_('positional arguments')) @@ -2601,14 +2605,27 @@ def _get_value(self, action, arg_string): def _check_value(self, action, value): # converted value must be one of the choices (if specified) choices = action.choices - if choices is not None: - if isinstance(choices, str): - choices = iter(choices) - if value not in choices: - args = {'value': str(value), - 'choices': ', '.join(map(str, action.choices))} - msg = _('invalid choice: %(value)r (choose from %(choices)s)') - raise ArgumentError(action, msg % args) + if choices is None: + return + + if isinstance(choices, str): + choices = iter(choices) + + if value not in choices: + args = {'value': str(value), + 'choices': ', '.join(map(str, action.choices))} + msg = _('invalid choice: %(value)r (choose from %(choices)s)') + + if self.suggest_on_error and isinstance(value, str): + if all(isinstance(choice, str) for choice in action.choices): + import difflib + suggestions = difflib.get_close_matches(value, action.choices, 1) + if suggestions: + args['closest'] = suggestions[0] + msg = _('invalid choice: %(value)r, maybe you meant %(closest)r? 
' + '(choose from %(choices)s)') + + raise ArgumentError(action, msg % args) # ======================= # Help-formatting methods diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 78692fd3474782..a3c096ef3199c8 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2253,6 +2253,95 @@ class TestNegativeNumber(ParserTestCase): ('--complex -1e-3j', NS(int=None, float=None, complex=-0.001j)), ] +class TestArgumentAndSubparserSuggestions(TestCase): + """Test error handling and suggestion when a user makes a typo""" + + def test_wrong_argument_error_with_suggestions(self): + parser = ErrorRaisingArgumentParser(suggest_on_error=True) + parser.add_argument('foo', choices=['bar', 'baz']) + with self.assertRaises(ArgumentParserError) as excinfo: + parser.parse_args(('bazz',)) + self.assertIn( + "error: argument foo: invalid choice: 'bazz', maybe you meant 'baz'? (choose from bar, baz)", + excinfo.exception.stderr + ) + + def test_wrong_argument_error_no_suggestions(self): + parser = ErrorRaisingArgumentParser(suggest_on_error=False) + parser.add_argument('foo', choices=['bar', 'baz']) + with self.assertRaises(ArgumentParserError) as excinfo: + parser.parse_args(('bazz',)) + self.assertIn( + "error: argument foo: invalid choice: 'bazz' (choose from bar, baz)", + excinfo.exception.stderr, + ) + + def test_wrong_argument_subparsers_with_suggestions(self): + parser = ErrorRaisingArgumentParser(suggest_on_error=True) + subparsers = parser.add_subparsers(required=True) + subparsers.add_parser('foo') + subparsers.add_parser('bar') + with self.assertRaises(ArgumentParserError) as excinfo: + parser.parse_args(('baz',)) + self.assertIn( + "error: argument {foo,bar}: invalid choice: 'baz', maybe you meant" + " 'bar'? 
(choose from foo, bar)", + excinfo.exception.stderr, + ) + + def test_wrong_argument_subparsers_no_suggestions(self): + parser = ErrorRaisingArgumentParser(suggest_on_error=False) + subparsers = parser.add_subparsers(required=True) + subparsers.add_parser('foo') + subparsers.add_parser('bar') + with self.assertRaises(ArgumentParserError) as excinfo: + parser.parse_args(('baz',)) + self.assertIn( + "error: argument {foo,bar}: invalid choice: 'baz' (choose from foo, bar)", + excinfo.exception.stderr, + ) + + def test_wrong_argument_no_suggestion_implicit(self): + parser = ErrorRaisingArgumentParser() + parser.add_argument('foo', choices=['bar', 'baz']) + with self.assertRaises(ArgumentParserError) as excinfo: + parser.parse_args(('bazz',)) + self.assertIn( + "error: argument foo: invalid choice: 'bazz' (choose from bar, baz)", + excinfo.exception.stderr, + ) + + def test_suggestions_choices_empty(self): + parser = ErrorRaisingArgumentParser(suggest_on_error=True) + parser.add_argument('foo', choices=[]) + with self.assertRaises(ArgumentParserError) as excinfo: + parser.parse_args(('bazz',)) + self.assertIn( + "error: argument foo: invalid choice: 'bazz' (choose from )", + excinfo.exception.stderr, + ) + + def test_suggestions_choices_int(self): + parser = ErrorRaisingArgumentParser(suggest_on_error=True) + parser.add_argument('foo', choices=[1, 2]) + with self.assertRaises(ArgumentParserError) as excinfo: + parser.parse_args(('3',)) + self.assertIn( + "error: argument foo: invalid choice: '3' (choose from 1, 2)", + excinfo.exception.stderr, + ) + + def test_suggestions_choices_mixed_types(self): + parser = ErrorRaisingArgumentParser(suggest_on_error=True) + parser.add_argument('foo', choices=[1, '2']) + with self.assertRaises(ArgumentParserError) as excinfo: + parser.parse_args(('3',)) + self.assertIn( + "error: argument foo: invalid choice: '3' (choose from 1, 2)", + excinfo.exception.stderr, + ) + + class TestInvalidAction(TestCase): """Test invalid user defined 
Action""" @@ -2505,18 +2594,6 @@ def test_required_subparsers_no_destination_error(self): 'error: the following arguments are required: {foo,bar}\n$' ) - def test_wrong_argument_subparsers_no_destination_error(self): - parser = ErrorRaisingArgumentParser() - subparsers = parser.add_subparsers(required=True) - subparsers.add_parser('foo') - subparsers.add_parser('bar') - with self.assertRaises(ArgumentParserError) as excinfo: - parser.parse_args(('baz',)) - self.assertRegex( - excinfo.exception.stderr, - r"error: argument {foo,bar}: invalid choice: 'baz' \(choose from foo, bar\)\n$" - ) - def test_optional_subparsers(self): parser = ErrorRaisingArgumentParser() subparsers = parser.add_subparsers(dest='command', required=False) @@ -2862,7 +2939,7 @@ def test_single_parent_mutex(self): parser = ErrorRaisingArgumentParser(parents=[self.ab_mutex_parent]) self._test_mutex_ab(parser.parse_args) - def test_single_granparent_mutex(self): + def test_single_grandparent_mutex(self): parents = [self.ab_mutex_parent] parser = ErrorRaisingArgumentParser(add_help=False, parents=parents) parser = ErrorRaisingArgumentParser(parents=[parser]) diff --git a/Misc/NEWS.d/next/Library/2024-09-24-18-49-16.gh-issue-99749.gBDJX7.rst b/Misc/NEWS.d/next/Library/2024-09-24-18-49-16.gh-issue-99749.gBDJX7.rst new file mode 100644 index 00000000000000..3ecd75c5b551b6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-24-18-49-16.gh-issue-99749.gBDJX7.rst @@ -0,0 +1 @@ +Adds a feature to optionally enable suggestions for argument choices and subparser names if mistyped by the user. 
From 7b04496e5c7ed47e9653f4591674fc9ffef34587 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 17 Oct 2024 02:11:47 -0700 Subject: [PATCH 013/106] gh-125542: Deprecate prefix_chars in ArgumentParser.add_argument_group() (GH-125563) --- .../pending-removal-in-future.rst | 9 +++++-- Doc/library/argparse.rst | 4 +++ Doc/whatsnew/3.14.rst | 6 +++++ Lib/argparse.py | 8 ++++++ Lib/test/test_argparse.py | 25 +++++++++++++++++++ ...-10-16-04-50-53.gh-issue-125542.vZJ-Ns.rst | 2 ++ 6 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-16-04-50-53.gh-issue-125542.vZJ-Ns.rst diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index f916797c07a068..d77fc86eab0ed6 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -4,8 +4,13 @@ Pending removal in future versions The following APIs will be removed in the future, although there is currently no date scheduled for their removal. -* :mod:`argparse`: Nesting argument groups and nesting mutually exclusive - groups are deprecated. +* :mod:`argparse`: + + * Nesting argument groups and nesting mutually exclusive + groups are deprecated. + * Passing the undocumented keyword argument *prefix_chars* to + :meth:`~argparse.ArgumentParser.add_argument_group` is now + deprecated. * :mod:`array`'s ``'u'`` format code (:gh:`57281`) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index ee8562b81770b6..ef0db3e9789c98 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -1894,6 +1894,10 @@ Argument groups The function exists on the API by accident through inheritance and will be removed in the future. + .. deprecated:: 3.14 + Passing prefix_chars_ to :meth:`add_argument_group` + is now deprecated. 
+ Mutual exclusion ^^^^^^^^^^^^^^^^ diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 9543af3c7ca225..feb65f244827ad 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -428,6 +428,12 @@ asyncio Deprecated ========== +* :mod:`argparse`: + Passing the undocumented keyword argument *prefix_chars* to + :meth:`~argparse.ArgumentParser.add_argument_group` is now + deprecated. + (Contributed by Savannah Ostrowski in :gh:`125563`.) + * :mod:`asyncio`: :func:`!asyncio.iscoroutinefunction` is deprecated and will be removed in Python 3.16, diff --git a/Lib/argparse.py b/Lib/argparse.py index ece6f2e880d5cb..49271a146c7282 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1662,6 +1662,14 @@ def _check_help(self, action): class _ArgumentGroup(_ActionsContainer): def __init__(self, container, title=None, description=None, **kwargs): + if 'prefix_chars' in kwargs: + import warnings + depr_msg = ( + "The use of the undocumented 'prefix_chars' parameter in " + "ArgumentParser.add_argument_group() is deprecated." + ) + warnings.warn(depr_msg, DeprecationWarning, stacklevel=3) + # add any missing keyword arguments by checking the container update = kwargs.setdefault update('conflict_handler', container.conflict_handler) diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index a3c096ef3199c8..4fa669718abc50 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2893,6 +2893,31 @@ def test_interleaved_groups(self): result = parser.parse_args('1 2 3 4'.split()) self.assertEqual(expected, result) +class TestGroupConstructor(TestCase): + def test_group_prefix_chars(self): + parser = ErrorRaisingArgumentParser() + msg = ( + "The use of the undocumented 'prefix_chars' parameter in " + "ArgumentParser.add_argument_group() is deprecated." 
+ ) + with self.assertWarns(DeprecationWarning) as cm: + parser.add_argument_group(prefix_chars='-+') + self.assertEqual(msg, str(cm.warning)) + self.assertEqual(cm.filename, __file__) + + def test_group_prefix_chars_default(self): + # "default" isn't quite the right word here, but it's the same as + # the parser's default prefix so it's a good test + parser = ErrorRaisingArgumentParser() + msg = ( + "The use of the undocumented 'prefix_chars' parameter in " + "ArgumentParser.add_argument_group() is deprecated." + ) + with self.assertWarns(DeprecationWarning) as cm: + parser.add_argument_group(prefix_chars='-') + self.assertEqual(msg, str(cm.warning)) + self.assertEqual(cm.filename, __file__) + # =================== # Parent parser tests # =================== diff --git a/Misc/NEWS.d/next/Library/2024-10-16-04-50-53.gh-issue-125542.vZJ-Ns.rst b/Misc/NEWS.d/next/Library/2024-10-16-04-50-53.gh-issue-125542.vZJ-Ns.rst new file mode 100644 index 00000000000000..777920cc54ff9b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-16-04-50-53.gh-issue-125542.vZJ-Ns.rst @@ -0,0 +1,2 @@ +Deprecate passing keyword-only *prefix_chars* argument to +:meth:`argparse.ArgumentParser.add_argument_group`. From dbcc5ac4709dfd8dfaf323d51f135f2218d14068 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 17 Oct 2024 04:41:22 -0700 Subject: [PATCH 014/106] gh-95836: Add custom type converter examples to argparse tutorial (GH-125376) --- Doc/howto/argparse.rst | 47 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/Doc/howto/argparse.rst b/Doc/howto/argparse.rst index 30d9ac700376e6..1efbee64d60bb3 100644 --- a/Doc/howto/argparse.rst +++ b/Doc/howto/argparse.rst @@ -841,6 +841,53 @@ translated messages. To translate your own strings in the :mod:`argparse` output, use :mod:`gettext`. +Custom type converters +====================== + +The :mod:`argparse` module allows you to specify custom type converters for +your command-line arguments. 
This allows you to modify user input before it's +stored in the :class:`argparse.Namespace`. This can be useful when you need to +pre-process the input before it is used in your program. + +When using a custom type converter, you can use any callable that takes a +single string argument (the argument value) and returns the converted value. +However, if you need to handle more complex scenarios, you can use a custom +action class with the **action** parameter instead. + +For example, let's say you want to handle arguments with different prefixes and +process them accordingly:: + + import argparse + + parser = argparse.ArgumentParser(prefix_chars='-+') + + parser.add_argument('-a', metavar='', action='append', + type=lambda x: ('-', x)) + parser.add_argument('+a', metavar='', action='append', + type=lambda x: ('+', x)) + + args = parser.parse_args() + print(args) + +Output: + +.. code-block:: shell-session + + $ python prog.py -a value1 +a value2 + Namespace(a=[('-', 'value1'), ('+', 'value2')]) + +In this example, we: + +* Created a parser with custom prefix characters using the ``prefix_chars`` + parameter. + +* Defined two arguments, ``-a`` and ``+a``, which used the ``type`` parameter to + create custom type converters to store the value in a tuple with the prefix. + +Without the custom type converters, the parser would have treated ``-a`` +and ``+a`` as the same argument, which would have been undesirable. By using custom +type converters, we were able to differentiate between the two arguments. 
+ Conclusion ========== From 37986e830ba25d2c382988b06bbe27410596346c Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 17 Oct 2024 08:20:30 -0400 Subject: [PATCH 015/106] gh-123153: Fix PGO builds with free-threading on Windows (#125607) * gh-123153: Fix PGO builds with free-threading * Redo how the #define works --- Python/ceval.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 43776e773e0deb..98d95b28488fd0 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -761,12 +761,20 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch) * so consume 3 units of C stack */ #define PY_EVAL_C_STACK_UNITS 2 -#if defined(_MSC_VER) && defined(_Py_USING_PGO) && defined(_Py_JIT) -/* _PyEval_EvalFrameDefault is too large to optimize for speed with - PGO on MSVC when the JIT is enabled. Disable that optimization - around this function only. If this is fixed upstream, we should - gate this on the version of MSVC. + +/* _PyEval_EvalFrameDefault is too large to optimize for speed with PGO on MSVC + when the JIT is enabled or GIL is disabled. Disable that optimization around + this function only. If this is fixed upstream, we should gate this on the + version of MSVC. 
*/ +#if (defined(_MSC_VER) && \ + defined(_Py_USING_PGO) && \ + (defined(_Py_JIT) || \ + defined(Py_GIL_DISABLED))) +#define DO_NOT_OPTIMIZE_INTERP_LOOP +#endif + +#ifdef DO_NOT_OPTIMIZE_INTERP_LOOP # pragma optimize("t", off) /* This setting is reversed below following _PyEval_EvalFrameDefault */ #endif @@ -1146,7 +1154,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int } -#if defined(_MSC_VER) && defined(_Py_USING_PGO) && defined(_Py_JIT) +#ifdef DO_NOT_OPTIMIZE_INTERP_LOOP # pragma optimize("", on) #endif From 0d88b995a641315306d56fba7d07479b2c5f57ef Mon Sep 17 00:00:00 2001 From: Emmanuel Ferdman Date: Thu, 17 Oct 2024 17:33:37 +0300 Subject: [PATCH 016/106] gh-125644: Update `locations.md` reference (#125645) Signed-off-by: Emmanuel Ferdman --- InternalDocs/compiler.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index acef6def563154..e9608977b0cbb3 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -447,7 +447,7 @@ bytecode. This includes transforming pseudo instructions into actual instruction converting jump targets from logical labels to relative offsets, and construction of the [exception table](exception_handling.md) and -[locations table](https://github.com/python/cpython/blob/main/Objects/locations.md). +[locations table](https://github.com/python/cpython/blob/main/InternalDocs/locations.md). The bytecode and tables are then wrapped into a ``PyCodeObject`` along with additional metadata, including the ``consts`` and ``names`` arrays, information about function reference to the source code (filename, etc). 
All of this is implemented by From 528bbab96feadbfabb798547e5bb2ad52070fb73 Mon Sep 17 00:00:00 2001 From: Jonathan Protzenko Date: Thu, 17 Oct 2024 08:08:43 -0700 Subject: [PATCH 017/106] GH-99108: Make vectorized versions of Blake2 available on x86, too (#125244) Accomplished by updating HACL* vendored code from hacl-star/hacl-star@a6a09496d9cff652b567d26f2c3ab012321b632a to hacl-star/hacl-star@315a9e491d2bc347b9dae99e0ea506995ea84d9d Co-authored-by: Victor Stinner Co-authored-by: Zachary Ware --- Misc/sbom.spdx.json | 48 ++-- Modules/_hacl/Hacl_Hash_Blake2b.c | 260 +++++++++++------- Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c | 225 +++++++++------ Modules/_hacl/Hacl_Hash_Blake2s.c | 231 ++++++++++------ Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c | 196 ++++++++----- .../include/krml/FStar_UInt128_Verified.h | 2 +- .../include/krml/FStar_UInt_8_16_32_64.h | 2 +- .../krml/fstar_uint128_struct_endianness.h | 2 +- Modules/_hacl/include/krml/internal/target.h | 6 +- .../_hacl/include/krml/lowstar_endianness.h | 2 +- Modules/_hacl/libintvector.h | 2 +- Modules/_hacl/refresh.sh | 2 +- 12 files changed, 619 insertions(+), 359 deletions(-) diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index f07ad9423d9039..cc73e93009b43f 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -300,11 +300,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "c96cba53034348537ac423a220803b06cd9f0a43" + "checksumValue": "a34e821b68ef5334eccf4f729b28bb7bb65b965e" }, { "algorithm": "SHA256", - "checksumValue": "9f4fb5c70678638cfd163cc990be1def356cf7b65b75faa4666db8c5f8593530" + "checksumValue": "4582db9143c0810b98838a5357c577e0b32ae77f3018486159df4e0dfd3fce3c" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b.c" @@ -328,11 +328,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "e11e2d1771e56c0afbdb0673906898b3a67e0cc3" + "checksumValue": "0ffe60c6d5eed5dd222515e820d461d319d16b1f" }, { "algorithm": "SHA256", - "checksumValue": 
"d5bf29d995f7cb9861841b813aa01206664895a1c5aa166a4796785c02117bf4" + "checksumValue": "4804cb3ce68bfdcf98853d6f1d77b4a844a3c2796f776b39770ba327e400d402" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c" @@ -370,11 +370,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "5422517af799cf74b194821fb2a1f39e3b02c54d" + "checksumValue": "cf035ffeff875bc74345a47373ce25dc408ea9dc" }, { "algorithm": "SHA256", - "checksumValue": "c66adab0259f2c2229e010cd635a982e8c2b8836e59e43e7867992d4148e4d9a" + "checksumValue": "579059b002c45fab0fed6381e85c3f5eaf1d959400ca64b103542ac6c35bade3" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s.c" @@ -398,11 +398,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "7822db8e7c2f60dd64a18e112a1bc369e7f7a0ff" + "checksumValue": "9bb53022d158a9c349edb52a8def8aac7d098a4e" }, { "algorithm": "SHA256", - "checksumValue": "94b0cd3cf1f7385325ee878d2ef06affc8d6412af9302ca47d1aa6d858182050" + "checksumValue": "2abde0c6b5da0402e91b4bedfe786c24b908fbdc04e08e74651c7624729254d9" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c" @@ -580,11 +580,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "12c0c680c93b8112b97cc575faacbb3cbbd315b1" + "checksumValue": "7665829b9396f72e7f8098080d6d6773565468e9" }, { "algorithm": "SHA256", - "checksumValue": "455e94f24a0900deda7e6e36f4714e4253d32cea077f97e23f90c569a717bc48" + "checksumValue": "ca7357ee70365c690664a44f6522e526636151d9ed2da8d0d29da15bb8556530" } ], "fileName": "Modules/_hacl/include/krml/FStar_UInt128_Verified.h" @@ -594,11 +594,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "62b44acbbdc77b749c36c242cda027bacf7679f8" + "checksumValue": "a2db924d0e8f7df3139e9a20355ffa520aded479" }, { "algorithm": "SHA256", - "checksumValue": "65decdb74c24049aa19430462a51219250cfc65d8c162778e42df88b3142fa42" + "checksumValue": "f1de79fb4c763b215c823f44471bbae6b65e6bb533eb52a5863d551d5e2e6748" } ], "fileName": 
"Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h" @@ -608,11 +608,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "1987119a563a8fdc5966286e274f716dbcea77ee" + "checksumValue": "7f23693151d5409623cbe886e5b45a0e4f0d3c72" }, { "algorithm": "SHA256", - "checksumValue": "fe57e1bc5ce3224d106e36cb8829b5399c63a68a70b0ccd0c91d82a4565c8869" + "checksumValue": "1c9bee7ac4b987c73cc3aba6b7ceed8ec7e75c9a741810e4411f35602490e0d8" } ], "fileName": "Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h" @@ -622,11 +622,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "81872ecdbd39b09cd813dee6e1dbed113a81aa4a" + "checksumValue": "9881567f43deb32bae77a84b2d349858a24b6685" }, { "algorithm": "SHA256", - "checksumValue": "1eef18295d412129007816fe65b7f15c0be8ad32840ef5e3dfaa5b67317e1b51" + "checksumValue": "3382156e32fcb376009177d3d2dc9712ff7c8c02afb97b3e16d98b41a2114f84" } ], "fileName": "Modules/_hacl/include/krml/internal/target.h" @@ -636,11 +636,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "964e09bd99ff2366afd6193b59863fc925e7fb05" + "checksumValue": "e18efc9239a5df0f222b5f7b0a65f72509d7e304" }, { "algorithm": "SHA256", - "checksumValue": "3734c7942bec9a434e16df069fa45bdcb84b130f14417bc5f7bfe8546272d9f5" + "checksumValue": "47dd5a7d21b5302255f9fff28884f65d3056fc3f54471ed62ec85fa1904f8aa5" } ], "fileName": "Modules/_hacl/include/krml/lowstar_endianness.h" @@ -804,11 +804,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "d5d85ee8f0bd52781fe470d0bf73ec388ddb3999" + "checksumValue": "f4a33ad535768b860362ab0bd033a70da0b524b7" }, { "algorithm": "SHA256", - "checksumValue": "9a421b998add98fe366374641c4edb27617ff539a59f0963879f345065d3d39d" + "checksumValue": "433cdf4ba80bc72e0cea5d4b420ff18676baeafdb5ba19adf5b7fb33e90b424b" } ], "fileName": "Modules/_hacl/libintvector.h" @@ -1640,14 +1640,14 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "988a74f5fbb59baca2d54e41447997ada92f4ebc59888dfb717438013f859117" + 
"checksumValue": "935ae51d0ff0bf1403f0ecc1ff02b8f685d09053618558c07fbe4bd2abbc5dd1" } ], - "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/a6a09496d9cff652b567d26f2c3ab012321b632a.zip", + "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/315a9e491d2bc347b9dae99e0ea506995ea84d9d.zip", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:a6a09496d9cff652b567d26f2c3ab012321b632a:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:315a9e491d2bc347b9dae99e0ea506995ea84d9d:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1655,7 +1655,7 @@ "name": "hacl-star", "originator": "Organization: HACL* Developers", "primaryPackagePurpose": "SOURCE", - "versionInfo": "a6a09496d9cff652b567d26f2c3ab012321b632a" + "versionInfo": "315a9e491d2bc347b9dae99e0ea506995ea84d9d" }, { "SPDXID": "SPDXRef-PACKAGE-macholib", diff --git a/Modules/_hacl/Hacl_Hash_Blake2b.c b/Modules/_hacl/Hacl_Hash_Blake2b.c index e13f16fd971c56..cd3b9777e09f6c 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b.c @@ -575,86 +575,6 @@ void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) r1[3U] = iv7_; } -static void init_with_params(uint64_t *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint64_t tmp[8U] = { 0U }; - uint64_t *r0 = hash; - uint64_t *r1 = hash + 4U; - uint64_t *r2 = hash + 8U; - uint64_t *r3 = hash + 12U; - uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; - r2[0U] = iv0; - r2[1U] = iv1; - r2[2U] = iv2; - r2[3U] = iv3; - r3[0U] = iv4; - r3[1U] = iv5; - r3[2U] = iv6; - r3[3U] = iv7; - uint8_t kk = 
p.key_length; - uint8_t nn = p.digest_length; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); - tmp[1U] = p.node_offset; - tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; - tmp[3U] = 0ULL; - uint64_t tmp0 = tmp[0U]; - uint64_t tmp1 = tmp[1U]; - uint64_t tmp2 = tmp[2U]; - uint64_t tmp3 = tmp[3U]; - uint64_t tmp4 = tmp[4U]; - uint64_t tmp5 = tmp[5U]; - uint64_t tmp6 = tmp[6U]; - uint64_t tmp7 = tmp[7U]; - uint64_t iv0_ = iv0 ^ tmp0; - uint64_t iv1_ = iv1 ^ tmp1; - uint64_t iv2_ = iv2 ^ tmp2; - uint64_t iv3_ = iv3 ^ tmp3; - uint64_t iv4_ = iv4 ^ tmp4; - uint64_t iv5_ = iv5 ^ tmp5; - uint64_t iv6_ = iv6 ^ tmp6; - uint64_t iv7_ = iv7 ^ tmp7; - r0[0U] = iv0_; - r0[1U] = iv1_; - r0[2U] = iv2_; - r0[3U] = iv3_; - r1[0U] = iv4_; - r1[1U] = iv5_; - r1[2U] = iv6_; - r1[3U] = iv7_; -} - static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) { FStar_UInt128_uint128 lb = FStar_UInt128_uint64_to_uint128((uint64_t)128U); @@ -811,16 +731,92 @@ static Hacl_Hash_Blake2b_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i.key_length; + uint64_t *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + 
kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = h; + uint64_t *r1 = h + 4U; + uint64_t *r2 = h + 8U; + uint64_t *r3 = h + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + 
uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; return p; } @@ -918,16 +914,92 @@ static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i1.key_length; + uint64_t *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = h; + uint64_t *r1 = h + 4U; + uint64_t *r2 = h + 8U; + uint64_t *r3 = h + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = 
u; + uint64_t x = r; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -939,8 +1011,8 @@ static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params ite = 0U; } Hacl_Hash_Blake2b_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c index 35608aea71a293..92b2e8f539041b 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c @@ -298,75 +298,6 @@ Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t k r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); } -static void -init_with_params(Lib_IntVector_Intrinsics_vec256 *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint64_t tmp[8U] = { 0U }; - Lib_IntVector_Intrinsics_vec256 *r0 = hash; - Lib_IntVector_Intrinsics_vec256 *r1 = hash + 1U; - Lib_IntVector_Intrinsics_vec256 *r2 = 
hash + 2U; - Lib_IntVector_Intrinsics_vec256 *r3 = hash + 3U; - uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; - r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); - r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); - uint8_t kk = p.key_length; - uint8_t nn = p.digest_length; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); - tmp[1U] = p.node_offset; - tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; - tmp[3U] = 0ULL; - uint64_t tmp0 = tmp[0U]; - uint64_t tmp1 = tmp[1U]; - uint64_t tmp2 = tmp[2U]; - uint64_t tmp3 = tmp[3U]; - uint64_t tmp4 = tmp[4U]; - uint64_t tmp5 = tmp[5U]; - uint64_t tmp6 = tmp[6U]; - uint64_t tmp7 = tmp[7U]; - uint64_t iv0_ = iv0 ^ tmp0; - uint64_t iv1_ = iv1 ^ tmp1; - uint64_t iv2_ = iv2 ^ tmp2; - uint64_t iv3_ = iv3 ^ tmp3; - uint64_t iv4_ = iv4 ^ tmp4; - uint64_t iv5_ = iv5 ^ tmp5; - uint64_t iv6_ = iv6 ^ tmp6; - uint64_t iv7_ = iv7 ^ tmp7; - r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); - r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); -} - static void update_key( Lib_IntVector_Intrinsics_vec256 *wv, @@ -647,16 +578,80 @@ static 
Hacl_Hash_Blake2b_Simd256_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i.key_length; + Lib_IntVector_Intrinsics_vec256 *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = h; + Lib_IntVector_Intrinsics_vec256 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = h + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + 
((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); return p; } @@ -757,16 +752,80 @@ reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i1.key_length; + Lib_IntVector_Intrinsics_vec256 *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = h; + Lib_IntVector_Intrinsics_vec256 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = h + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = 
Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -778,8 +837,8 @@ reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and ite = 0U; } Hacl_Hash_Blake2b_Simd256_state_t - tmp = { .block_state = block_state, .buf = 
buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/Hacl_Hash_Blake2s.c b/Modules/_hacl/Hacl_Hash_Blake2s.c index 167f38fbd1c603..e5e0ecd0bfde7e 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s.c @@ -573,83 +573,6 @@ void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) r1[3U] = iv7_; } -static void init_with_params(uint32_t *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint32_t tmp[8U] = { 0U }; - uint32_t *r0 = hash; - uint32_t *r1 = hash + 4U; - uint32_t *r2 = hash + 8U; - uint32_t *r3 = hash + 12U; - uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; - r2[0U] = iv0; - r2[1U] = iv1; - r2[2U] = iv2; - r2[3U] = iv3; - r3[0U] = iv4; - r3[1U] = iv5; - r3[2U] = iv6; - r3[3U] = iv7; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - tmp[0U] = - (uint32_t)p.digest_length - ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); - tmp[1U] = p.leaf_length; - tmp[2U] = (uint32_t)p.node_offset; - tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); - uint32_t tmp0 = tmp[0U]; - uint32_t tmp1 = tmp[1U]; - uint32_t tmp2 = tmp[2U]; - uint32_t 
tmp3 = tmp[3U]; - uint32_t tmp4 = tmp[4U]; - uint32_t tmp5 = tmp[5U]; - uint32_t tmp6 = tmp[6U]; - uint32_t tmp7 = tmp[7U]; - uint32_t iv0_ = iv0 ^ tmp0; - uint32_t iv1_ = iv1 ^ tmp1; - uint32_t iv2_ = iv2 ^ tmp2; - uint32_t iv3_ = iv3 ^ tmp3; - uint32_t iv4_ = iv4 ^ tmp4; - uint32_t iv5_ = iv5 ^ tmp5; - uint32_t iv6_ = iv6 ^ tmp6; - uint32_t iv7_ = iv7 ^ tmp7; - r0[0U] = iv0_; - r0[1U] = iv1_; - r0[2U] = iv2_; - r0[3U] = iv3_; - r1[0U] = iv4_; - r1[1U] = iv5_; - r1[2U] = iv6_; - r1[3U] = iv7_; -} - static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) { uint64_t lb = (uint64_t)64U; @@ -796,6 +719,7 @@ static Hacl_Hash_Blake2s_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -805,7 +729,79 @@ static Hacl_Hash_Blake2s_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = h; + uint32_t *r1 = h + 4U; + uint32_t *r2 = h + 8U; + uint32_t *r3 = h + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + 
KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; return p; } @@ -903,6 +899,7 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -912,7 +909,79 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = h; + uint32_t *r1 = h + 4U; + uint32_t *r2 = h + 8U; + uint32_t *r3 = h + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = 
Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -924,8 +993,8 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params ite = 0U; } Hacl_Hash_Blake2s_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, 
.total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c index a85b18a4d296ec..f675a7f14f192f 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c @@ -295,72 +295,6 @@ Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t k r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); } -static void -init_with_params(Lib_IntVector_Intrinsics_vec128 *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint32_t tmp[8U] = { 0U }; - Lib_IntVector_Intrinsics_vec128 *r0 = hash; - Lib_IntVector_Intrinsics_vec128 *r1 = hash + 1U; - Lib_IntVector_Intrinsics_vec128 *r2 = hash + 2U; - Lib_IntVector_Intrinsics_vec128 *r3 = hash + 3U; - uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; - r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); - r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - tmp[0U] = - (uint32_t)p.digest_length - ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); - tmp[1U] = p.leaf_length; - tmp[2U] = (uint32_t)p.node_offset; - tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ 
(uint32_t)p.inner_length << 24U); - uint32_t tmp0 = tmp[0U]; - uint32_t tmp1 = tmp[1U]; - uint32_t tmp2 = tmp[2U]; - uint32_t tmp3 = tmp[3U]; - uint32_t tmp4 = tmp[4U]; - uint32_t tmp5 = tmp[5U]; - uint32_t tmp6 = tmp[6U]; - uint32_t tmp7 = tmp[7U]; - uint32_t iv0_ = iv0 ^ tmp0; - uint32_t iv1_ = iv1 ^ tmp1; - uint32_t iv2_ = iv2 ^ tmp2; - uint32_t iv3_ = iv3 ^ tmp3; - uint32_t iv4_ = iv4 ^ tmp4; - uint32_t iv5_ = iv5 ^ tmp5; - uint32_t iv6_ = iv6 ^ tmp6; - uint32_t iv7_ = iv7 ^ tmp7; - r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); - r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); -} - static void update_key( Lib_IntVector_Intrinsics_vec128 *wv, @@ -637,6 +571,7 @@ static Hacl_Hash_Blake2s_Simd128_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec128 *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -646,7 +581,67 @@ static Hacl_Hash_Blake2s_Simd128_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = h; + Lib_IntVector_Intrinsics_vec128 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = h + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = 
Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); return p; } @@ -747,6 +742,7 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec128 *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -756,7 +752,67 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t 
tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = h; + Lib_IntVector_Intrinsics_vec128 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = h + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = 
Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -768,8 +824,8 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and ite = 0U; } Hacl_Hash_Blake2s_Simd128_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h index bdf25898f2bc25..659745b24265cb 100644 --- a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h +++ b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h @@ -1,6 +1,6 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. + Licensed under the Apache 2.0 and MIT Licenses. */ diff --git a/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h b/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h index 1bdec972a2f249..68bac0b3f0aab1 100644 --- a/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h +++ b/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h @@ -1,6 +1,6 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. + Licensed under the Apache 2.0 and MIT Licenses. */ diff --git a/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h b/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h index e2b6d62859a5f1..bb736add318aa8 100644 --- a/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h +++ b/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. 
*/ #ifndef FSTAR_UINT128_STRUCT_ENDIANNESS_H #define FSTAR_UINT128_STRUCT_ENDIANNESS_H diff --git a/Modules/_hacl/include/krml/internal/target.h b/Modules/_hacl/include/krml/internal/target.h index 292adc1423553f..fd74d3da684567 100644 --- a/Modules/_hacl/include/krml/internal/target.h +++ b/Modules/_hacl/include/krml/internal/target.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef __KRML_TARGET_H #define __KRML_TARGET_H @@ -82,6 +82,8 @@ # define KRML_NOINLINE __declspec(noinline) # elif defined (__GNUC__) # define KRML_NOINLINE __attribute__((noinline,unused)) +# elif defined (__SUNPRO_C) +# define KRML_NOINLINE __attribute__((noinline)) # else # define KRML_NOINLINE # warning "The KRML_NOINLINE macro is not defined for this toolchain!" @@ -95,6 +97,8 @@ # define KRML_MUSTINLINE inline __forceinline # elif defined (__GNUC__) # define KRML_MUSTINLINE inline __attribute__((always_inline)) +# elif defined (__SUNPRO_C) +# define KRML_MUSTINLINE inline __attribute__((always_inline)) # else # define KRML_MUSTINLINE inline # warning "The KRML_MUSTINLINE macro defaults to plain inline for this toolchain!" diff --git a/Modules/_hacl/include/krml/lowstar_endianness.h b/Modules/_hacl/include/krml/lowstar_endianness.h index 1aa2ccd644c06f..af6b882cf259cc 100644 --- a/Modules/_hacl/include/krml/lowstar_endianness.h +++ b/Modules/_hacl/include/krml/lowstar_endianness.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. 
*/ #ifndef __LOWSTAR_ENDIANNESS_H #define __LOWSTAR_ENDIANNESS_H diff --git a/Modules/_hacl/libintvector.h b/Modules/_hacl/libintvector.h index 99d11336942064..11e914f7e1650a 100644 --- a/Modules/_hacl/libintvector.h +++ b/Modules/_hacl/libintvector.h @@ -19,7 +19,7 @@ #define Lib_IntVector_Intrinsics_bit_mask64(x) -((x) & 1) -#if defined(__x86_64__) || defined(_M_X64) +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) #if defined(HACL_CAN_COMPILE_VEC128) diff --git a/Modules/_hacl/refresh.sh b/Modules/_hacl/refresh.sh index 44e18a15f9652a..6234fea9f17bc7 100755 --- a/Modules/_hacl/refresh.sh +++ b/Modules/_hacl/refresh.sh @@ -22,7 +22,7 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. -expected_hacl_star_rev=a6a09496d9cff652b567d26f2c3ab012321b632a +expected_hacl_star_rev=315a9e491d2bc347b9dae99e0ea506995ea84d9d hacl_dir="$(realpath "$1")" cd "$(dirname "$0")" From c124577ebe915a00de4033c0f7fa7c47621d79e0 Mon Sep 17 00:00:00 2001 From: Wulian Date: Thu, 17 Oct 2024 23:23:37 +0800 Subject: [PATCH 018/106] gh-123370: Fix the canvas not clearing after running turtledemo.clock (#123457) Rewriting the day and date every tick somehow prevented them from being removed either by clicking STOP or loading another example. The solution is to rewrite them only when they change. 
--- Lib/turtledemo/clock.py | 33 +++++++++++-------- ...-08-28-19-27-35.gh-issue-123370.SPZ9Ux.rst | 1 + 2 files changed, 21 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-08-28-19-27-35.gh-issue-123370.SPZ9Ux.rst diff --git a/Lib/turtledemo/clock.py b/Lib/turtledemo/clock.py index fd3b3992d466bf..8a630e29b8da50 100644 --- a/Lib/turtledemo/clock.py +++ b/Lib/turtledemo/clock.py @@ -1,7 +1,6 @@ -# -*- coding: cp1252 -*- """ turtle-example-suite: - tdemo_clock.py + turtledemo/clock.py Enhanced clock-program, showing date and time @@ -12,6 +11,9 @@ from turtle import * from datetime import datetime +dtfont = "TkFixedFont", 14, "bold" +current_day = None + def jump(distanz, winkel=0): penup() right(winkel) @@ -52,11 +54,23 @@ def clockface(radius): jump(-radius) rt(6) +def display_date_time(): + global current_day + writer.clear() + now = datetime.now() + current_day = now.day + writer.home() + writer.forward(distance=65) + writer.write(wochentag(now), align="center", font=dtfont) + writer.back(distance=150) + writer.write(datum(now), align="center", font=dtfont) + writer.forward(distance=85) + def setup(): global second_hand, minute_hand, hour_hand, writer mode("logo") make_hand_shape("second_hand", 125, 25) - make_hand_shape("minute_hand", 130, 25) + make_hand_shape("minute_hand", 115, 25) make_hand_shape("hour_hand", 90, 25) clockface(160) second_hand = Turtle() @@ -74,10 +88,10 @@ def setup(): hand.speed(0) ht() writer = Turtle() - #writer.mode("logo") writer.ht() writer.pu() writer.bk(85) + display_date_time() def wochentag(t): wochentag = ["Monday", "Tuesday", "Wednesday", @@ -99,18 +113,11 @@ def tick(): stunde = t.hour + minute/60.0 try: tracer(False) # Terminator can occur here - writer.clear() - writer.home() - writer.forward(65) - writer.write(wochentag(t), - align="center", font=("Courier", 14, "bold")) - writer.back(150) - writer.write(datum(t), - align="center", font=("Courier", 14, "bold")) - writer.forward(85) 
second_hand.setheading(6*sekunde) # or here minute_hand.setheading(6*minute) hour_hand.setheading(30*stunde) + if t.day != current_day: + display_date_time() tracer(True) ontimer(tick, 100) except Terminator: diff --git a/Misc/NEWS.d/next/Library/2024-08-28-19-27-35.gh-issue-123370.SPZ9Ux.rst b/Misc/NEWS.d/next/Library/2024-08-28-19-27-35.gh-issue-123370.SPZ9Ux.rst new file mode 100644 index 00000000000000..1fd5cc54eaf3e7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-28-19-27-35.gh-issue-123370.SPZ9Ux.rst @@ -0,0 +1 @@ +Fix the canvas not clearing after running turtledemo clock. From 0cb20f2e7e867d5c34fc17dd5b8e51e8b0020bb3 Mon Sep 17 00:00:00 2001 From: Wulian Date: Thu, 17 Oct 2024 23:40:30 +0800 Subject: [PATCH 019/106] gh-125625: Check for `py -3.13` in PCbuild/find_python.bat (GH-125626) --- PCbuild/find_python.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PCbuild/find_python.bat b/PCbuild/find_python.bat index 6db579fa8de08a..d65d080ca71a90 100644 --- a/PCbuild/find_python.bat +++ b/PCbuild/find_python.bat @@ -47,7 +47,7 @@ @rem If py.exe finds a recent enough version, use that one @rem It is fine to add new versions to this list when they have released, @rem but we do not use prerelease builds here. -@for %%p in (3.12 3.11 3.10) do @py -%%p -EV >nul 2>&1 && (set PYTHON=py -%%p) && (set _Py_Python_Source=found %%p with py.exe) && goto :found +@for %%p in (3.13 3.12 3.11 3.10) do @py -%%p -EV >nul 2>&1 && (set PYTHON=py -%%p) && (set _Py_Python_Source=found %%p with py.exe) && goto :found @if NOT exist "%_Py_EXTERNALS_DIR%" mkdir "%_Py_EXTERNALS_DIR%" @set _Py_NUGET=%NUGET% From ad3eac1963a5f195ef9b2c1dbb5e44fa3cce4c72 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 17 Oct 2024 18:46:59 +0300 Subject: [PATCH 020/106] gh-52551: Fix encoding issues in strftime() (GH-125193) Fix time.strftime(), the strftime() method and formatting of the datetime classes datetime, date and time. 
* Characters not encodable in the current locale are now acceptable in the format string. * Surrogate pairs and sequence of surrogatescape-encoded bytes are no longer recombinated. * Embedded null character no longer terminates the format string. This fixes also gh-78662 and gh-124531. --- Lib/test/datetimetester.py | 63 ++++- Lib/test/test_time.py | 29 ++- ...4-10-09-17-07-33.gh-issue-52551.PBakSY.rst | 8 + Modules/_datetimemodule.c | 206 +++++++--------- Modules/timemodule.c | 233 ++++++++++-------- 5 files changed, 307 insertions(+), 232 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-09-17-07-33.gh-issue-52551.PBakSY.rst diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index c81408b344968d..dbe25ef57dea83 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2949,11 +2949,32 @@ def test_more_strftime(self): self.assertEqual(t.strftime("%z"), "-0200" + z) self.assertEqual(t.strftime("%:z"), "-02:00:" + z) - # bpo-34482: Check that surrogates don't cause a crash. - try: - t.strftime('%y\ud800%m %H\ud800%M') - except UnicodeEncodeError: - pass + def test_strftime_special(self): + t = self.theclass(2004, 12, 31, 6, 22, 33, 47) + s1 = t.strftime('%c') + s2 = t.strftime('%B') + # gh-52551, gh-78662: Unicode strings should pass through strftime, + # independently from locale. + self.assertEqual(t.strftime('\U0001f40d'), '\U0001f40d') + self.assertEqual(t.strftime('\U0001f4bb%c\U0001f40d%B'), f'\U0001f4bb{s1}\U0001f40d{s2}') + self.assertEqual(t.strftime('%c\U0001f4bb%B\U0001f40d'), f'{s1}\U0001f4bb{s2}\U0001f40d') + # Lone surrogates should pass through. 
+ self.assertEqual(t.strftime('\ud83d'), '\ud83d') + self.assertEqual(t.strftime('\udc0d'), '\udc0d') + self.assertEqual(t.strftime('\ud83d%c\udc0d%B'), f'\ud83d{s1}\udc0d{s2}') + self.assertEqual(t.strftime('%c\ud83d%B\udc0d'), f'{s1}\ud83d{s2}\udc0d') + self.assertEqual(t.strftime('%c\udc0d%B\ud83d'), f'{s1}\udc0d{s2}\ud83d') + # Surrogate pairs should not recombine. + self.assertEqual(t.strftime('\ud83d\udc0d'), '\ud83d\udc0d') + self.assertEqual(t.strftime('%c\ud83d\udc0d%B'), f'{s1}\ud83d\udc0d{s2}') + # Surrogate-escaped bytes should not recombine. + self.assertEqual(t.strftime('\udcf0\udc9f\udc90\udc8d'), '\udcf0\udc9f\udc90\udc8d') + self.assertEqual(t.strftime('%c\udcf0\udc9f\udc90\udc8d%B'), f'{s1}\udcf0\udc9f\udc90\udc8d{s2}') + # gh-124531: The null character should not terminate the format string. + self.assertEqual(t.strftime('\0'), '\0') + self.assertEqual(t.strftime('\0'*1000), '\0'*1000) + self.assertEqual(t.strftime('\0%c\0%B'), f'\0{s1}\0{s2}') + self.assertEqual(t.strftime('%c\0%B\0'), f'{s1}\0{s2}\0') def test_extract(self): dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234) @@ -3736,6 +3757,33 @@ def test_strftime(self): # gh-85432: The parameter was named "fmt" in the pure-Python impl. t.strftime(format="%f") + def test_strftime_special(self): + t = self.theclass(1, 2, 3, 4) + s1 = t.strftime('%I%p%Z') + s2 = t.strftime('%X') + # gh-52551, gh-78662: Unicode strings should pass through strftime, + # independently from locale. + self.assertEqual(t.strftime('\U0001f40d'), '\U0001f40d') + self.assertEqual(t.strftime('\U0001f4bb%I%p%Z\U0001f40d%X'), f'\U0001f4bb{s1}\U0001f40d{s2}') + self.assertEqual(t.strftime('%I%p%Z\U0001f4bb%X\U0001f40d'), f'{s1}\U0001f4bb{s2}\U0001f40d') + # Lone surrogates should pass through. 
+ self.assertEqual(t.strftime('\ud83d'), '\ud83d') + self.assertEqual(t.strftime('\udc0d'), '\udc0d') + self.assertEqual(t.strftime('\ud83d%I%p%Z\udc0d%X'), f'\ud83d{s1}\udc0d{s2}') + self.assertEqual(t.strftime('%I%p%Z\ud83d%X\udc0d'), f'{s1}\ud83d{s2}\udc0d') + self.assertEqual(t.strftime('%I%p%Z\udc0d%X\ud83d'), f'{s1}\udc0d{s2}\ud83d') + # Surrogate pairs should not recombine. + self.assertEqual(t.strftime('\ud83d\udc0d'), '\ud83d\udc0d') + self.assertEqual(t.strftime('%I%p%Z\ud83d\udc0d%X'), f'{s1}\ud83d\udc0d{s2}') + # Surrogate-escaped bytes should not recombine. + self.assertEqual(t.strftime('\udcf0\udc9f\udc90\udc8d'), '\udcf0\udc9f\udc90\udc8d') + self.assertEqual(t.strftime('%I%p%Z\udcf0\udc9f\udc90\udc8d%X'), f'{s1}\udcf0\udc9f\udc90\udc8d{s2}') + # gh-124531: The null character should not terminate the format string. + self.assertEqual(t.strftime('\0'), '\0') + self.assertEqual(t.strftime('\0'*1000), '\0'*1000) + self.assertEqual(t.strftime('\0%I%p%Z\0%X'), f'\0{s1}\0{s2}') + self.assertEqual(t.strftime('%I%p%Z\0%X\0'), f'{s1}\0{s2}\0') + def test_format(self): t = self.theclass(1, 2, 3, 4) self.assertEqual(t.__format__(''), str(t)) @@ -4259,9 +4307,8 @@ def tzname(self, dt): return self.tz self.assertRaises(TypeError, t.strftime, "%Z") # Issue #6697: - if '_Fast' in self.__class__.__name__: - Badtzname.tz = '\ud800' - self.assertRaises(ValueError, t.strftime, "%Z") + Badtzname.tz = '\ud800' + self.assertEqual(t.strftime("%Z"), '\ud800') def test_hash_edge_cases(self): # Offsets that overflow a basic time. diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index 27c0f51acc58ab..f8b99a9b6a63f5 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -181,8 +181,33 @@ def test_strftime(self): self.fail('conversion specifier: %r failed.' 
% format) self.assertRaises(TypeError, time.strftime, b'%S', tt) - # embedded null character - self.assertRaises(ValueError, time.strftime, '%S\0', tt) + + def test_strftime_special(self): + tt = time.gmtime(self.t) + s1 = time.strftime('%c', tt) + s2 = time.strftime('%B', tt) + # gh-52551, gh-78662: Unicode strings should pass through strftime, + # independently from locale. + self.assertEqual(time.strftime('\U0001f40d', tt), '\U0001f40d') + self.assertEqual(time.strftime('\U0001f4bb%c\U0001f40d%B', tt), f'\U0001f4bb{s1}\U0001f40d{s2}') + self.assertEqual(time.strftime('%c\U0001f4bb%B\U0001f40d', tt), f'{s1}\U0001f4bb{s2}\U0001f40d') + # Lone surrogates should pass through. + self.assertEqual(time.strftime('\ud83d', tt), '\ud83d') + self.assertEqual(time.strftime('\udc0d', tt), '\udc0d') + self.assertEqual(time.strftime('\ud83d%c\udc0d%B', tt), f'\ud83d{s1}\udc0d{s2}') + self.assertEqual(time.strftime('%c\ud83d%B\udc0d', tt), f'{s1}\ud83d{s2}\udc0d') + self.assertEqual(time.strftime('%c\udc0d%B\ud83d', tt), f'{s1}\udc0d{s2}\ud83d') + # Surrogate pairs should not recombine. + self.assertEqual(time.strftime('\ud83d\udc0d', tt), '\ud83d\udc0d') + self.assertEqual(time.strftime('%c\ud83d\udc0d%B', tt), f'{s1}\ud83d\udc0d{s2}') + # Surrogate-escaped bytes should not recombine. + self.assertEqual(time.strftime('\udcf0\udc9f\udc90\udc8d', tt), '\udcf0\udc9f\udc90\udc8d') + self.assertEqual(time.strftime('%c\udcf0\udc9f\udc90\udc8d%B', tt), f'{s1}\udcf0\udc9f\udc90\udc8d{s2}') + # gh-124531: The null character should not terminate the format string. 
+ self.assertEqual(time.strftime('\0', tt), '\0') + self.assertEqual(time.strftime('\0'*1000, tt), '\0'*1000) + self.assertEqual(time.strftime('\0%c\0%B', tt), f'\0{s1}\0{s2}') + self.assertEqual(time.strftime('%c\0%B\0', tt), f'{s1}\0{s2}\0') def _bounds_checking(self, func): # Make sure that strftime() checks the bounds of the various parts diff --git a/Misc/NEWS.d/next/Library/2024-10-09-17-07-33.gh-issue-52551.PBakSY.rst b/Misc/NEWS.d/next/Library/2024-10-09-17-07-33.gh-issue-52551.PBakSY.rst new file mode 100644 index 00000000000000..edc9ac5bb23117 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-09-17-07-33.gh-issue-52551.PBakSY.rst @@ -0,0 +1,8 @@ +Fix encoding issues in :func:`time.strftime`, the +:meth:`~datetime.datetime.strftime` method of the :mod:`datetime` classes +:class:`~datetime.datetime`, :class:`~datetime.date` and +:class:`~datetime.time` and formatting of these classes. Characters not +encodable in the current locale are now acceptable in the format string. +Surrogate pairs and sequence of surrogatescape-encoded bytes are no longer +recombinated. Embedded null character no longer terminates the format +string. 
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 2ba46cddb4f558..e1bb98fcf05862 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1747,7 +1747,7 @@ make_somezreplacement(PyObject *object, char *sep, PyObject *tzinfoarg) PyObject *tzinfo = get_tzinfo_member(object); if (tzinfo == Py_None || tzinfo == NULL) { - return PyBytes_FromStringAndSize(NULL, 0); + return PyUnicode_FromStringAndSize(NULL, 0); } assert(tzinfoarg != NULL); @@ -1758,7 +1758,7 @@ make_somezreplacement(PyObject *object, char *sep, PyObject *tzinfoarg) tzinfoarg) < 0) return NULL; - return PyBytes_FromStringAndSize(buf, strlen(buf)); + return PyUnicode_FromString(buf); } static PyObject * @@ -1815,7 +1815,7 @@ make_freplacement(PyObject *object) else sprintf(freplacement, "%06d", 0); - return PyBytes_FromStringAndSize(freplacement, strlen(freplacement)); + return PyUnicode_FromString(freplacement); } /* I sure don't want to reproduce the strftime code from the time module, @@ -1836,94 +1836,60 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, PyObject *Zreplacement = NULL; /* py string, replacement for %Z */ PyObject *freplacement = NULL; /* py string, replacement for %f */ - const char *pin; /* pointer to next char in input format */ - Py_ssize_t flen; /* length of input format */ - char ch; /* next char in input format */ - - PyObject *newfmt = NULL; /* py string, the output format */ - char *pnew; /* pointer to available byte in output format */ - size_t totalnew; /* number bytes total in output format buffer, - exclusive of trailing \0 */ - size_t usednew; /* number bytes used so far in output format buffer */ - - const char *ptoappend; /* ptr to string to append to output buffer */ - Py_ssize_t ntoappend; /* # of bytes to append to output buffer */ - -#ifdef Py_NORMALIZE_CENTURY - /* Buffer of maximum size of formatted year permitted by long. 
*/ - char buf[SIZEOF_LONG * 5 / 2 + 2 -#ifdef Py_STRFTIME_C99_SUPPORT - /* Need 6 more to accommodate dashes, 2-digit month and day for %F. */ - + 6 -#endif - ]; -#endif - assert(object && format && timetuple); assert(PyUnicode_Check(format)); - /* Convert the input format to a C string and size */ - pin = PyUnicode_AsUTF8AndSize(format, &flen); - if (!pin) - return NULL; PyObject *strftime = _PyImport_GetModuleAttrString("time", "strftime"); if (strftime == NULL) { - goto Done; + return NULL; } /* Scan the input format, looking for %z/%Z/%f escapes, building * a new format. Since computing the replacements for those codes * is expensive, don't unless they're actually used. */ - if (flen > INT_MAX - 1) { - PyErr_NoMemory(); - goto Done; - } - - totalnew = flen + 1; /* realistic if no %z/%Z */ - newfmt = PyBytes_FromStringAndSize(NULL, totalnew); - if (newfmt == NULL) goto Done; - pnew = PyBytes_AsString(newfmt); - usednew = 0; - while ((ch = *pin++) != '\0') { - if (ch != '%') { - ptoappend = pin - 1; - ntoappend = 1; + _PyUnicodeWriter writer; + _PyUnicodeWriter_Init(&writer); + writer.overallocate = 1; + + Py_ssize_t flen = PyUnicode_GET_LENGTH(format); + Py_ssize_t i = 0; + Py_ssize_t start = 0; + Py_ssize_t end = 0; + while (i != flen) { + i = PyUnicode_FindChar(format, '%', i, flen, 1); + if (i < 0) { + assert(!PyErr_Occurred()); + break; } - else if ((ch = *pin++) == '\0') { - /* Null byte follows %, copy only '%'. - * - * Back the pin up one char so that we catch the null check - * the next time through the loop.*/ - pin--; - ptoappend = pin - 1; - ntoappend = 1; + end = i; + i++; + if (i == flen) { + break; } + Py_UCS4 ch = PyUnicode_READ_CHAR(format, i); + i++; /* A % has been seen and ch is the character after it. 
*/ - else if (ch == 'z') { + PyObject *replacement = NULL; + if (ch == 'z') { /* %z -> +HHMM */ if (zreplacement == NULL) { zreplacement = make_somezreplacement(object, "", tzinfoarg); if (zreplacement == NULL) - goto Done; + goto Error; } - assert(zreplacement != NULL); - assert(PyBytes_Check(zreplacement)); - ptoappend = PyBytes_AS_STRING(zreplacement); - ntoappend = PyBytes_GET_SIZE(zreplacement); + replacement = zreplacement; } - else if (ch == ':' && *pin == 'z' && pin++) { + else if (ch == ':' && i < flen && PyUnicode_READ_CHAR(format, i) == 'z') { /* %:z -> +HH:MM */ + i++; if (colonzreplacement == NULL) { colonzreplacement = make_somezreplacement(object, ":", tzinfoarg); if (colonzreplacement == NULL) - goto Done; + goto Error; } - assert(colonzreplacement != NULL); - assert(PyBytes_Check(colonzreplacement)); - ptoappend = PyBytes_AS_STRING(colonzreplacement); - ntoappend = PyBytes_GET_SIZE(colonzreplacement); + replacement = colonzreplacement; } else if (ch == 'Z') { /* format tzname */ @@ -1931,26 +1897,18 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, Zreplacement = make_Zreplacement(object, tzinfoarg); if (Zreplacement == NULL) - goto Done; + goto Error; } - assert(Zreplacement != NULL); - assert(PyUnicode_Check(Zreplacement)); - ptoappend = PyUnicode_AsUTF8AndSize(Zreplacement, - &ntoappend); - if (ptoappend == NULL) - goto Done; + replacement = Zreplacement; } else if (ch == 'f') { /* format microseconds */ if (freplacement == NULL) { freplacement = make_freplacement(object); if (freplacement == NULL) - goto Done; + goto Error; } - assert(freplacement != NULL); - assert(PyBytes_Check(freplacement)); - ptoappend = PyBytes_AS_STRING(freplacement); - ntoappend = PyBytes_GET_SIZE(freplacement); + replacement = freplacement; } #ifdef Py_NORMALIZE_CENTURY else if (ch == 'Y' || ch == 'G' @@ -1961,100 +1919,102 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, /* 0-pad year with century as necessary */ 
PyObject *item = PySequence_GetItem(timetuple, 0); if (item == NULL) { - goto Done; + goto Error; } long year_long = PyLong_AsLong(item); Py_DECREF(item); if (year_long == -1 && PyErr_Occurred()) { - goto Done; + goto Error; } /* Note that datetime(1000, 1, 1).strftime('%G') == '1000' so year 1000 for %G can go on the fast path. */ if (year_long >= 1000) { - goto PassThrough; + continue; } if (ch == 'G') { PyObject *year_str = PyObject_CallFunction(strftime, "sO", "%G", timetuple); if (year_str == NULL) { - goto Done; + goto Error; } PyObject *year = PyNumber_Long(year_str); Py_DECREF(year_str); if (year == NULL) { - goto Done; + goto Error; } year_long = PyLong_AsLong(year); Py_DECREF(year); if (year_long == -1 && PyErr_Occurred()) { - goto Done; + goto Error; } } - ntoappend = PyOS_snprintf(buf, sizeof(buf), + /* Buffer of maximum size of formatted year permitted by long. + * +6 to accommodate dashes, 2-digit month and day for %F. */ + char buf[SIZEOF_LONG * 5 / 2 + 2 + 6]; + Py_ssize_t n = PyOS_snprintf(buf, sizeof(buf), #ifdef Py_STRFTIME_C99_SUPPORT ch == 'F' ? "%04ld-%%m-%%d" : #endif "%04ld", year_long); #ifdef Py_STRFTIME_C99_SUPPORT if (ch == 'C') { - ntoappend -= 2; + n -= 2; } #endif - ptoappend = buf; + if (_PyUnicodeWriter_WriteSubstring(&writer, format, start, end) < 0) { + goto Error; + } + start = i; + if (_PyUnicodeWriter_WriteASCIIString(&writer, buf, n) < 0) { + goto Error; + } + continue; } #endif else { /* percent followed by something else */ -#ifdef Py_NORMALIZE_CENTURY - PassThrough: -#endif - ptoappend = pin - 2; - ntoappend = 2; - } - - /* Append the ntoappend chars starting at ptoappend to - * the new format. 
- */ - if (ntoappend == 0) continue; - assert(ptoappend != NULL); - assert(ntoappend > 0); - while (usednew + ntoappend > totalnew) { - if (totalnew > (PY_SSIZE_T_MAX >> 1)) { /* overflow */ - PyErr_NoMemory(); - goto Done; - } - totalnew <<= 1; - if (_PyBytes_Resize(&newfmt, totalnew) < 0) - goto Done; - pnew = PyBytes_AsString(newfmt) + usednew; } - memcpy(pnew, ptoappend, ntoappend); - pnew += ntoappend; - usednew += ntoappend; - assert(usednew <= totalnew); + assert(replacement != NULL); + assert(PyUnicode_Check(replacement)); + if (_PyUnicodeWriter_WriteSubstring(&writer, format, start, end) < 0) { + goto Error; + } + start = i; + if (_PyUnicodeWriter_WriteStr(&writer, replacement) < 0) { + goto Error; + } } /* end while() */ - if (_PyBytes_Resize(&newfmt, usednew) < 0) - goto Done; - { - PyObject *format; - - format = PyUnicode_FromString(PyBytes_AS_STRING(newfmt)); - if (format != NULL) { - result = PyObject_CallFunctionObjArgs(strftime, - format, timetuple, NULL); - Py_DECREF(format); + PyObject *newformat; + if (start == 0) { + _PyUnicodeWriter_Dealloc(&writer); + newformat = Py_NewRef(format); + } + else { + if (_PyUnicodeWriter_WriteSubstring(&writer, format, start, flen) < 0) { + goto Error; + } + newformat = _PyUnicodeWriter_Finish(&writer); + if (newformat == NULL) { + goto Done; } } + result = PyObject_CallFunctionObjArgs(strftime, + newformat, timetuple, NULL); + Py_DECREF(newformat); + Done: Py_XDECREF(freplacement); Py_XDECREF(zreplacement); Py_XDECREF(colonzreplacement); Py_XDECREF(Zreplacement); - Py_XDECREF(newfmt); Py_XDECREF(strftime); return result; + + Error: + _PyUnicodeWriter_Dealloc(&writer); + goto Done; } /* --------------------------------------------------------------------------- diff --git a/Modules/timemodule.c b/Modules/timemodule.c index 9720c201a184a8..b9d114ada0dfcd 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -776,27 +776,100 @@ the C library strftime function.\n" #endif static PyObject * 
-time_strftime(PyObject *module, PyObject *args) +time_strftime1(time_char **outbuf, size_t *bufsize, + time_char *format, size_t fmtlen, + struct tm *tm) { - PyObject *tup = NULL; - struct tm buf; - const time_char *fmt; + size_t buflen; +#if defined(MS_WINDOWS) && !defined(HAVE_WCSFTIME) + /* check that the format string contains only valid directives */ + for (const time_char *f = strchr(format, '%'); + f != NULL; + f = strchr(f + 2, '%')) + { + if (f[1] == '#') + ++f; /* not documented by python, */ + if (f[1] == '\0') + break; + if ((f[1] == 'y') && tm->tm_year < 0) { + PyErr_SetString(PyExc_ValueError, + "format %y requires year >= 1900 on Windows"); + return NULL; + } + } +#elif (defined(_AIX) || (defined(__sun) && defined(__SVR4))) && defined(HAVE_WCSFTIME) + for (const time_char *f = wcschr(format, '%'); + f != NULL; + f = wcschr(f + 2, '%')) + { + if (f[1] == L'\0') + break; + /* Issue #19634: On AIX, wcsftime("y", (1899, 1, 1, 0, 0, 0, 0, 0, 0)) + returns "0/" instead of "99" */ + if (f[1] == L'y' && tm->tm_year < 0) { + PyErr_SetString(PyExc_ValueError, + "format %y requires year >= 1900 on AIX"); + return NULL; + } + } +#endif + + /* I hate these functions that presume you know how big the output + * will be ahead of time... 
+ */ + while (1) { + if (*bufsize > PY_SSIZE_T_MAX/sizeof(time_char)) { + PyErr_NoMemory(); + return NULL; + } + *outbuf = (time_char *)PyMem_Realloc(*outbuf, + *bufsize*sizeof(time_char)); + if (*outbuf == NULL) { + PyErr_NoMemory(); + return NULL; + } +#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) + errno = 0; +#endif + _Py_BEGIN_SUPPRESS_IPH + buflen = format_time(*outbuf, *bufsize, format, tm); + _Py_END_SUPPRESS_IPH +#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) + /* VisualStudio .NET 2005 does this properly */ + if (buflen == 0 && errno == EINVAL) { + PyErr_SetString(PyExc_ValueError, "Invalid format string"); + return NULL; + } +#endif + if (buflen == 0 && *bufsize < 256 * fmtlen) { + *bufsize += *bufsize; + continue; + } + /* If the buffer is 256 times as long as the format, + it's probably not failing for lack of room! + More likely, the format yields an empty result, + e.g. an empty format, or %Z when the timezone + is unknown. */ #ifdef HAVE_WCSFTIME - wchar_t *format; + return PyUnicode_FromWideChar(*outbuf, buflen); #else - PyObject *format; + return PyUnicode_DecodeLocaleAndSize(*outbuf, buflen, "surrogateescape"); #endif + } +} + +static PyObject * +time_strftime(PyObject *module, PyObject *args) +{ + PyObject *tup = NULL; + struct tm buf; PyObject *format_arg; - size_t fmtlen, buflen; - time_char *outbuf = NULL; - size_t i; - PyObject *ret = NULL; + Py_ssize_t format_size; + time_char *format, *outbuf = NULL; + size_t fmtlen, bufsize = 1024; memset((void *) &buf, '\0', sizeof(buf)); - /* Will always expect a unicode string to be passed as format. - Given that there's no str type anymore in py3k this seems safe. 
- */ if (!PyArg_ParseTuple(args, "U|O:strftime", &format_arg, &tup)) return NULL; @@ -834,101 +907,63 @@ time_strftime(PyObject *module, PyObject *args) else if (buf.tm_isdst > 1) buf.tm_isdst = 1; -#ifdef HAVE_WCSFTIME - format = PyUnicode_AsWideCharString(format_arg, NULL); - if (format == NULL) + format_size = PyUnicode_GET_LENGTH(format_arg); + if ((size_t)format_size > PY_SSIZE_T_MAX/sizeof(time_char) - 1) { + PyErr_NoMemory(); return NULL; - fmt = format; -#else - /* Convert the unicode string to an ascii one */ - format = PyUnicode_EncodeLocale(format_arg, "surrogateescape"); - if (format == NULL) + } + format = PyMem_Malloc((format_size + 1)*sizeof(time_char)); + if (format == NULL) { + PyErr_NoMemory(); return NULL; - fmt = PyBytes_AS_STRING(format); -#endif - -#if defined(MS_WINDOWS) && !defined(HAVE_WCSFTIME) - /* check that the format string contains only valid directives */ - for (outbuf = strchr(fmt, '%'); - outbuf != NULL; - outbuf = strchr(outbuf+2, '%')) - { - if (outbuf[1] == '#') - ++outbuf; /* not documented by python, */ - if (outbuf[1] == '\0') - break; - if ((outbuf[1] == 'y') && buf.tm_year < 0) { - PyErr_SetString(PyExc_ValueError, - "format %y requires year >= 1900 on Windows"); - Py_DECREF(format); - return NULL; - } } -#elif (defined(_AIX) || (defined(__sun) && defined(__SVR4))) && defined(HAVE_WCSFTIME) - for (outbuf = wcschr(fmt, '%'); - outbuf != NULL; - outbuf = wcschr(outbuf+2, '%')) - { - if (outbuf[1] == L'\0') - break; - /* Issue #19634: On AIX, wcsftime("y", (1899, 1, 1, 0, 0, 0, 0, 0, 0)) - returns "0/" instead of "99" */ - if (outbuf[1] == L'y' && buf.tm_year < 0) { - PyErr_SetString(PyExc_ValueError, - "format %y requires year >= 1900 on AIX"); - PyMem_Free(format); - return NULL; + _PyUnicodeWriter writer; + _PyUnicodeWriter_Init(&writer); + writer.overallocate = 1; + Py_ssize_t i = 0; + while (i < format_size) { + fmtlen = 0; + for (; i < format_size; i++) { + Py_UCS4 c = PyUnicode_READ_CHAR(format_arg, i); + if (!c || c > 
127) { + break; + } + format[fmtlen++] = (char)c; } - } -#endif - - fmtlen = time_strlen(fmt); - - /* I hate these functions that presume you know how big the output - * will be ahead of time... - */ - for (i = 1024; ; i += i) { - outbuf = (time_char *)PyMem_Malloc(i*sizeof(time_char)); - if (outbuf == NULL) { - PyErr_NoMemory(); - break; + if (fmtlen) { + format[fmtlen] = 0; + PyObject *unicode = time_strftime1(&outbuf, &bufsize, + format, fmtlen, &buf); + if (unicode == NULL) { + goto error; + } + if (_PyUnicodeWriter_WriteStr(&writer, unicode) < 0) { + Py_DECREF(unicode); + goto error; + } + Py_DECREF(unicode); } -#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) - errno = 0; -#endif - _Py_BEGIN_SUPPRESS_IPH - buflen = format_time(outbuf, i, fmt, &buf); - _Py_END_SUPPRESS_IPH -#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) - /* VisualStudio .NET 2005 does this properly */ - if (buflen == 0 && errno == EINVAL) { - PyErr_SetString(PyExc_ValueError, "Invalid format string"); - PyMem_Free(outbuf); - break; + + Py_ssize_t start = i; + for (; i < format_size; i++) { + Py_UCS4 c = PyUnicode_READ_CHAR(format_arg, i); + if (c == '%') { + break; + } } -#endif - if (buflen > 0 || i >= 256 * fmtlen) { - /* If the buffer is 256 times as long as the format, - it's probably not failing for lack of room! - More likely, the format yields an empty result, - e.g. an empty format, or %Z when the timezone - is unknown. 
*/ -#ifdef HAVE_WCSFTIME - ret = PyUnicode_FromWideChar(outbuf, buflen); -#else - ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, "surrogateescape"); -#endif - PyMem_Free(outbuf); - break; + if (_PyUnicodeWriter_WriteSubstring(&writer, format_arg, start, i) < 0) { + goto error; } - PyMem_Free(outbuf); } -#ifdef HAVE_WCSFTIME + + PyMem_Free(outbuf); PyMem_Free(format); -#else - Py_DECREF(format); -#endif - return ret; + return _PyUnicodeWriter_Finish(&writer); +error: + PyMem_Free(outbuf); + PyMem_Free(format); + _PyUnicodeWriter_Dealloc(&writer); + return NULL; } #undef time_char From 04d6dd23e2d8a3132772cf7ce928676e26313585 Mon Sep 17 00:00:00 2001 From: George Pittock <66332098+georgepittock@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:34:37 +0100 Subject: [PATCH 021/106] gh-113570: reprlib.repr does not use builtin __repr__ for reshadowed builtins (GH-113577) --- Lib/reprlib.py | 31 ++++++++++--- Lib/test/test_reprlib.py | 44 +++++++++++++++++++ ...-12-30-00-21-45.gh-issue-113570._XQgsW.rst | 1 + 3 files changed, 71 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-12-30-00-21-45.gh-issue-113570._XQgsW.rst diff --git a/Lib/reprlib.py b/Lib/reprlib.py index 05bb1a0eb01795..19dbe3a07eb618 100644 --- a/Lib/reprlib.py +++ b/Lib/reprlib.py @@ -36,6 +36,17 @@ def wrapper(self): return decorating_function class Repr: + _lookup = { + 'tuple': 'builtins', + 'list': 'builtins', + 'array': 'array', + 'set': 'builtins', + 'frozenset': 'builtins', + 'deque': 'collections', + 'dict': 'builtins', + 'str': 'builtins', + 'int': 'builtins' + } def __init__( self, *, maxlevel=6, maxtuple=6, maxlist=6, maxarray=5, maxdict=4, @@ -60,14 +71,24 @@ def repr(self, x): return self.repr1(x, self.maxlevel) def repr1(self, x, level): - typename = type(x).__name__ + cls = type(x) + typename = cls.__name__ + if ' ' in typename: parts = typename.split() typename = '_'.join(parts) - if hasattr(self, 'repr_' + typename): - return getattr(self, 
'repr_' + typename)(x, level) - else: - return self.repr_instance(x, level) + + method = getattr(self, 'repr_' + typename, None) + if method: + # not defined in this class + if typename not in self._lookup: + return method(x, level) + module = getattr(cls, '__module__', None) + # defined in this class and is the module intended + if module == self._lookup[typename]: + return method(x, level) + + return self.repr_instance(x, level) def _join(self, pieces, level): if self.indent is None: diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py index 3e93b561c143d8..ffeb1fba7b80c6 100644 --- a/Lib/test/test_reprlib.py +++ b/Lib/test/test_reprlib.py @@ -580,6 +580,50 @@ def test_invalid_indent(self): with self.assertRaisesRegex(expected_error, expected_msg): r.repr(test_object) + def test_shadowed_stdlib_array(self): + # Issue #113570: repr() should not be fooled by an array + class array: + def __repr__(self): + return "not array.array" + + self.assertEqual(r(array()), "not array.array") + + def test_shadowed_builtin(self): + # Issue #113570: repr() should not be fooled + # by a shadowed builtin function + class list: + def __repr__(self): + return "not builtins.list" + + self.assertEqual(r(list()), "not builtins.list") + + def test_custom_repr(self): + class MyRepr(Repr): + + def repr_TextIOWrapper(self, obj, level): + if obj.name in {'', '', ''}: + return obj.name + return repr(obj) + + aRepr = MyRepr() + self.assertEqual(aRepr.repr(sys.stdin), "") + + def test_custom_repr_class_with_spaces(self): + class TypeWithSpaces: + pass + + t = TypeWithSpaces() + type(t).__name__ = "type with spaces" + self.assertEqual(type(t).__name__, "type with spaces") + + class MyRepr(Repr): + def repr_type_with_spaces(self, obj, level): + return "Type With Spaces" + + + aRepr = MyRepr() + self.assertEqual(aRepr.repr(t), "Type With Spaces") + def write_file(path, text): with open(path, 'w', encoding='ASCII') as fp: fp.write(text) diff --git a/Misc/NEWS.d/next/Core and 
Builtins/2023-12-30-00-21-45.gh-issue-113570._XQgsW.rst b/Misc/NEWS.d/next/Core and Builtins/2023-12-30-00-21-45.gh-issue-113570._XQgsW.rst new file mode 100644 index 00000000000000..6e0f0afe05369b --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-12-30-00-21-45.gh-issue-113570._XQgsW.rst @@ -0,0 +1 @@ +Fixed a bug in ``reprlib.repr`` where it incorrectly called the repr method on shadowed Python built-in types. From f203d1cb52f7697140337a73841c8412282e2ee0 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Thu, 17 Oct 2024 09:45:25 -0700 Subject: [PATCH 022/106] gh-125017: Fix crash on premature access to classmethod/staticmethod annotations (#125636) --- Lib/test/test_descr.py | 14 +++++++ ...-10-16-23-06-06.gh-issue-125017.fcltj0.rst | 2 + Objects/funcobject.c | 41 ++++++++++++------- 3 files changed, 43 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-16-23-06-06.gh-issue-125017.fcltj0.rst diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 9d15ab3a96bad6..b7e0f4d6d64018 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1618,6 +1618,9 @@ def annotated(cls) -> int: pass for method in (annotated, unannotated): with self.subTest(deco=deco, method=method): + with self.assertRaises(AttributeError): + del unannotated.__annotations__ + original_annotations = dict(method.__wrapped__.__annotations__) self.assertNotIn('__annotations__', method.__dict__) self.assertEqual(method.__annotations__, original_annotations) @@ -1644,6 +1647,17 @@ def annotated(cls) -> int: pass del method.__annotate__ self.assertIs(method.__annotate__, original_annotate) + def test_staticmethod_annotations_without_dict_access(self): + # gh-125017: this used to crash + class Spam: + def __new__(cls, x, y): + pass + + self.assertEqual(Spam.__new__.__annotations__, {}) + obj = Spam.__dict__['__new__'] + self.assertIsInstance(obj, staticmethod) + self.assertEqual(obj.__annotations__, {}) + @support.refcount_test 
def test_refleaks_in_classmethod___init__(self): gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount') diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-16-23-06-06.gh-issue-125017.fcltj0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-16-23-06-06.gh-issue-125017.fcltj0.rst new file mode 100644 index 00000000000000..11c526643c3122 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-16-23-06-06.gh-issue-125017.fcltj0.rst @@ -0,0 +1,2 @@ +Fix crash on certain accesses to the ``__annotations__`` of +:class:`staticmethod` and :class:`classmethod` objects. diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 6119a96b4aae76..f86ef32f1827bf 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1220,30 +1220,43 @@ functools_wraps(PyObject *wrapper, PyObject *wrapped) // Used for wrapping __annotations__ and __annotate__ on classmethod // and staticmethod objects. static PyObject * -descriptor_get_wrapped_attribute(PyObject *wrapped, PyObject *dict, PyObject *name) +descriptor_get_wrapped_attribute(PyObject *wrapped, PyObject *obj, PyObject *name) { + PyObject *dict = PyObject_GenericGetDict(obj, NULL); + if (dict == NULL) { + return NULL; + } PyObject *res; if (PyDict_GetItemRef(dict, name, &res) < 0) { + Py_DECREF(dict); return NULL; } if (res != NULL) { + Py_DECREF(dict); return res; } res = PyObject_GetAttr(wrapped, name); if (res == NULL) { + Py_DECREF(dict); return NULL; } if (PyDict_SetItem(dict, name, res) < 0) { + Py_DECREF(dict); Py_DECREF(res); return NULL; } + Py_DECREF(dict); return res; } static int -descriptor_set_wrapped_attribute(PyObject *dict, PyObject *name, PyObject *value, +descriptor_set_wrapped_attribute(PyObject *oobj, PyObject *name, PyObject *value, char *type_name) { + PyObject *dict = PyObject_GenericGetDict(oobj, NULL); + if (dict == NULL) { + return -1; + } if (value == NULL) { if (PyDict_DelItem(dict, name) < 0) { if (PyErr_ExceptionMatches(PyExc_KeyError)) { @@ -1251,14 +1264,18 @@ 
descriptor_set_wrapped_attribute(PyObject *dict, PyObject *name, PyObject *value PyErr_Format(PyExc_AttributeError, "'%.200s' object has no attribute '%U'", type_name, name); + return -1; } else { + Py_DECREF(dict); return -1; } } + Py_DECREF(dict); return 0; } else { + Py_DECREF(dict); return PyDict_SetItem(dict, name, value); } } @@ -1380,28 +1397,26 @@ static PyObject * cm_get___annotations__(PyObject *self, void *closure) { classmethod *cm = _PyClassMethod_CAST(self); - return descriptor_get_wrapped_attribute(cm->cm_callable, cm->cm_dict, &_Py_ID(__annotations__)); + return descriptor_get_wrapped_attribute(cm->cm_callable, self, &_Py_ID(__annotations__)); } static int cm_set___annotations__(PyObject *self, PyObject *value, void *closure) { - classmethod *cm = _PyClassMethod_CAST(self); - return descriptor_set_wrapped_attribute(cm->cm_dict, &_Py_ID(__annotations__), value, "classmethod"); + return descriptor_set_wrapped_attribute(self, &_Py_ID(__annotations__), value, "classmethod"); } static PyObject * cm_get___annotate__(PyObject *self, void *closure) { classmethod *cm = _PyClassMethod_CAST(self); - return descriptor_get_wrapped_attribute(cm->cm_callable, cm->cm_dict, &_Py_ID(__annotate__)); + return descriptor_get_wrapped_attribute(cm->cm_callable, self, &_Py_ID(__annotate__)); } static int cm_set___annotate__(PyObject *self, PyObject *value, void *closure) { - classmethod *cm = _PyClassMethod_CAST(self); - return descriptor_set_wrapped_attribute(cm->cm_dict, &_Py_ID(__annotate__), value, "classmethod"); + return descriptor_set_wrapped_attribute(self, &_Py_ID(__annotate__), value, "classmethod"); } @@ -1615,28 +1630,26 @@ static PyObject * sm_get___annotations__(PyObject *self, void *closure) { staticmethod *sm = _PyStaticMethod_CAST(self); - return descriptor_get_wrapped_attribute(sm->sm_callable, sm->sm_dict, &_Py_ID(__annotations__)); + return descriptor_get_wrapped_attribute(sm->sm_callable, self, &_Py_ID(__annotations__)); } static int 
sm_set___annotations__(PyObject *self, PyObject *value, void *closure) { - staticmethod *sm = _PyStaticMethod_CAST(self); - return descriptor_set_wrapped_attribute(sm->sm_dict, &_Py_ID(__annotations__), value, "staticmethod"); + return descriptor_set_wrapped_attribute(self, &_Py_ID(__annotations__), value, "staticmethod"); } static PyObject * sm_get___annotate__(PyObject *self, void *closure) { staticmethod *sm = _PyStaticMethod_CAST(self); - return descriptor_get_wrapped_attribute(sm->sm_callable, sm->sm_dict, &_Py_ID(__annotate__)); + return descriptor_get_wrapped_attribute(sm->sm_callable, self, &_Py_ID(__annotate__)); } static int sm_set___annotate__(PyObject *self, PyObject *value, void *closure) { - staticmethod *sm = _PyStaticMethod_CAST(self); - return descriptor_set_wrapped_attribute(sm->sm_dict, &_Py_ID(__annotate__), value, "staticmethod"); + return descriptor_set_wrapped_attribute(self, &_Py_ID(__annotate__), value, "staticmethod"); } static PyGetSetDef sm_getsetlist[] = { From b454662921fd3a1fc27169e91aca03aadea08817 Mon Sep 17 00:00:00 2001 From: chrysn Date: Thu, 17 Oct 2024 19:49:12 +0200 Subject: [PATCH 023/106] gh-118986: expose `socket.IPV6_RECVERR` (#118987) --- Doc/library/socket.rst | 4 ++-- .../Library/2024-05-13-10-09-41.gh-issue-118986.-r4W9h.rst | 1 + Modules/socketmodule.c | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-13-10-09-41.gh-issue-118986.-r4W9h.rst diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 935d4a85342876..0c7b9328648f66 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -451,8 +451,8 @@ Constants network interface instead of its name. .. versionchanged:: 3.14 - Added missing ``IP_RECVERR``, ``IP_RECVTTL``, and ``IP_RECVORIGDSTADDR`` - on Linux. + Added missing ``IP_RECVERR``, ``IPV6_RECVERR``, ``IP_RECVTTL``, and + ``IP_RECVORIGDSTADDR`` on Linux. .. 
versionchanged:: 3.14 Added support for ``TCP_QUICKACK`` on Windows platforms when available. diff --git a/Misc/NEWS.d/next/Library/2024-05-13-10-09-41.gh-issue-118986.-r4W9h.rst b/Misc/NEWS.d/next/Library/2024-05-13-10-09-41.gh-issue-118986.-r4W9h.rst new file mode 100644 index 00000000000000..196da60a950bfb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-13-10-09-41.gh-issue-118986.-r4W9h.rst @@ -0,0 +1 @@ +Add :data:`!socket.IPV6_RECVERR` constant (available since Linux 2.2). diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 744e5e0c0b2b54..2764bd6e2b2a47 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -8578,6 +8578,9 @@ socket_exec(PyObject *m) #ifdef IPV6_RECVDSTOPTS ADD_INT_MACRO(m, IPV6_RECVDSTOPTS); #endif +#ifdef IPV6_RECVERR + ADD_INT_MACRO(m, IPV6_RECVERR); +#endif #ifdef IPV6_RECVHOPLIMIT ADD_INT_MACRO(m, IPV6_RECVHOPLIMIT); #endif From d8c864816121547338efa43c56e3f75ead98a924 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 17 Oct 2024 14:10:55 -0400 Subject: [PATCH 024/106] gh-125541: Make Ctrl-C interrupt `threading.Lock.acquire()` on Windows (#125546) --- Doc/library/_thread.rst | 9 +++------ Doc/library/threading.rst | 3 +++ ...24-10-15-16-50-03.gh-issue-125541.FfhmWo.rst | 4 ++++ Python/parking_lot.c | 17 +++++++++++++++-- 4 files changed, 25 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-15-16-50-03.gh-issue-125541.FfhmWo.rst diff --git a/Doc/library/_thread.rst b/Doc/library/_thread.rst index 6a66fc4c64bc45..ed29ac70035597 100644 --- a/Doc/library/_thread.rst +++ b/Doc/library/_thread.rst @@ -187,6 +187,9 @@ Lock objects have the following methods: .. versionchanged:: 3.2 Lock acquires can now be interrupted by signals on POSIX. + .. versionchanged:: 3.14 + Lock acquires can now be interrupted by signals on Windows. + .. 
method:: lock.release() @@ -219,12 +222,6 @@ In addition to these methods, lock objects can also be used via the * Calling :func:`sys.exit` or raising the :exc:`SystemExit` exception is equivalent to calling :func:`_thread.exit`. -* It is platform-dependent whether the :meth:`~threading.Lock.acquire` method - on a lock can be interrupted (so that the :exc:`KeyboardInterrupt` exception - will happen immediately, rather than only after the lock has been acquired or - the operation has timed out). It can be interrupted on POSIX, but not on - Windows. - * When the main thread exits, it is system defined whether the other threads survive. On most systems, they are killed without executing :keyword:`try` ... :keyword:`finally` clauses or executing object diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index cb82fea377697b..d4b343db36efb3 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -567,6 +567,9 @@ All methods are executed atomically. Lock acquisition can now be interrupted by signals on POSIX if the underlying threading implementation supports it. + .. versionchanged:: 3.14 + Lock acquisition can now be interrupted by signals on Windows. + .. method:: release() diff --git a/Misc/NEWS.d/next/Library/2024-10-15-16-50-03.gh-issue-125541.FfhmWo.rst b/Misc/NEWS.d/next/Library/2024-10-15-16-50-03.gh-issue-125541.FfhmWo.rst new file mode 100644 index 00000000000000..7a20bca1739869 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-15-16-50-03.gh-issue-125541.FfhmWo.rst @@ -0,0 +1,4 @@ +Pressing :kbd:`Ctrl-C` while blocked in :meth:`threading.Lock.acquire`, +:meth:`threading.RLock.acquire`, and :meth:`threading.Thread.join` now +interrupts the function call and raises a :exc:`KeyboardInterrupt` exception +on Windows, similar to how those functions behave on macOS and Linux. 
diff --git a/Python/parking_lot.c b/Python/parking_lot.c index a7e9760e35d87a..bffc959e5d0978 100644 --- a/Python/parking_lot.c +++ b/Python/parking_lot.c @@ -111,15 +111,28 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout) millis = (DWORD) div; } } - wait = WaitForSingleObjectEx(sema->platform_sem, millis, FALSE); + + // NOTE: we wait on the sigint event even in non-main threads to match the + // behavior of the other platforms. Non-main threads will ignore the + // Py_PARK_INTR result. + HANDLE sigint_event = _PyOS_SigintEvent(); + HANDLE handles[2] = { sema->platform_sem, sigint_event }; + DWORD count = sigint_event != NULL ? 2 : 1; + wait = WaitForMultipleObjects(count, handles, FALSE, millis); if (wait == WAIT_OBJECT_0) { res = Py_PARK_OK; } + else if (wait == WAIT_OBJECT_0 + 1) { + ResetEvent(sigint_event); + res = Py_PARK_INTR; + } else if (wait == WAIT_TIMEOUT) { res = Py_PARK_TIMEOUT; } else { - res = Py_PARK_INTR; + _Py_FatalErrorFormat(__func__, + "unexpected error from semaphore: %u (error: %u)", + wait, GetLastError()); } #elif defined(_Py_USE_SEMAPHORES) int err; From c3164ae3cf4e8f9ccc4df8ea5f5664c5927ea839 Mon Sep 17 00:00:00 2001 From: Zachary Ware Date: Thu, 17 Oct 2024 17:21:32 -0500 Subject: [PATCH 025/106] gh-125017: Fix refleak from GH-125636 (GH-125664) --- Objects/funcobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/funcobject.c b/Objects/funcobject.c index f86ef32f1827bf..3cb247691386bf 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1264,6 +1264,7 @@ descriptor_set_wrapped_attribute(PyObject *oobj, PyObject *name, PyObject *value PyErr_Format(PyExc_AttributeError, "'%.200s' object has no attribute '%U'", type_name, name); + Py_DECREF(dict); return -1; } else { From 7cf2dbc3cb3ef7be65a98bbfc87246d36d795c82 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 17 Oct 2024 17:49:15 -0600 Subject: [PATCH 026/106] gh-125667: Statically Initialize the Arg Converter Data Values in 
_interpqueuesmodule.c (gh-125668) --- Modules/_interpqueuesmodule.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index 5dec240f02c4db..55c43199ee4d79 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -1518,7 +1518,7 @@ static PyObject * queuesmod_destroy(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"qid", NULL}; - qidarg_converter_data qidarg; + qidarg_converter_data qidarg = {0}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:destroy", kwlist, qidarg_converter, &qidarg)) { return NULL; @@ -1579,7 +1579,7 @@ static PyObject * queuesmod_put(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"qid", "obj", "fmt", "unboundop", NULL}; - qidarg_converter_data qidarg; + qidarg_converter_data qidarg = {0}; PyObject *obj; int fmt; int unboundop; @@ -1615,7 +1615,7 @@ static PyObject * queuesmod_get(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"qid", NULL}; - qidarg_converter_data qidarg; + qidarg_converter_data qidarg = {0}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:get", kwlist, qidarg_converter, &qidarg)) { return NULL; @@ -1651,7 +1651,7 @@ static PyObject * queuesmod_bind(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"qid", NULL}; - qidarg_converter_data qidarg; + qidarg_converter_data qidarg = {0}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:bind", kwlist, qidarg_converter, &qidarg)) { return NULL; @@ -1681,7 +1681,7 @@ queuesmod_release(PyObject *self, PyObject *args, PyObject *kwds) { // Note that only the current interpreter is affected. 
static char *kwlist[] = {"qid", NULL}; - qidarg_converter_data qidarg; + qidarg_converter_data qidarg = {0}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:release", kwlist, qidarg_converter, &qidarg)) { @@ -1710,7 +1710,7 @@ static PyObject * queuesmod_get_maxsize(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"qid", NULL}; - qidarg_converter_data qidarg; + qidarg_converter_data qidarg = {0}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:get_maxsize", kwlist, qidarg_converter, &qidarg)) { @@ -1735,7 +1735,7 @@ static PyObject * queuesmod_get_queue_defaults(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"qid", NULL}; - qidarg_converter_data qidarg; + qidarg_converter_data qidarg = {0}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:get_queue_defaults", kwlist, qidarg_converter, &qidarg)) { @@ -1765,7 +1765,7 @@ static PyObject * queuesmod_is_full(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"qid", NULL}; - qidarg_converter_data qidarg; + qidarg_converter_data qidarg = {0}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:is_full", kwlist, qidarg_converter, &qidarg)) { @@ -1793,7 +1793,7 @@ static PyObject * queuesmod_get_count(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"qid", NULL}; - qidarg_converter_data qidarg; + qidarg_converter_data qidarg = {0}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:get_count", kwlist, qidarg_converter, &qidarg)) { From 77cebb1ce9baac9e01a45d34113c3bea74940d90 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Thu, 17 Oct 2024 17:29:11 -0700 Subject: [PATCH 027/106] gh-125600: Only show stale code warning on source code display commands (#125601) --- Lib/pdb.py | 24 +++++++++++++++---- Lib/test/test_pdb.py | 19 +++++++++++++++ ...-10-16-15-55-50.gh-issue-125600.yMsJx0.rst | 1 + 3 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 
Misc/NEWS.d/next/Library/2024-10-16-15-55-50.gh-issue-125600.yMsJx0.rst diff --git a/Lib/pdb.py b/Lib/pdb.py index 3e5e6088fdcc7e..cd7a7042fa6987 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -402,6 +402,8 @@ def setup(self, f, tb): self.curframe = self.stack[self.curindex][0] self.set_convenience_variable(self.curframe, '_frame', self.curframe) + self._save_initial_file_mtime(self.curframe) + if self._chained_exceptions: self.set_convenience_variable( self.curframe, @@ -494,9 +496,21 @@ def _cmdloop(self): except KeyboardInterrupt: self.message('--KeyboardInterrupt--') + def _save_initial_file_mtime(self, frame): + """save the mtime of the all the files in the frame stack in the file mtime table + if they haven't been saved yet.""" + while frame: + filename = frame.f_code.co_filename + if filename not in self._file_mtime_table: + try: + self._file_mtime_table[filename] = os.path.getmtime(filename) + except Exception: + pass + frame = frame.f_back + def _validate_file_mtime(self): - """Check if the source file of the current frame has been modified since - the last time we saw it. If so, give a warning.""" + """Check if the source file of the current frame has been modified. + If so, give a warning and reset the modify time to current.""" try: filename = self.curframe.f_code.co_filename mtime = os.path.getmtime(filename) @@ -506,7 +520,7 @@ def _validate_file_mtime(self): mtime != self._file_mtime_table[filename]): self.message(f"*** WARNING: file '{filename}' was edited, " "running stale code until the program is rerun") - self._file_mtime_table[filename] = mtime + self._file_mtime_table[filename] = mtime # Called before loop, handles display expressions # Set up convenience variable containers @@ -836,7 +850,6 @@ def onecmd(self, line): a breakpoint command list definition. 
""" if not self.commands_defining: - self._validate_file_mtime() if line.startswith('_pdbcmd'): command, arg, line = self.parseline(line) if hasattr(self, command): @@ -980,6 +993,7 @@ def completedefault(self, text, line, begidx, endidx): def _pdbcmd_print_frame_status(self, arg): self.print_stack_trace(0) + self._validate_file_mtime() self._show_display() def _pdbcmd_silence_frame_status(self, arg): @@ -1861,6 +1875,7 @@ def do_list(self, arg): self.message('[EOF]') except KeyboardInterrupt: pass + self._validate_file_mtime() do_l = do_list def do_longlist(self, arg): @@ -1879,6 +1894,7 @@ def do_longlist(self, arg): self.error(err) return self._print_lines(lines, lineno, breaklist, self.curframe) + self._validate_file_mtime() do_ll = do_longlist def do_source(self, arg): diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index 8136c591a33001..7e6f276d355a14 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -3711,6 +3711,25 @@ def test_file_modified_after_execution(self): self.assertIn("WARNING:", stdout) self.assertIn("was edited", stdout) + def test_file_modified_and_immediately_restarted(self): + script = """ + print("hello") + """ + + # the time.sleep is needed for low-resolution filesystems like HFS+ + commands = """ + filename = $_frame.f_code.co_filename + f = open(filename, "w") + f.write("print('goodbye')") + import time; time.sleep(1) + f.close() + restart + """ + + stdout, stderr = self.run_pdb_script(script, commands) + self.assertNotIn("WARNING:", stdout) + self.assertNotIn("was edited", stdout) + def test_file_modified_after_execution_with_multiple_instances(self): # the time.sleep is needed for low-resolution filesystems like HFS+ script = """ diff --git a/Misc/NEWS.d/next/Library/2024-10-16-15-55-50.gh-issue-125600.yMsJx0.rst b/Misc/NEWS.d/next/Library/2024-10-16-15-55-50.gh-issue-125600.yMsJx0.rst new file mode 100644 index 00000000000000..19bf4fbefb601b --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2024-10-16-15-55-50.gh-issue-125600.yMsJx0.rst @@ -0,0 +1 @@ +Only show stale code warning in :mod:`pdb` when we display source code. From a0f5c8e6272a1fd5422892d773923b138e77ae5f Mon Sep 17 00:00:00 2001 From: Furkan Onder Date: Fri, 18 Oct 2024 05:08:34 +0300 Subject: [PATCH 028/106] gh-125620: Skip check_resource_tracker_death on NetBSD due to long wait for SIGKILL process termination (GH-125621) * Skip test_resource_tracker_sigkill on NetBSD --- Lib/test/_test_multiprocessing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index a059a6b8340448..065fc27b770438 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -5761,6 +5761,8 @@ def test_resource_tracker_sigterm(self): # Catchable signal (ignored by semaphore tracker) self.check_resource_tracker_death(signal.SIGTERM, False) + @unittest.skipIf(sys.platform.startswith("netbsd"), + "gh-125620: Skip on NetBSD due to long wait for SIGKILL process termination.") def test_resource_tracker_sigkill(self): # Uncatchable signal. 
self.check_resource_tracker_death(signal.SIGKILL, True) From d358425e6968858e52908794d15f37e62abc74ec Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Fri, 18 Oct 2024 14:26:29 +0200 Subject: [PATCH 029/106] gh-125682: Reject non-ASCII digits in the Python implementation of JSON decoder (GH-125687) --- Lib/json/scanner.py | 2 +- Lib/test/test_json/test_decode.py | 6 ++++++ .../Library/2024-10-18-09-51-29.gh-issue-125682.vsj4cU.rst | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-18-09-51-29.gh-issue-125682.vsj4cU.rst diff --git a/Lib/json/scanner.py b/Lib/json/scanner.py index 7a61cfc2d24dce..090897515fe2f3 100644 --- a/Lib/json/scanner.py +++ b/Lib/json/scanner.py @@ -9,7 +9,7 @@ __all__ = ['make_scanner'] NUMBER_RE = re.compile( - r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', + r'(-?(?:0|[1-9][0-9]*))(\.[0-9]+)?([eE][-+]?[0-9]+)?', (re.VERBOSE | re.MULTILINE | re.DOTALL)) def py_make_scanner(context): diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 79fb239b35d3f2..2250af964c022b 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -16,6 +16,12 @@ def test_float(self): self.assertIsInstance(rval, float) self.assertEqual(rval, 1.0) + def test_nonascii_digits_rejected(self): + # JSON specifies only ascii digits, see gh-125687 + for num in ["1\uff10", "0.\uff10", "0e\uff10"]: + with self.assertRaises(self.JSONDecodeError): + self.loads(num) + def test_bytes(self): self.assertEqual(self.loads(b"1"), 1) diff --git a/Misc/NEWS.d/next/Library/2024-10-18-09-51-29.gh-issue-125682.vsj4cU.rst b/Misc/NEWS.d/next/Library/2024-10-18-09-51-29.gh-issue-125682.vsj4cU.rst new file mode 100644 index 00000000000000..3eb2905ad8d810 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-18-09-51-29.gh-issue-125682.vsj4cU.rst @@ -0,0 +1,2 @@ +Reject non-ASCII digits in the Python implementation of :func:`json.loads` +conforming to the JSON specification. 
From df751363e386d1f77c5ba9515a5539902457d386 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Fri, 18 Oct 2024 14:29:47 +0200 Subject: [PATCH 030/106] gh-125660: Reject invalid unicode escapes for Python implementation of JSON decoder (GH-125683) --- Lib/json/decoder.py | 9 +++++---- Lib/test/test_json/test_scanstring.py | 10 ++++++++++ .../2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst | 1 + 3 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index d69a45d6793069..ff4bfcdcc407b9 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -50,17 +50,18 @@ def __reduce__(self): } +HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS) STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { '"': '"', '\\': '\\', '/': '/', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', } -def _decode_uXXXX(s, pos): - esc = s[pos + 1:pos + 5] - if len(esc) == 4 and esc[1] not in 'xX': +def _decode_uXXXX(s, pos, _m=HEXDIGITS.match): + esc = _m(s, pos + 1) + if esc is not None: try: - return int(esc, 16) + return int(esc.group(), 16) except ValueError: pass msg = "Invalid \\uXXXX escape" diff --git a/Lib/test/test_json/test_scanstring.py b/Lib/test/test_json/test_scanstring.py index 2d3ee8a8bf0f92..cca556a3b95bab 100644 --- a/Lib/test/test_json/test_scanstring.py +++ b/Lib/test/test_json/test_scanstring.py @@ -116,6 +116,11 @@ def test_bad_escapes(self): '"\\u012z"', '"\\u0x12"', '"\\u0X12"', + '"\\u{0}"'.format("\uff10" * 4), + '"\\u 123"', + '"\\u-123"', + '"\\u+123"', + '"\\u1_23"', '"\\ud834\\"', '"\\ud834\\u"', '"\\ud834\\ud"', @@ -127,6 +132,11 @@ def test_bad_escapes(self): '"\\ud834\\udd2z"', '"\\ud834\\u0x20"', '"\\ud834\\u0X20"', + '"\\ud834\\u{0}"'.format("\uff10" * 4), + '"\\ud834\\u 123"', + '"\\ud834\\u-123"', + '"\\ud834\\u+123"', + '"\\ud834\\u1_23"', ] for s in bad_escapes: with 
self.assertRaises(self.JSONDecodeError, msg=s): diff --git a/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst b/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst new file mode 100644 index 00000000000000..74d76c7bddae7d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst @@ -0,0 +1 @@ +Reject invalid unicode escapes for Python implementation of :func:`json.loads`. From 10c4c95395771fb37e93811aaace42f026c16de5 Mon Sep 17 00:00:00 2001 From: "RUANG (Roy James)" Date: Fri, 18 Oct 2024 20:45:17 +0800 Subject: [PATCH 031/106] gh-123610: Added additional types to ctypes/wintypes.py (GH-124086) --- Lib/ctypes/wintypes.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Lib/ctypes/wintypes.py b/Lib/ctypes/wintypes.py index 9c4e721438aad5..4beba0d19513e2 100644 --- a/Lib/ctypes/wintypes.py +++ b/Lib/ctypes/wintypes.py @@ -63,10 +63,16 @@ def __repr__(self): HBITMAP = HANDLE HBRUSH = HANDLE HCOLORSPACE = HANDLE +HCONV = HANDLE +HCONVLIST = HANDLE +HCURSOR = HANDLE HDC = HANDLE +HDDEDATA = HANDLE HDESK = HANDLE +HDROP = HANDLE HDWP = HANDLE HENHMETAFILE = HANDLE +HFILE = INT HFONT = HANDLE HGDIOBJ = HANDLE HGLOBAL = HANDLE @@ -82,9 +88,11 @@ def __repr__(self): HMONITOR = HANDLE HPALETTE = HANDLE HPEN = HANDLE +HRESULT = LONG HRGN = HANDLE HRSRC = HANDLE HSTR = HANDLE +HSZ = HANDLE HTASK = HANDLE HWINSTA = HANDLE HWND = HANDLE From cda0ec8e7c4e9a010e5f73c5afaf18f86cb27b97 Mon Sep 17 00:00:00 2001 From: Wulian Date: Fri, 18 Oct 2024 20:48:18 +0800 Subject: [PATCH 032/106] gh-124102: Clean up unsupported VS and WiX detections (GH-124784) --- PCbuild/pyproject.props | 5 ----- Tools/msi/README.txt | 20 +++++++++++--------- Tools/msi/wix.props | 2 -- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props index 9c85e5efa4af4a..c65341179376ea 100644 --- a/PCbuild/pyproject.props +++ b/PCbuild/pyproject.props @@ -217,11 +217,6 @@ 
public override bool Execute() { - - - $(VCInstallDir)\redist\ - - <_RedistFiles Include="$(VCInstallDir)\Redist\MSVC\*\*.*" /> diff --git a/Tools/msi/README.txt b/Tools/msi/README.txt index 98e5ba039d2bcd..8ae156450d5240 100644 --- a/Tools/msi/README.txt +++ b/Tools/msi/README.txt @@ -61,18 +61,20 @@ the initial download size by separating them into their own MSIs. Building the Installer ====================== -Before building the installer, download extra build dependencies using -Tools\msi\get_externals.bat. (Note that this is in addition to the +Before building the installer, download the extra build dependencies +using Tools\msi\get_externals.bat. (Note that this is in addition to the similarly named file in PCbuild.) -One of the dependencies used in builds is WiX, a toolset that lets developers -create installers for Windows Installer, the Windows installation engine. +One of the dependencies used in the build process is WiX, a toolset that +allows developers to create installers for Windows Installer, the +Windows installation engine. If you're not using the copy of WiX +installed by Tools\msi\get_externals.bat, you'll need to set the +"WixInstallPath" environment variable before building. -Additionally, make sure "MSVC v14x - VS 20xx C++ ARM64 build tools" are -selected under "Desktop Development with C++" in "Visual Studio installer", -even if you are not building on ARM64. This is required because we have -upgraded to WiX-3.14, which requires these tools for Python 3.11 and later -versions. +Additionally, ensure that "MSVC v14x - VS 20xx C++ ARM64/ARM64EC build tools" +is selected under "Desktop Development with C++" in the "Visual Studio Installer", +even if you're not building on ARM64. This is required because we've upgraded +to WiX 3.14, which requires these tools for Python 3.10 and later versions. 
For testing, the installer should be built with the Tools/msi/build.bat script: diff --git a/Tools/msi/wix.props b/Tools/msi/wix.props index d8ced317d0ce81..707c8aeacb648d 100644 --- a/Tools/msi/wix.props +++ b/Tools/msi/wix.props @@ -5,8 +5,6 @@ $(MSBuildThisFileDirectory)\Wix\ $(ExternalsDir)\windows-installer\wix-314\ - $(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows Installer XML\3.10@InstallRoot) - $(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Windows Installer XML\3.10@InstallRoot) $(WixInstallPath)\Wix.targets \ No newline at end of file From 2e950e341930ea79549137d4d3771d5edb940e65 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 18 Oct 2024 16:51:29 +0300 Subject: [PATCH 033/106] Add tests for time.strftime() with invalid format string (GH-125696) --- Lib/test/test_time.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index f8b99a9b6a63f5..d368f08b610870 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -18,7 +18,7 @@ except ImportError: _testinternalcapi = None -from test.support import skip_if_buggy_ucrt_strfptime +from test.support import skip_if_buggy_ucrt_strfptime, SuppressCrashReport # Max year is only limited by the size of C int. 
SIZEOF_INT = sysconfig.get_config_var('SIZEOF_INT') or 4 @@ -182,6 +182,17 @@ def test_strftime(self): self.assertRaises(TypeError, time.strftime, b'%S', tt) + def test_strftime_invalid_format(self): + tt = time.gmtime(self.t) + with SuppressCrashReport(): + for i in range(1, 128): + format = ' %' + chr(i) + with self.subTest(format=format): + try: + time.strftime(format, tt) + except ValueError as exc: + self.assertEqual(str(exc), 'Invalid format string') + def test_strftime_special(self): tt = time.gmtime(self.t) s1 = time.strftime('%c', tt) From 6d93690954daae9e9a368084765a4005f957686d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 18 Oct 2024 09:26:08 -0600 Subject: [PATCH 034/106] gh-125604: Move _Py_AuditHookEntry, etc. Out of pycore_runtime.h (gh-125605) This is essentially a cleanup, moving a handful of API declarations to the header files where they fit best, creating new ones when needed. We do the following: * add pycore_debug_offsets.h and move _Py_DebugOffsets, etc. there * inline struct _getargs_runtime_state and struct _gilstate_runtime_state in _PyRuntimeState * move struct _reftracer_runtime_state to the existing pycore_object_state.h * add pycore_audit.h and move to it _Py_AuditHookEntry, _PySys_Audit(), and _PySys_ClearAuditHooks * add audit.h and cpython/audit.h and move the existing audit-related API there * move the perfmap/trampoline API from cpython/sysmodule.h to cpython/ceval.h, and remove the now-empty cpython/sysmodule.h --- Include/Python.h | 1 + Include/audit.h | 30 +++ Include/cpython/audit.h | 8 + Include/cpython/ceval.h | 18 ++ Include/cpython/sysmodule.h | 22 -- Include/internal/pycore_audit.h | 35 +++ Include/internal/pycore_debug_offsets.h | 269 ++++++++++++++++++++++++ Include/internal/pycore_object_state.h | 8 + Include/internal/pycore_runtime.h | 197 ++--------------- Include/internal/pycore_runtime_init.h | 107 +--------- Include/internal/pycore_sysmodule.h | 10 - Include/sysmodule.h | 17 -- Makefile.pre.in | 5 +-
Modules/_testexternalinspection.c | 4 +- Objects/object.c | 1 + PCbuild/pythoncore.vcxproj | 5 +- PCbuild/pythoncore.vcxproj.filters | 15 +- Python/bytecodes.c | 2 +- Python/ceval.c | 2 +- Python/errors.c | 3 +- Python/import.c | 3 +- Python/legacy_tracing.c | 2 +- Python/pylifecycle.c | 5 +- Python/pystate.c | 2 +- Python/pythonrun.c | 3 +- Python/sysmodule.c | 1 + 26 files changed, 429 insertions(+), 346 deletions(-) create mode 100644 Include/audit.h create mode 100644 Include/cpython/audit.h delete mode 100644 Include/cpython/sysmodule.h create mode 100644 Include/internal/pycore_audit.h create mode 100644 Include/internal/pycore_debug_offsets.h diff --git a/Include/Python.h b/Include/Python.h index e1abdd16f031fb..717e27feab62db 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -124,6 +124,7 @@ #include "pylifecycle.h" #include "ceval.h" #include "sysmodule.h" +#include "audit.h" #include "osmodule.h" #include "intrcheck.h" #include "import.h" diff --git a/Include/audit.h b/Include/audit.h new file mode 100644 index 00000000000000..793b7077e1027b --- /dev/null +++ b/Include/audit.h @@ -0,0 +1,30 @@ +#ifndef Py_AUDIT_H +#define Py_AUDIT_H +#ifdef __cplusplus +extern "C" { +#endif + + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +PyAPI_FUNC(int) PySys_Audit( + const char *event, + const char *argFormat, + ...); + +PyAPI_FUNC(int) PySys_AuditTuple( + const char *event, + PyObject *args); +#endif + + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_AUDIT_H +# include "cpython/audit.h" +# undef Py_CPYTHON_AUDIT_H +#endif + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_AUDIT_H */ diff --git a/Include/cpython/audit.h b/Include/cpython/audit.h new file mode 100644 index 00000000000000..3c5c7a8c06091d --- /dev/null +++ b/Include/cpython/audit.h @@ -0,0 +1,8 @@ +#ifndef Py_CPYTHON_AUDIT_H +# error "this header file must not be included directly" +#endif + + +typedef int(*Py_AuditHookFunction)(const char *, PyObject *, void *); + +PyAPI_FUNC(int) 
PySys_AddAuditHook(Py_AuditHookFunction, void*); diff --git a/Include/cpython/ceval.h b/Include/cpython/ceval.h index 78f7405661662f..ca8109e3248a8d 100644 --- a/Include/cpython/ceval.h +++ b/Include/cpython/ceval.h @@ -23,3 +23,21 @@ _PyEval_RequestCodeExtraIndex(freefunc f) { PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *); PyAPI_FUNC(int) _PyEval_SliceIndexNotNone(PyObject *, Py_ssize_t *); + + +// Trampoline API + +typedef struct { + FILE* perf_map; + PyThread_type_lock map_lock; +} PerfMapState; + +PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void); +PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry( + const void *code_addr, + unsigned int code_size, + const char *entry_name); +PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void); +PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename); +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *); +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); diff --git a/Include/cpython/sysmodule.h b/Include/cpython/sysmodule.h deleted file mode 100644 index a3ac07f538a94f..00000000000000 --- a/Include/cpython/sysmodule.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef Py_CPYTHON_SYSMODULE_H -# error "this header file must not be included directly" -#endif - -typedef int(*Py_AuditHookFunction)(const char *, PyObject *, void *); - -PyAPI_FUNC(int) PySys_AddAuditHook(Py_AuditHookFunction, void*); - -typedef struct { - FILE* perf_map; - PyThread_type_lock map_lock; -} PerfMapState; - -PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void); -PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry( - const void *code_addr, - unsigned int code_size, - const char *entry_name); -PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void); -PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename); -PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *); -PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); diff --git a/Include/internal/pycore_audit.h 
b/Include/internal/pycore_audit.h new file mode 100644 index 00000000000000..2811aaa6236123 --- /dev/null +++ b/Include/internal/pycore_audit.h @@ -0,0 +1,35 @@ +#ifndef Py_INTERNAL_AUDIT_H +#define Py_INTERNAL_AUDIT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +/* Runtime audit hook state */ + +typedef struct _Py_AuditHookEntry { + struct _Py_AuditHookEntry *next; + Py_AuditHookFunction hookCFunction; + void *userData; +} _Py_AuditHookEntry; + + +extern int _PySys_Audit( + PyThreadState *tstate, + const char *event, + const char *argFormat, + ...); + +// _PySys_ClearAuditHooks() must not be exported: use extern rather than +// PyAPI_FUNC(). We want minimal exposure of this function. +extern void _PySys_ClearAuditHooks(PyThreadState *tstate); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_AUDIT_H */ diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h new file mode 100644 index 00000000000000..184f4b9360b6d3 --- /dev/null +++ b/Include/internal/pycore_debug_offsets.h @@ -0,0 +1,269 @@ +#ifndef Py_INTERNAL_DEBUG_OFFSETS_H +#define Py_INTERNAL_DEBUG_OFFSETS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +#define _Py_Debug_Cookie "xdebugpy" + +#ifdef Py_GIL_DISABLED +# define _Py_Debug_gilruntimestate_enabled offsetof(struct _gil_runtime_state, enabled) +# define _Py_Debug_Free_Threaded 1 +#else +# define _Py_Debug_gilruntimestate_enabled 0 +# define _Py_Debug_Free_Threaded 0 +#endif + + +typedef struct _Py_DebugOffsets { + char cookie[8]; + uint64_t version; + uint64_t free_threaded; + // Runtime state offset; + struct _runtime_state { + uint64_t size; + uint64_t finalizing; + uint64_t interpreters_head; + } runtime_state; + + // Interpreter state offset; + struct _interpreter_state { + uint64_t size; + uint64_t id; + uint64_t next; + 
uint64_t threads_head; + uint64_t gc; + uint64_t imports_modules; + uint64_t sysdict; + uint64_t builtins; + uint64_t ceval_gil; + uint64_t gil_runtime_state; + uint64_t gil_runtime_state_enabled; + uint64_t gil_runtime_state_locked; + uint64_t gil_runtime_state_holder; + } interpreter_state; + + // Thread state offset; + struct _thread_state{ + uint64_t size; + uint64_t prev; + uint64_t next; + uint64_t interp; + uint64_t current_frame; + uint64_t thread_id; + uint64_t native_thread_id; + uint64_t datastack_chunk; + uint64_t status; + } thread_state; + + // InterpreterFrame offset; + struct _interpreter_frame { + uint64_t size; + uint64_t previous; + uint64_t executable; + uint64_t instr_ptr; + uint64_t localsplus; + uint64_t owner; + } interpreter_frame; + + // Code object offset; + struct _code_object { + uint64_t size; + uint64_t filename; + uint64_t name; + uint64_t qualname; + uint64_t linetable; + uint64_t firstlineno; + uint64_t argcount; + uint64_t localsplusnames; + uint64_t localspluskinds; + uint64_t co_code_adaptive; + } code_object; + + // PyObject offset; + struct _pyobject { + uint64_t size; + uint64_t ob_type; + } pyobject; + + // PyTypeObject object offset; + struct _type_object { + uint64_t size; + uint64_t tp_name; + uint64_t tp_repr; + uint64_t tp_flags; + } type_object; + + // PyTuple object offset; + struct _tuple_object { + uint64_t size; + uint64_t ob_item; + uint64_t ob_size; + } tuple_object; + + // PyList object offset; + struct _list_object { + uint64_t size; + uint64_t ob_item; + uint64_t ob_size; + } list_object; + + // PyDict object offset; + struct _dict_object { + uint64_t size; + uint64_t ma_keys; + uint64_t ma_values; + } dict_object; + + // PyFloat object offset; + struct _float_object { + uint64_t size; + uint64_t ob_fval; + } float_object; + + // PyLong object offset; + struct _long_object { + uint64_t size; + uint64_t lv_tag; + uint64_t ob_digit; + } long_object; + + // PyBytes object offset; + struct _bytes_object { + 
uint64_t size; + uint64_t ob_size; + uint64_t ob_sval; + } bytes_object; + + // Unicode object offset; + struct _unicode_object { + uint64_t size; + uint64_t state; + uint64_t length; + uint64_t asciiobject_size; + } unicode_object; + + // GC runtime state offset; + struct _gc { + uint64_t size; + uint64_t collecting; + } gc; +} _Py_DebugOffsets; + + +#define _Py_DebugOffsets_INIT(debug_cookie) { \ + .cookie = debug_cookie, \ + .version = PY_VERSION_HEX, \ + .free_threaded = _Py_Debug_Free_Threaded, \ + .runtime_state = { \ + .size = sizeof(_PyRuntimeState), \ + .finalizing = offsetof(_PyRuntimeState, _finalizing), \ + .interpreters_head = offsetof(_PyRuntimeState, interpreters.head), \ + }, \ + .interpreter_state = { \ + .size = sizeof(PyInterpreterState), \ + .id = offsetof(PyInterpreterState, id), \ + .next = offsetof(PyInterpreterState, next), \ + .threads_head = offsetof(PyInterpreterState, threads.head), \ + .gc = offsetof(PyInterpreterState, gc), \ + .imports_modules = offsetof(PyInterpreterState, imports.modules), \ + .sysdict = offsetof(PyInterpreterState, sysdict), \ + .builtins = offsetof(PyInterpreterState, builtins), \ + .ceval_gil = offsetof(PyInterpreterState, ceval.gil), \ + .gil_runtime_state = offsetof(PyInterpreterState, _gil), \ + .gil_runtime_state_enabled = _Py_Debug_gilruntimestate_enabled, \ + .gil_runtime_state_locked = offsetof(PyInterpreterState, _gil.locked), \ + .gil_runtime_state_holder = offsetof(PyInterpreterState, _gil.last_holder), \ + }, \ + .thread_state = { \ + .size = sizeof(PyThreadState), \ + .prev = offsetof(PyThreadState, prev), \ + .next = offsetof(PyThreadState, next), \ + .interp = offsetof(PyThreadState, interp), \ + .current_frame = offsetof(PyThreadState, current_frame), \ + .thread_id = offsetof(PyThreadState, thread_id), \ + .native_thread_id = offsetof(PyThreadState, native_thread_id), \ + .datastack_chunk = offsetof(PyThreadState, datastack_chunk), \ + .status = offsetof(PyThreadState, _status), \ + }, \ + 
.interpreter_frame = { \ + .size = sizeof(_PyInterpreterFrame), \ + .previous = offsetof(_PyInterpreterFrame, previous), \ + .executable = offsetof(_PyInterpreterFrame, f_executable), \ + .instr_ptr = offsetof(_PyInterpreterFrame, instr_ptr), \ + .localsplus = offsetof(_PyInterpreterFrame, localsplus), \ + .owner = offsetof(_PyInterpreterFrame, owner), \ + }, \ + .code_object = { \ + .size = sizeof(PyCodeObject), \ + .filename = offsetof(PyCodeObject, co_filename), \ + .name = offsetof(PyCodeObject, co_name), \ + .qualname = offsetof(PyCodeObject, co_qualname), \ + .linetable = offsetof(PyCodeObject, co_linetable), \ + .firstlineno = offsetof(PyCodeObject, co_firstlineno), \ + .argcount = offsetof(PyCodeObject, co_argcount), \ + .localsplusnames = offsetof(PyCodeObject, co_localsplusnames), \ + .localspluskinds = offsetof(PyCodeObject, co_localspluskinds), \ + .co_code_adaptive = offsetof(PyCodeObject, co_code_adaptive), \ + }, \ + .pyobject = { \ + .size = sizeof(PyObject), \ + .ob_type = offsetof(PyObject, ob_type), \ + }, \ + .type_object = { \ + .size = sizeof(PyTypeObject), \ + .tp_name = offsetof(PyTypeObject, tp_name), \ + .tp_repr = offsetof(PyTypeObject, tp_repr), \ + .tp_flags = offsetof(PyTypeObject, tp_flags), \ + }, \ + .tuple_object = { \ + .size = sizeof(PyTupleObject), \ + .ob_item = offsetof(PyTupleObject, ob_item), \ + .ob_size = offsetof(PyTupleObject, ob_base.ob_size), \ + }, \ + .list_object = { \ + .size = sizeof(PyListObject), \ + .ob_item = offsetof(PyListObject, ob_item), \ + .ob_size = offsetof(PyListObject, ob_base.ob_size), \ + }, \ + .dict_object = { \ + .size = sizeof(PyDictObject), \ + .ma_keys = offsetof(PyDictObject, ma_keys), \ + .ma_values = offsetof(PyDictObject, ma_values), \ + }, \ + .float_object = { \ + .size = sizeof(PyFloatObject), \ + .ob_fval = offsetof(PyFloatObject, ob_fval), \ + }, \ + .long_object = { \ + .size = sizeof(PyLongObject), \ + .lv_tag = offsetof(PyLongObject, long_value.lv_tag), \ + .ob_digit = 
offsetof(PyLongObject, long_value.ob_digit), \ + }, \ + .bytes_object = { \ + .size = sizeof(PyBytesObject), \ + .ob_size = offsetof(PyBytesObject, ob_base.ob_size), \ + .ob_sval = offsetof(PyBytesObject, ob_sval), \ + }, \ + .unicode_object = { \ + .size = sizeof(PyUnicodeObject), \ + .state = offsetof(PyUnicodeObject, _base._base.state), \ + .length = offsetof(PyUnicodeObject, _base._base.length), \ + .asciiobject_size = sizeof(PyASCIIObject), \ + }, \ + .gc = { \ + .size = sizeof(struct _gc_runtime_state), \ + .collecting = offsetof(struct _gc_runtime_state, collecting), \ + }, \ +} + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_DEBUG_OFFSETS_H */ diff --git a/Include/internal/pycore_object_state.h b/Include/internal/pycore_object_state.h index e7fa7c1f10d6d1..8a47a6d9e6eb0d 100644 --- a/Include/internal/pycore_object_state.h +++ b/Include/internal/pycore_object_state.h @@ -11,6 +11,14 @@ extern "C" { #include "pycore_freelist_state.h" // _Py_freelists #include "pycore_hashtable.h" // _Py_hashtable_t + +/* Reference tracer state */ +struct _reftracer_runtime_state { + PyRefTracer tracer_func; + void* tracer_data; +}; + + struct _py_object_runtime_state { #ifdef Py_REF_DEBUG Py_ssize_t interpreter_leaks; diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index d4291b87261ae0..7f592aa6cf9f05 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -9,8 +9,10 @@ extern "C" { #endif #include "pycore_atexit.h" // struct _atexit_runtime_state +#include "pycore_audit.h" // _Py_AuditHookEntry #include "pycore_ceval_state.h" // struct _ceval_runtime_state -#include "pycore_crossinterp.h" // struct _xidregistry +#include "pycore_crossinterp.h" // struct _xidregistry +#include "pycore_debug_offsets.h" // _Py_DebugOffsets #include "pycore_faulthandler.h" // struct _faulthandler_runtime_state #include "pycore_floatobject.h" // struct _Py_float_runtime_state #include "pycore_import.h" // struct 
_import_runtime_state @@ -25,185 +27,12 @@ extern "C" { #include "pycore_typeobject.h" // struct _types_runtime_state #include "pycore_unicodeobject.h" // struct _Py_unicode_runtime_state -struct _getargs_runtime_state { - struct _PyArg_Parser *static_parsers; -}; - -/* GIL state */ - -struct _gilstate_runtime_state { - /* bpo-26558: Flag to disable PyGILState_Check(). - If set to non-zero, PyGILState_Check() always return 1. */ - int check_enabled; - /* The single PyInterpreterState used by this process' - GILState implementation - */ - /* TODO: Given interp_main, it may be possible to kill this ref */ - PyInterpreterState *autoInterpreterState; -}; - -/* Runtime audit hook state */ - -#define _Py_Debug_Cookie "xdebugpy" - -#ifdef Py_GIL_DISABLED -# define _Py_Debug_gilruntimestate_enabled offsetof(struct _gil_runtime_state, enabled) -# define _Py_Debug_Free_Threaded 1 -#else -# define _Py_Debug_gilruntimestate_enabled 0 -# define _Py_Debug_Free_Threaded 0 -#endif -typedef struct _Py_AuditHookEntry { - struct _Py_AuditHookEntry *next; - Py_AuditHookFunction hookCFunction; - void *userData; -} _Py_AuditHookEntry; - -typedef struct _Py_DebugOffsets { - char cookie[8]; - uint64_t version; - uint64_t free_threaded; - // Runtime state offset; - struct _runtime_state { - uint64_t size; - uint64_t finalizing; - uint64_t interpreters_head; - } runtime_state; - - // Interpreter state offset; - struct _interpreter_state { - uint64_t size; - uint64_t id; - uint64_t next; - uint64_t threads_head; - uint64_t gc; - uint64_t imports_modules; - uint64_t sysdict; - uint64_t builtins; - uint64_t ceval_gil; - uint64_t gil_runtime_state; - uint64_t gil_runtime_state_enabled; - uint64_t gil_runtime_state_locked; - uint64_t gil_runtime_state_holder; - } interpreter_state; - - // Thread state offset; - struct _thread_state{ - uint64_t size; - uint64_t prev; - uint64_t next; - uint64_t interp; - uint64_t current_frame; - uint64_t thread_id; - uint64_t native_thread_id; - uint64_t 
datastack_chunk; - uint64_t status; - } thread_state; - - // InterpreterFrame offset; - struct _interpreter_frame { - uint64_t size; - uint64_t previous; - uint64_t executable; - uint64_t instr_ptr; - uint64_t localsplus; - uint64_t owner; - } interpreter_frame; - - // Code object offset; - struct _code_object { - uint64_t size; - uint64_t filename; - uint64_t name; - uint64_t qualname; - uint64_t linetable; - uint64_t firstlineno; - uint64_t argcount; - uint64_t localsplusnames; - uint64_t localspluskinds; - uint64_t co_code_adaptive; - } code_object; - - // PyObject offset; - struct _pyobject { - uint64_t size; - uint64_t ob_type; - } pyobject; - - // PyTypeObject object offset; - struct _type_object { - uint64_t size; - uint64_t tp_name; - uint64_t tp_repr; - uint64_t tp_flags; - } type_object; - - // PyTuple object offset; - struct _tuple_object { - uint64_t size; - uint64_t ob_item; - uint64_t ob_size; - } tuple_object; - - // PyList object offset; - struct _list_object { - uint64_t size; - uint64_t ob_item; - uint64_t ob_size; - } list_object; - - // PyDict object offset; - struct _dict_object { - uint64_t size; - uint64_t ma_keys; - uint64_t ma_values; - } dict_object; - - // PyFloat object offset; - struct _float_object { - uint64_t size; - uint64_t ob_fval; - } float_object; - - // PyLong object offset; - struct _long_object { - uint64_t size; - uint64_t lv_tag; - uint64_t ob_digit; - } long_object; - - // PyBytes object offset; - struct _bytes_object { - uint64_t size; - uint64_t ob_size; - uint64_t ob_sval; - } bytes_object; - - // Unicode object offset; - struct _unicode_object { - uint64_t size; - uint64_t state; - uint64_t length; - uint64_t asciiobject_size; - } unicode_object; - - // GC runtime state offset; - struct _gc { - uint64_t size; - uint64_t collecting; - } gc; -} _Py_DebugOffsets; - -/* Reference tracer state */ -struct _reftracer_runtime_state { - PyRefTracer tracer_func; - void* tracer_data; -}; /* Full Python runtime state */ /* 
_PyRuntimeState holds the global state for the CPython runtime. - That data is exposed in the internal API as a static variable (_PyRuntime). + That data is exported by the internal API as a global variable + (_PyRuntime, defined near the top of pylifecycle.c). */ typedef struct pyruntimestate { /* This field must be first to facilitate locating it by out of process @@ -299,8 +128,19 @@ typedef struct pyruntimestate { struct _import_runtime_state imports; struct _ceval_runtime_state ceval; - struct _gilstate_runtime_state gilstate; - struct _getargs_runtime_state getargs; + struct _gilstate_runtime_state { + /* bpo-26558: Flag to disable PyGILState_Check(). + If set to non-zero, PyGILState_Check() always return 1. */ + int check_enabled; + /* The single PyInterpreterState used by this process' + GILState implementation + */ + /* TODO: Given interp_main, it may be possible to kill this ref */ + PyInterpreterState *autoInterpreterState; + } gilstate; + struct _getargs_runtime_state { + struct _PyArg_Parser *static_parsers; + } getargs; struct _fileutils_state fileutils; struct _faulthandler_runtime_state faulthandler; struct _tracemalloc_runtime_state tracemalloc; @@ -404,6 +244,7 @@ _PyRuntimeState_SetFinalizing(_PyRuntimeState *runtime, PyThreadState *tstate) { } } + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index a17ba46966daa1..e99febab2f3d57 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -9,6 +9,7 @@ extern "C" { #endif #include "pycore_ceval_state.h" // _PyEval_RUNTIME_PERF_INIT +#include "pycore_debug_offsets.h" // _Py_DebugOffsets_INIT() #include "pycore_faulthandler.h" // _faulthandler_runtime_state_INIT #include "pycore_floatobject.h" // _py_float_format_unknown #include "pycore_function.h" @@ -32,111 +33,7 @@ extern PyTypeObject _PyExc_MemoryError; #define _PyRuntimeState_INIT(runtime, debug_cookie) \ { \ - .debug_offsets = { 
\ - .cookie = debug_cookie, \ - .version = PY_VERSION_HEX, \ - .free_threaded = _Py_Debug_Free_Threaded, \ - .runtime_state = { \ - .size = sizeof(_PyRuntimeState), \ - .finalizing = offsetof(_PyRuntimeState, _finalizing), \ - .interpreters_head = offsetof(_PyRuntimeState, interpreters.head), \ - }, \ - .interpreter_state = { \ - .size = sizeof(PyInterpreterState), \ - .id = offsetof(PyInterpreterState, id), \ - .next = offsetof(PyInterpreterState, next), \ - .threads_head = offsetof(PyInterpreterState, threads.head), \ - .gc = offsetof(PyInterpreterState, gc), \ - .imports_modules = offsetof(PyInterpreterState, imports.modules), \ - .sysdict = offsetof(PyInterpreterState, sysdict), \ - .builtins = offsetof(PyInterpreterState, builtins), \ - .ceval_gil = offsetof(PyInterpreterState, ceval.gil), \ - .gil_runtime_state = offsetof(PyInterpreterState, _gil), \ - .gil_runtime_state_enabled = _Py_Debug_gilruntimestate_enabled, \ - .gil_runtime_state_locked = offsetof(PyInterpreterState, _gil.locked), \ - .gil_runtime_state_holder = offsetof(PyInterpreterState, _gil.last_holder), \ - }, \ - .thread_state = { \ - .size = sizeof(PyThreadState), \ - .prev = offsetof(PyThreadState, prev), \ - .next = offsetof(PyThreadState, next), \ - .interp = offsetof(PyThreadState, interp), \ - .current_frame = offsetof(PyThreadState, current_frame), \ - .thread_id = offsetof(PyThreadState, thread_id), \ - .native_thread_id = offsetof(PyThreadState, native_thread_id), \ - .datastack_chunk = offsetof(PyThreadState, datastack_chunk), \ - .status = offsetof(PyThreadState, _status), \ - }, \ - .interpreter_frame = { \ - .size = sizeof(_PyInterpreterFrame), \ - .previous = offsetof(_PyInterpreterFrame, previous), \ - .executable = offsetof(_PyInterpreterFrame, f_executable), \ - .instr_ptr = offsetof(_PyInterpreterFrame, instr_ptr), \ - .localsplus = offsetof(_PyInterpreterFrame, localsplus), \ - .owner = offsetof(_PyInterpreterFrame, owner), \ - }, \ - .code_object = { \ - .size = 
sizeof(PyCodeObject), \ - .filename = offsetof(PyCodeObject, co_filename), \ - .name = offsetof(PyCodeObject, co_name), \ - .qualname = offsetof(PyCodeObject, co_qualname), \ - .linetable = offsetof(PyCodeObject, co_linetable), \ - .firstlineno = offsetof(PyCodeObject, co_firstlineno), \ - .argcount = offsetof(PyCodeObject, co_argcount), \ - .localsplusnames = offsetof(PyCodeObject, co_localsplusnames), \ - .localspluskinds = offsetof(PyCodeObject, co_localspluskinds), \ - .co_code_adaptive = offsetof(PyCodeObject, co_code_adaptive), \ - }, \ - .pyobject = { \ - .size = sizeof(PyObject), \ - .ob_type = offsetof(PyObject, ob_type), \ - }, \ - .type_object = { \ - .size = sizeof(PyTypeObject), \ - .tp_name = offsetof(PyTypeObject, tp_name), \ - .tp_repr = offsetof(PyTypeObject, tp_repr), \ - .tp_flags = offsetof(PyTypeObject, tp_flags), \ - }, \ - .tuple_object = { \ - .size = sizeof(PyTupleObject), \ - .ob_item = offsetof(PyTupleObject, ob_item), \ - .ob_size = offsetof(PyTupleObject, ob_base.ob_size), \ - }, \ - .list_object = { \ - .size = sizeof(PyListObject), \ - .ob_item = offsetof(PyListObject, ob_item), \ - .ob_size = offsetof(PyListObject, ob_base.ob_size), \ - }, \ - .dict_object = { \ - .size = sizeof(PyDictObject), \ - .ma_keys = offsetof(PyDictObject, ma_keys), \ - .ma_values = offsetof(PyDictObject, ma_values), \ - }, \ - .float_object = { \ - .size = sizeof(PyFloatObject), \ - .ob_fval = offsetof(PyFloatObject, ob_fval), \ - }, \ - .long_object = { \ - .size = sizeof(PyLongObject), \ - .lv_tag = offsetof(PyLongObject, long_value.lv_tag), \ - .ob_digit = offsetof(PyLongObject, long_value.ob_digit), \ - }, \ - .bytes_object = { \ - .size = sizeof(PyBytesObject), \ - .ob_size = offsetof(PyBytesObject, ob_base.ob_size), \ - .ob_sval = offsetof(PyBytesObject, ob_sval), \ - }, \ - .unicode_object = { \ - .size = sizeof(PyUnicodeObject), \ - .state = offsetof(PyUnicodeObject, _base._base.state), \ - .length = offsetof(PyUnicodeObject, _base._base.length), \ - 
.asciiobject_size = sizeof(PyASCIIObject), \ - }, \ - .gc = { \ - .size = sizeof(struct _gc_runtime_state), \ - .collecting = offsetof(struct _gc_runtime_state, collecting), \ - }, \ - }, \ + .debug_offsets = _Py_DebugOffsets_INIT(debug_cookie), \ .allocators = { \ .standard = _pymem_allocators_standard_INIT(runtime), \ .debug = _pymem_allocators_debug_INIT, \ diff --git a/Include/internal/pycore_sysmodule.h b/Include/internal/pycore_sysmodule.h index a1d795e284f6ac..99968df54a45f6 100644 --- a/Include/internal/pycore_sysmodule.h +++ b/Include/internal/pycore_sysmodule.h @@ -14,16 +14,6 @@ PyAPI_FUNC(PyObject*) _PySys_GetAttr(PyThreadState *tstate, PyObject *name); // Export for '_pickle' shared extension PyAPI_FUNC(size_t) _PySys_GetSizeOf(PyObject *); -extern int _PySys_Audit( - PyThreadState *tstate, - const char *event, - const char *argFormat, - ...); - -// _PySys_ClearAuditHooks() must not be exported: use extern rather than -// PyAPI_FUNC(). We want minimal exposure of this function. 
-extern void _PySys_ClearAuditHooks(PyThreadState *tstate); - extern int _PySys_SetAttr(PyObject *, PyObject *); extern int _PySys_ClearAttrString(PyInterpreterState *interp, diff --git a/Include/sysmodule.h b/Include/sysmodule.h index 5a0af2e1578eb7..c1d5f610fe08a5 100644 --- a/Include/sysmodule.h +++ b/Include/sysmodule.h @@ -21,23 +21,6 @@ Py_DEPRECATED(3.13) PyAPI_FUNC(void) PySys_ResetWarnOptions(void); PyAPI_FUNC(PyObject *) PySys_GetXOptions(void); -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 -PyAPI_FUNC(int) PySys_Audit( - const char *event, - const char *argFormat, - ...); - -PyAPI_FUNC(int) PySys_AuditTuple( - const char *event, - PyObject *args); -#endif - -#ifndef Py_LIMITED_API -# define Py_CPYTHON_SYSMODULE_H -# include "cpython/sysmodule.h" -# undef Py_CPYTHON_SYSMODULE_H -#endif - #ifdef __cplusplus } #endif diff --git a/Makefile.pre.in b/Makefile.pre.in index 07c8a4d20142db..fb6f22d57397db 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1032,6 +1032,7 @@ python.worker.js: $(srcdir)/Tools/wasm/python.worker.js PYTHON_HEADERS= \ $(srcdir)/Include/Python.h \ $(srcdir)/Include/abstract.h \ + $(srcdir)/Include/audit.h \ $(srcdir)/Include/bltinmodule.h \ $(srcdir)/Include/boolobject.h \ $(srcdir)/Include/bytearrayobject.h \ @@ -1110,6 +1111,7 @@ PYTHON_HEADERS= \ $(PARSER_HEADERS) \ \ $(srcdir)/Include/cpython/abstract.h \ + $(srcdir)/Include/cpython/audit.h \ $(srcdir)/Include/cpython/bytearrayobject.h \ $(srcdir)/Include/cpython/bytesobject.h \ $(srcdir)/Include/cpython/cellobject.h \ @@ -1159,7 +1161,6 @@ PYTHON_HEADERS= \ $(srcdir)/Include/cpython/pythonrun.h \ $(srcdir)/Include/cpython/pythread.h \ $(srcdir)/Include/cpython/setobject.h \ - $(srcdir)/Include/cpython/sysmodule.h \ $(srcdir)/Include/cpython/traceback.h \ $(srcdir)/Include/cpython/tracemalloc.h \ $(srcdir)/Include/cpython/tupleobject.h \ @@ -1174,6 +1175,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_ast.h \ 
$(srcdir)/Include/internal/pycore_ast_state.h \ $(srcdir)/Include/internal/pycore_atexit.h \ + $(srcdir)/Include/internal/pycore_audit.h \ $(srcdir)/Include/internal/pycore_backoff.h \ $(srcdir)/Include/internal/pycore_bitutils.h \ $(srcdir)/Include/internal/pycore_blocks_output_buffer.h \ @@ -1193,6 +1195,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_context.h \ $(srcdir)/Include/internal/pycore_critical_section.h \ $(srcdir)/Include/internal/pycore_crossinterp.h \ + $(srcdir)/Include/internal/pycore_debug_offsets.h \ $(srcdir)/Include/internal/pycore_descrobject.h \ $(srcdir)/Include/internal/pycore_dict.h \ $(srcdir)/Include/internal/pycore_dict_state.h \ diff --git a/Modules/_testexternalinspection.c b/Modules/_testexternalinspection.c index 2476346777c319..0807d1e47b6736 100644 --- a/Modules/_testexternalinspection.c +++ b/Modules/_testexternalinspection.c @@ -51,7 +51,9 @@ # define Py_BUILD_CORE_MODULE 1 #endif #include "Python.h" -#include +#include // _Py_DebugOffsets +#include // FRAME_OWNED_BY_CSTACK +#include // Py_TAG_BITS #ifndef HAVE_PROCESS_VM_READV # define HAVE_PROCESS_VM_READV 0 diff --git a/Objects/object.c b/Objects/object.c index 4a4c5bf7d7f08a..1a15b70d3dc63f 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -17,6 +17,7 @@ #include "pycore_memoryobject.h" // _PyManagedBuffer_Type #include "pycore_namespace.h" // _PyNamespace_Type #include "pycore_object.h" // PyAPI_DATA() _Py_SwappedOp definition +#include "pycore_object_state.h" // struct _reftracer_runtime_state #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_optimizer.h" // _PyUOpExecutor_Type, _PyUOpOptimizer_Type, ... 
#include "pycore_pyerrors.h" // _PyErr_Occurred() diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 3b33c6bf6bb91d..a4881e9256e4dd 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -128,6 +128,7 @@ + @@ -137,6 +138,7 @@ + @@ -185,7 +187,6 @@ - @@ -208,6 +209,7 @@ + @@ -227,6 +229,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index ee2930b10439a9..6b294683320a73 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -48,6 +48,9 @@ Include + + Include + Include @@ -354,6 +357,9 @@ Include\cpython + + Include\cpython + Include\cpython @@ -513,9 +519,6 @@ Include\cpython - - Include\cpython - Include\cpython @@ -552,6 +555,9 @@ Include\internal + + Include\internal + Include\internal @@ -603,6 +609,9 @@ Include\internal + + Include\internal + Include\internal diff --git a/Python/bytecodes.c b/Python/bytecodes.c index e6525657cabc2b..c59a35c3e828ca 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -8,6 +8,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() +#include "pycore_audit.h" // _PySys_Audit() #include "pycore_backoff.h" #include "pycore_cell.h" // PyCell_GetRef() #include "pycore_ceval.h" @@ -27,7 +28,6 @@ #include "pycore_range.h" // _PyRangeIterObject #include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_sliceobject.h" // _PyBuildSlice_ConsumeRefs -#include "pycore_sysmodule.h" // _PySys_Audit() #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "pycore_typeobject.h" // _PySuper_Lookup() diff --git a/Python/ceval.c b/Python/ceval.c index 98d95b28488fd0..55e5eba25eaa21 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4,6 +4,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() +#include "pycore_audit.h" // _PySys_Audit() #include "pycore_backoff.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_cell.h" // PyCell_GetRef() @@ -26,7 +27,6 
@@ #include "pycore_range.h" // _PyRangeIterObject #include "pycore_setobject.h" // _PySet_Update() #include "pycore_sliceobject.h" // _PyBuildSlice_ConsumeRefs -#include "pycore_sysmodule.h" // _PySys_Audit() #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "pycore_typeobject.h" // _PySuper_Lookup() #include "pycore_uop_ids.h" // Uops diff --git a/Python/errors.c b/Python/errors.c index 9e2a3ce062a6fe..7f3b4aabc432d7 100644 --- a/Python/errors.c +++ b/Python/errors.c @@ -2,12 +2,13 @@ /* Error handling */ #include "Python.h" +#include "pycore_audit.h" // _PySys_Audit() #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_initconfig.h" // _PyStatus_ERR() #include "pycore_pyerrors.h" // _PyErr_Format() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_structseq.h" // _PyStructSequence_FiniBuiltin() -#include "pycore_sysmodule.h" // _PySys_Audit() +#include "pycore_sysmodule.h" // _PySys_GetAttr() #include "pycore_traceback.h" // _PyTraceBack_FromFrame() #ifdef MS_WINDOWS diff --git a/Python/import.c b/Python/import.c index acf849f14562b9..d8ad37b2422795 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1,6 +1,7 @@ /* Module definition and import implementation */ #include "Python.h" +#include "pycore_audit.h" // _PySys_Audit() #include "pycore_ceval.h" #include "pycore_hashtable.h" // _Py_hashtable_new_full() #include "pycore_import.h" // _PyImport_BootstrapImp() @@ -14,7 +15,7 @@ #include "pycore_pylifecycle.h" #include "pycore_pymem.h" // _PyMem_SetDefaultAllocator() #include "pycore_pystate.h" // _PyInterpreterState_GET() -#include "pycore_sysmodule.h" // _PySys_Audit() +#include "pycore_sysmodule.h" // _PySys_ClearAttrString() #include "pycore_time.h" // _PyTime_AsMicroseconds() #include "pycore_weakref.h" // _PyWeakref_GET_REF() diff --git a/Python/legacy_tracing.c b/Python/legacy_tracing.c index 1436921a19b768..45af275f1f6dce 100644 --- a/Python/legacy_tracing.c +++ b/Python/legacy_tracing.c @@ -3,9 +3,9 @@ */ 
#include "Python.h" +#include "pycore_audit.h" // _PySys_Audit() #include "pycore_ceval.h" // export _PyEval_SetProfile() #include "pycore_object.h" -#include "pycore_sysmodule.h" // _PySys_Audit() #include "opcode.h" #include diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 5fb9c4f7c719fe..b8f424854ecb86 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -2,6 +2,7 @@ #include "Python.h" +#include "pycore_audit.h" // _PySys_ClearAuditHooks() #include "pycore_call.h" // _PyObject_CallMethod() #include "pycore_ceval.h" // _PyEval_FiniGIL() #include "pycore_codecs.h" // _PyCodec_Lookup() @@ -26,7 +27,7 @@ #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_sliceobject.h" // _PySlice_Fini() -#include "pycore_sysmodule.h" // _PySys_ClearAuditHooks() +#include "pycore_sysmodule.h" // _PySys_GetAttr() #include "pycore_traceback.h" // _Py_DumpTracebackThreads() #include "pycore_uniqueid.h" // _PyObject_FinalizeUniqueIdPool() #include "pycore_typeobject.h" // _PyTypes_InitTypes() @@ -78,6 +79,7 @@ static void wait_for_thread_shutdown(PyThreadState *tstate); static void finalize_subinterpreters(void); static void call_ll_exitfuncs(_PyRuntimeState *runtime); + /* The following places the `_PyRuntime` structure in a location that can be * found without any external information. 
This is meant to ease access to the * interpreter state for various runtime debugging tools, but is *not* an @@ -107,6 +109,7 @@ __attribute__ ((section (".PyRuntime"))) = _PyRuntimeState_INIT(_PyRuntime, _Py_Debug_Cookie); _Py_COMP_DIAG_POP + static int runtime_initialized = 0; PyStatus diff --git a/Python/pystate.c b/Python/pystate.c index e3812cba41d9c2..7df872cd6d7d8a 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() +#include "pycore_audit.h" // _Py_AuditHookEntry #include "pycore_ceval.h" #include "pycore_code.h" // stats #include "pycore_critical_section.h" // _PyCriticalSection_Resume() @@ -18,7 +19,6 @@ #include "pycore_pymem.h" // _PyMem_SetDefaultAllocator() #include "pycore_pystate.h" #include "pycore_runtime_init.h" // _PyRuntimeState_INIT -#include "pycore_sysmodule.h" // _PySys_Audit() #include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap() #include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts() diff --git a/Python/pythonrun.c b/Python/pythonrun.c index b67597113ead45..fc0f11bc4e8af4 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -13,6 +13,7 @@ #include "Python.h" #include "pycore_ast.h" // PyAST_mod2obj() +#include "pycore_audit.h" // _PySys_Audit() #include "pycore_ceval.h" // _Py_EnterRecursiveCall() #include "pycore_compile.h" // _PyAST_Compile() #include "pycore_interp.h" // PyInterpreterState.importlib @@ -22,7 +23,7 @@ #include "pycore_pylifecycle.h" // _Py_FdIsInteractive() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_pythonrun.h" // export _PyRun_InteractiveLoopObject() -#include "pycore_sysmodule.h" // _PySys_Audit() +#include "pycore_sysmodule.h" // _PySys_GetAttr() #include "pycore_traceback.h" // _PyTraceBack_Print() #include "errcode.h" // E_EOF diff --git a/Python/sysmodule.c b/Python/sysmodule.c index ac343a8048e008..8b9209324002ce 100644 --- a/Python/sysmodule.c +++ 
b/Python/sysmodule.c @@ -15,6 +15,7 @@ Data members: */ #include "Python.h" +#include "pycore_audit.h" // _Py_AuditHookEntry #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _PyEval_SetAsyncGenFinalizer() #include "pycore_dict.h" // _PyDict_GetItemWithError() From f8ba9fb2ce6690d2dd05b356583e8e4790badad7 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 18 Oct 2024 17:09:34 +0100 Subject: [PATCH 035/106] gh-125703: Correctly honour tracemalloc hooks on specialized DECREF paths (#125704) --- Include/internal/pycore_object.h | 5 +++++ .../2024-10-18-16-00-10.gh-issue-125703.QRoqMo.rst | 2 ++ 2 files changed, 7 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-18-16-00-10.gh-issue-125703.QRoqMo.rst diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index ad1a7d7e120519..96f6d61e1c620b 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -208,6 +208,11 @@ _Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct) #ifdef Py_TRACE_REFS _Py_ForgetReference(op); #endif + struct _reftracer_runtime_state *tracer = &_PyRuntime.ref_tracer; + if (tracer->tracer_func != NULL) { + void* data = tracer->tracer_data; + tracer->tracer_func(op, PyRefTracer_DESTROY, data); + } destruct(op); } } diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-18-16-00-10.gh-issue-125703.QRoqMo.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-18-16-00-10.gh-issue-125703.QRoqMo.rst new file mode 100644 index 00000000000000..7cbfa725e78cef --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-18-16-00-10.gh-issue-125703.QRoqMo.rst @@ -0,0 +1,2 @@ +Correctly honour :mod:`tracemalloc` hooks in specialized ``Py_DECREF`` +paths. 
Patch by Pablo Galindo From c8fd4b12e3db49d795de55f74d9bac445c059f1b Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 18 Oct 2024 15:51:29 -0400 Subject: [PATCH 036/106] gh-125207: Fix MSVC 1935 build with JIT (#125209) * gh-125207: Use {0} array initializers * Simplify, as suggested in PR * Revert change to explicitly specify length --- Python/jit.c | 2 +- Tools/jit/_stencils.py | 2 +- Tools/jit/_writer.py | 7 +++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 234fc7dda83231..963bde2303dc2c 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -469,7 +469,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz // Loop once to find the total compiled size: size_t code_size = 0; size_t data_size = 0; - jit_state state = {}; + jit_state state = {0}; group = &trampoline; code_size += group->code_size; data_size += group->data_size; diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index bbb52f391f4b01..e4b2bf6e4702b3 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -339,7 +339,7 @@ def _get_trampoline_mask(self) -> str: word = bitmask & ((1 << 32) - 1) trampoline_mask.append(f"{word:#04x}") bitmask >>= 32 - return "{" + ", ".join(trampoline_mask) + "}" + return "{" + (", ".join(trampoline_mask) or "0") + "}" def as_c(self, opname: str) -> str: """Dump this hole as a StencilGroup initializer.""" diff --git a/Tools/jit/_writer.py b/Tools/jit/_writer.py index 7b99d10310a645..4e7f614b0e9d23 100644 --- a/Tools/jit/_writer.py +++ b/Tools/jit/_writer.py @@ -32,8 +32,11 @@ def _dump_footer( yield "};" yield "" yield f"static const void * const symbols_map[{max(len(symbols), 1)}] = {{" - for symbol, ordinal in symbols.items(): - yield f" [{ordinal}] = &{symbol}," + if symbols: + for symbol, ordinal in symbols.items(): + yield f" [{ordinal}] = &{symbol}," + else: + yield " 0" yield "};" From 322f14eeff9e3b5853eaac3233f7580ca0214cf8 Mon Sep 17 00:00:00 2001 
From: Eric Snow Date: Fri, 18 Oct 2024 16:05:12 -0600 Subject: [PATCH 037/106] gh-124694: In test_interpreter_pool, Restore the Asyncio Event Loop Policy During Cleanup (gh-125708) This resolves a failure on the android buildbot. --- .../test_interpreter_pool.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Lib/test/test_concurrent_futures/test_interpreter_pool.py b/Lib/test/test_concurrent_futures/test_interpreter_pool.py index 0de03c0d669399..5264b1bb6e9c75 100644 --- a/Lib/test/test_concurrent_futures/test_interpreter_pool.py +++ b/Lib/test/test_concurrent_futures/test_interpreter_pool.py @@ -282,6 +282,19 @@ def test_idle_thread_reuse(self): class AsyncioTest(InterpretersMixin, testasyncio_utils.TestCase): + @classmethod + def setUpClass(cls): + # Most uses of asyncio will implicitly call set_event_loop_policy() + # with the default policy if a policy hasn't been set already. + # If that happens in a test, like here, we'll end up with a failure + # when --fail-env-changed is used. That's why the other tests that + # use asyncio are careful to set the policy back to None and why + # we're careful to do so here. We also validate that no other + # tests left a policy in place, just in case. 
+ policy = support.maybe_get_event_loop_policy() + assert policy is None, policy + cls.addClassCleanup(lambda: asyncio.set_event_loop_policy(None)) + def setUp(self): super().setUp() self.loop = asyncio.new_event_loop() From 2bb7ab7ad364ec804eab8ed6867df01ece887240 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Sat, 19 Oct 2024 04:59:22 +0100 Subject: [PATCH 038/106] GH-125277: Increase minimum supported Sphinx to 7.2.6 (#125368) --- .github/workflows/reusable-docs.yml | 2 +- Doc/conf.py | 2 +- Doc/requirements-oldest-sphinx.txt | 30 +++++++++---------- ...-10-10-23-46-54.gh-issue-125277.QAby09.rst | 2 ++ 4 files changed, 19 insertions(+), 17 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2024-10-10-23-46-54.gh-issue-125277.QAby09.rst diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 3809f24dcc977e..39a97392e898aa 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -84,7 +84,7 @@ jobs: - name: 'Set up Python' uses: actions/setup-python@v5 with: - python-version: '3.12' # known to work with Sphinx 6.2.1 + python-version: '3.13' # known to work with Sphinx 7.2.6 cache: 'pip' cache-dependency-path: 'Doc/requirements-oldest-sphinx.txt' - name: 'Install build dependencies' diff --git a/Doc/conf.py b/Doc/conf.py index 839beaad08bebd..db8fb9a9a68c6b 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -90,7 +90,7 @@ highlight_language = 'python3' # Minimum version of sphinx required -needs_sphinx = '6.2.1' +needs_sphinx = '7.2.6' # Create table of contents entries for domain objects (e.g. functions, classes, # attributes, etc.). Default is True. 
diff --git a/Doc/requirements-oldest-sphinx.txt b/Doc/requirements-oldest-sphinx.txt index 068fe0cb426ecd..3483faea6b56cb 100644 --- a/Doc/requirements-oldest-sphinx.txt +++ b/Doc/requirements-oldest-sphinx.txt @@ -7,29 +7,29 @@ blurb python-docs-theme>=2022.1 # Generated from: -# pip install "Sphinx~=6.2.1" +# pip install "Sphinx~=7.2.6" # pip freeze # -# Sphinx 6.2.1 comes from ``needs_sphinx = '6.2.1'`` in ``Doc/conf.py``. +# Sphinx 7.2.6 comes from ``needs_sphinx = '7.2.6'`` in ``Doc/conf.py``. alabaster==0.7.16 -Babel==2.15.0 -certifi==2024.7.4 -charset-normalizer==3.3.2 -docutils==0.19 -idna==3.7 +Babel==2.16.0 +certifi==2024.8.30 +charset-normalizer==3.4.0 +docutils==0.20.1 +idna==3.10 imagesize==1.4.1 Jinja2==3.1.4 -MarkupSafe==2.1.5 +MarkupSafe==3.0.1 packaging==24.1 Pygments==2.18.0 requests==2.32.3 snowballstemmer==2.2.0 -Sphinx==6.2.1 -sphinxcontrib-applehelp==1.0.8 -sphinxcontrib-devhelp==1.0.6 -sphinxcontrib-htmlhelp==2.0.5 +Sphinx==7.2.6 +sphinxcontrib-applehelp==2.0.0 +sphinxcontrib-devhelp==2.0.0 +sphinxcontrib-htmlhelp==2.1.0 sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.7 -sphinxcontrib-serializinghtml==1.1.10 -urllib3==2.2.2 +sphinxcontrib-qthelp==2.0.0 +sphinxcontrib-serializinghtml==2.0.0 +urllib3==2.2.3 diff --git a/Misc/NEWS.d/next/Documentation/2024-10-10-23-46-54.gh-issue-125277.QAby09.rst b/Misc/NEWS.d/next/Documentation/2024-10-10-23-46-54.gh-issue-125277.QAby09.rst new file mode 100644 index 00000000000000..fcd6e22c27b5f4 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2024-10-10-23-46-54.gh-issue-125277.QAby09.rst @@ -0,0 +1,2 @@ +Require Sphinx 7.2.6 or later to build the Python documentation. +Patch by Adam Turner. 
From 84074a4fd810948350cec5500e77dc974cb5433d Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Sat, 19 Oct 2024 01:26:46 -0700 Subject: [PATCH 039/106] GH-99749: Add What's New entry for GH-124456 (GH-125671) --- Doc/whatsnew/3.14.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index feb65f244827ad..ad841538ccc547 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -210,6 +210,11 @@ argparse interpreter was instructed to find the ``__main__`` module code. (Contributed by Serhiy Storchaka and Alyssa Coghlan in :gh:`66436`.) +* Introduced the optional *suggest_on_error* parameter to + :class:`argparse.ArgumentParser`, enabling suggestions for argument choices + and subparser names if mistyped by the user. + (Contributed by Savannah Ostrowski in :gh:`124456`) + ast --- From a7443a1735c33d5758b47ac9b8a1c34bac4ef994 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 19 Oct 2024 11:29:51 +0300 Subject: [PATCH 040/106] gh-52551: Use wcsftime() to implement time.strftime() on Windows (GH-125658) --- Lib/test/test_strftime.py | 3 +-- .../Library/2024-10-17-20-36-06.gh-issue-52551.EIVNYY.rst | 1 + Modules/timemodule.c | 4 ---- 3 files changed, 2 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-17-20-36-06.gh-issue-52551.EIVNYY.rst diff --git a/Lib/test/test_strftime.py b/Lib/test/test_strftime.py index cebfc8927862a7..752e31359cf206 100644 --- a/Lib/test/test_strftime.py +++ b/Lib/test/test_strftime.py @@ -183,8 +183,7 @@ class Y1900Tests(unittest.TestCase): def test_y_before_1900(self): # Issue #13674, #19634 t = (1899, 1, 1, 0, 0, 0, 0, 0, 0) - if (sys.platform == "win32" - or sys.platform.startswith(("aix", "sunos", "solaris"))): + if sys.platform.startswith(("aix", "sunos", "solaris")): with self.assertRaises(ValueError): time.strftime("%y", t) else: diff --git a/Misc/NEWS.d/next/Library/2024-10-17-20-36-06.gh-issue-52551.EIVNYY.rst 
b/Misc/NEWS.d/next/Library/2024-10-17-20-36-06.gh-issue-52551.EIVNYY.rst new file mode 100644 index 00000000000000..48d3d93c3d72af --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-17-20-36-06.gh-issue-52551.EIVNYY.rst @@ -0,0 +1 @@ +Use :c:func:`!wcsftime` to implement :func:`time.strftime` on Windows. diff --git a/Modules/timemodule.c b/Modules/timemodule.c index b9d114ada0dfcd..340011fc08b551 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -739,10 +739,6 @@ checktm(struct tm* buf) return 1; } -#ifdef MS_WINDOWS - /* wcsftime() doesn't format correctly time zones, see issue #10653 */ -# undef HAVE_WCSFTIME -#endif #define STRFTIME_FORMAT_CODES \ "Commonly used format codes:\n\ \n\ From 4b421e8aca7f2dccc5ac8604b78589941dd7974c Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Sat, 19 Oct 2024 12:49:14 +0100 Subject: [PATCH 041/106] gh-125522: Fix bare except in test_math.testTan (#125544) --- Lib/test/test_math.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index e2e2a419c7778c..fecafd53aa6e6f 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1902,7 +1902,7 @@ def testTan(self): try: self.assertTrue(math.isnan(math.tan(INF))) self.assertTrue(math.isnan(math.tan(NINF))) - except: + except ValueError: self.assertRaises(ValueError, math.tan, INF) self.assertRaises(ValueError, math.tan, NINF) self.assertTrue(math.isnan(math.tan(NAN))) From 2a378dba987e125521b678364f0cd44b92dd5d52 Mon Sep 17 00:00:00 2001 From: Julien Date: Sat, 19 Oct 2024 13:34:41 -0400 Subject: [PATCH 042/106] gh-125398: Convert paths in venv activate script when using Git Bash under Windows (GH-125399) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Convert paths in venv activate script when using Git Bash under Windows With https://github.com/python/cpython/pull/112508 the check to converts paths when 
running on Windows was changed from using the non-posix environment variable `$OSTYPE` to using `uname` instead. However this missed the fact that when running under Git Bash on Windows, uname reports `MINGW*` (`$OSTYPE` is still `msys`). This results in `$PATH` being set to something like `D:\a\github-actions-shells\github-actions-shells\venv/Scripts:…`, instead of `/d/a/github-actions-shells/github-actions-shells/venv/Scripts`. Notably, the Git Bash is the bash shell that’s used for GitHub Actions Windows runners, and ships with VSCode. --- Lib/venv/scripts/common/activate | 4 ++-- .../Library/2024-10-13-15-04-58.gh-issue-125398.UW7Ndv.rst | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-13-15-04-58.gh-issue-125398.UW7Ndv.rst diff --git a/Lib/venv/scripts/common/activate b/Lib/venv/scripts/common/activate index 4593799b7e9b0e..44f137672e9d2e 100644 --- a/Lib/venv/scripts/common/activate +++ b/Lib/venv/scripts/common/activate @@ -38,8 +38,8 @@ deactivate nondestructive # on Windows, a path can contain colons and backslashes and has to be converted: case "$(uname)" in - CYGWIN*|MSYS*) - # transform D:\path\to\venv to /d/path/to/venv on MSYS + CYGWIN*|MSYS*|MINGW*) + # transform D:\path\to\venv to /d/path/to/venv on MSYS and MINGW # and to /cygdrive/d/path/to/venv on Cygwin VIRTUAL_ENV=$(cygpath "__VENV_DIR__") export VIRTUAL_ENV diff --git a/Misc/NEWS.d/next/Library/2024-10-13-15-04-58.gh-issue-125398.UW7Ndv.rst b/Misc/NEWS.d/next/Library/2024-10-13-15-04-58.gh-issue-125398.UW7Ndv.rst new file mode 100644 index 00000000000000..a188b35e1fbdbc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-13-15-04-58.gh-issue-125398.UW7Ndv.rst @@ -0,0 +1 @@ +Fix the conversion of the :envvar:`!VIRTUAL_ENV` path in the activate script in :mod:`venv` when running in Git Bash for Windows. 
From 4c53b2577531c77193430cdcd66ad6385fcda81f Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Sat, 19 Oct 2024 17:16:36 -0400 Subject: [PATCH 043/106] gh-124984: Enhance `ssl` thread safety (#124993) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make SSL objects thread safe in Free Threaded build by using critical sections. Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_ssl.py | 51 + ...-10-04-22-43-48.gh-issue-124984.xjMv9b.rst | 1 + Modules/_ssl.c | 683 ++++++--- Modules/clinic/_ssl.c.h | 1347 +++++++++++++++-- 4 files changed, 1767 insertions(+), 315 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-04-22-43-48.gh-issue-124984.xjMv9b.rst diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index b93fa0ed99f8ce..de5110a1cc4b6d 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -4,6 +4,7 @@ import unittest import unittest.mock from ast import literal_eval +from threading import Thread from test import support from test.support import import_helper from test.support import os_helper @@ -277,11 +278,19 @@ def test_wrap_socket(sock, *, return context.wrap_socket(sock, **kwargs) +USE_SAME_TEST_CONTEXT = False +_TEST_CONTEXT = None + def testing_context(server_cert=SIGNED_CERTFILE, *, server_chain=True): """Create context client_context, server_context, hostname = testing_context() """ + global _TEST_CONTEXT + if USE_SAME_TEST_CONTEXT: + if _TEST_CONTEXT is not None: + return _TEST_CONTEXT + if server_cert == SIGNED_CERTFILE: hostname = SIGNED_CERTFILE_HOSTNAME elif server_cert == SIGNED_CERTFILE2: @@ -299,6 +308,10 @@ def testing_context(server_cert=SIGNED_CERTFILE, *, server_chain=True): if server_chain: server_context.load_verify_locations(SIGNING_CA) + if USE_SAME_TEST_CONTEXT: + if _TEST_CONTEXT is not None: + _TEST_CONTEXT = client_context, server_context, hostname + return client_context, server_context, hostname @@ -2801,6 +2814,44 @@ def
test_echo(self): 'Cannot create a client socket with a PROTOCOL_TLS_SERVER context', str(e.exception)) + @unittest.skipUnless(support.Py_GIL_DISABLED, "test is only useful if the GIL is disabled") + def test_ssl_in_multiple_threads(self): + # See GH-124984: OpenSSL is not thread safe. + threads = [] + + global USE_SAME_TEST_CONTEXT + USE_SAME_TEST_CONTEXT = True + try: + for func in ( + self.test_echo, + self.test_alpn_protocols, + self.test_getpeercert, + self.test_crl_check, + self.test_check_hostname_idn, + self.test_wrong_cert_tls12, + self.test_wrong_cert_tls13, + ): + # Be careful with the number of threads here. + # Too many can result in failing tests. + for num in range(5): + with self.subTest(func=func, num=num): + threads.append(Thread(target=func)) + + with threading_helper.catch_threading_exception() as cm: + for thread in threads: + with self.subTest(thread=thread): + thread.start() + + for thread in threads: + with self.subTest(thread=thread): + thread.join() + if cm.exc_value is not None: + # Some threads can skip their test + if not isinstance(cm.exc_value, unittest.SkipTest): + raise cm.exc_value + finally: + USE_SAME_TEST_CONTEXT = False + def test_getpeercert(self): if support.verbose: sys.stdout.write("\n") diff --git a/Misc/NEWS.d/next/Library/2024-10-04-22-43-48.gh-issue-124984.xjMv9b.rst b/Misc/NEWS.d/next/Library/2024-10-04-22-43-48.gh-issue-124984.xjMv9b.rst new file mode 100644 index 00000000000000..dd0a55a6854c25 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-04-22-43-48.gh-issue-124984.xjMv9b.rst @@ -0,0 +1 @@ +Fixed thread safety in :mod:`ssl` in the free-threaded build. OpenSSL operations are now protected by a per-object lock. 
diff --git a/Modules/_ssl.c b/Modules/_ssl.c index f2d3b331226a7a..62a94314512dd9 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -51,7 +51,6 @@ PySSL_BEGIN_ALLOW_THREADS_S(_save); #define PySSL_END_ALLOW_THREADS PySSL_END_ALLOW_THREADS_S(_save); } - #if defined(HAVE_POLL_H) #include #elif defined(HAVE_SYS_POLL_H) @@ -376,9 +375,6 @@ class _ssl.SSLSession "PySSLSession *" "get_state_type(type)->PySSLSession_Type" static int PySSL_select(PySocketSockObject *s, int writing, PyTime_t timeout); -static int PySSL_set_owner(PySSLSocket *, PyObject *, void *); -static int PySSL_set_session(PySSLSocket *, PyObject *, void *); - typedef enum { SOCKET_IS_NONBLOCKING, SOCKET_IS_BLOCKING, @@ -616,6 +612,7 @@ PySSL_SetError(PySSLSocket *sslsock, const char *filename, int lineno) _sslmodulestate *state = get_state_sock(sslsock); type = state->PySSLErrorObject; + // ERR functions are thread local, no need to lock them. e = ERR_peek_last_error(); if (sslsock->ssl != NULL) { @@ -926,13 +923,13 @@ newPySSLSocket(PySSLContext *sslctx, PySocketSockObject *sock, } } if (owner && owner != Py_None) { - if (PySSL_set_owner(self, owner, NULL) == -1) { + if (_ssl__SSLSocket_owner_set(self, owner, NULL) == -1) { Py_DECREF(self); return NULL; } } if (session && session != Py_None) { - if (PySSL_set_session(self, session, NULL) == -1) { + if (_ssl__SSLSocket_session_set(self, session, NULL) == -1) { Py_DECREF(self); return NULL; } @@ -945,12 +942,13 @@ newPySSLSocket(PySSLContext *sslctx, PySocketSockObject *sock, /* SSL object methods */ /*[clinic input] +@critical_section _ssl._SSLSocket.do_handshake [clinic start generated code]*/ static PyObject * _ssl__SSLSocket_do_handshake_impl(PySSLSocket *self) -/*[clinic end generated code: output=6c0898a8936548f6 input=d2d737de3df018c8]*/ +/*[clinic end generated code: output=6c0898a8936548f6 input=65619a7a4bea3176]*/ { int ret; _PySSLError err; @@ -1820,6 +1818,7 @@ _ssl__test_decode_cert_impl(PyObject *module, PyObject *path) /*[clinic input] 
+@critical_section _ssl._SSLSocket.getpeercert der as binary_mode: bool = False / @@ -1837,7 +1836,7 @@ return the certificate even if it wasn't validated. static PyObject * _ssl__SSLSocket_getpeercert_impl(PySSLSocket *self, int binary_mode) -/*[clinic end generated code: output=1f0ab66dfb693c88 input=c0fbe802e57629b7]*/ +/*[clinic end generated code: output=1f0ab66dfb693c88 input=e35af55fa5f9bab8]*/ { int verification; X509 *peer_cert; @@ -1867,13 +1866,14 @@ _ssl__SSLSocket_getpeercert_impl(PySSLSocket *self, int binary_mode) } /*[clinic input] +@critical_section _ssl._SSLSocket.get_verified_chain [clinic start generated code]*/ static PyObject * _ssl__SSLSocket_get_verified_chain_impl(PySSLSocket *self) -/*[clinic end generated code: output=802421163cdc3110 input=5fb0714f77e2bd51]*/ +/*[clinic end generated code: output=802421163cdc3110 input=83035fe238ec057b]*/ { /* borrowed reference */ STACK_OF(X509) *chain = SSL_get0_verified_chain(self->ssl); @@ -1884,13 +1884,14 @@ _ssl__SSLSocket_get_verified_chain_impl(PySSLSocket *self) } /*[clinic input] +@critical_section _ssl._SSLSocket.get_unverified_chain [clinic start generated code]*/ static PyObject * _ssl__SSLSocket_get_unverified_chain_impl(PySSLSocket *self) -/*[clinic end generated code: output=5acdae414e13f913 input=78c33c360c635cb5]*/ +/*[clinic end generated code: output=5acdae414e13f913 input=079f8ff5c205cb3b]*/ { PyObject *retval; /* borrowed reference */ @@ -2020,12 +2021,13 @@ cipher_to_dict(const SSL_CIPHER *cipher) } /*[clinic input] +@critical_section _ssl._SSLSocket.shared_ciphers [clinic start generated code]*/ static PyObject * _ssl__SSLSocket_shared_ciphers_impl(PySSLSocket *self) -/*[clinic end generated code: output=3d174ead2e42c4fd input=0bfe149da8fe6306]*/ +/*[clinic end generated code: output=3d174ead2e42c4fd input=869645271e3bc6d0]*/ { STACK_OF(SSL_CIPHER) *server_ciphers; STACK_OF(SSL_CIPHER) *client_ciphers; @@ -2069,12 +2071,13 @@ _ssl__SSLSocket_shared_ciphers_impl(PySSLSocket *self) 
} /*[clinic input] +@critical_section _ssl._SSLSocket.cipher [clinic start generated code]*/ static PyObject * _ssl__SSLSocket_cipher_impl(PySSLSocket *self) -/*[clinic end generated code: output=376417c16d0e5815 input=548fb0e27243796d]*/ +/*[clinic end generated code: output=376417c16d0e5815 input=39e180269a36f486]*/ { const SSL_CIPHER *current; @@ -2087,12 +2090,13 @@ _ssl__SSLSocket_cipher_impl(PySSLSocket *self) } /*[clinic input] +@critical_section _ssl._SSLSocket.version [clinic start generated code]*/ static PyObject * _ssl__SSLSocket_version_impl(PySSLSocket *self) -/*[clinic end generated code: output=178aed33193b2cdb input=900186a503436fd6]*/ +/*[clinic end generated code: output=178aed33193b2cdb input=2732bc3f7f597d09]*/ { const char *version; @@ -2109,12 +2113,13 @@ _ssl__SSLSocket_version_impl(PySSLSocket *self) } /*[clinic input] +@critical_section _ssl._SSLSocket.selected_alpn_protocol [clinic start generated code]*/ static PyObject * _ssl__SSLSocket_selected_alpn_protocol_impl(PySSLSocket *self) -/*[clinic end generated code: output=ec33688b303d250f input=442de30e35bc2913]*/ +/*[clinic end generated code: output=ec33688b303d250f input=f0b53506c9acdf8c]*/ { const unsigned char *out; unsigned int outlen; @@ -2152,13 +2157,29 @@ _ssl__SSLSocket_compression_impl(PySSLSocket *self) #endif } -static PySSLContext *PySSL_get_context(PySSLSocket *self, void *closure) { - return (PySSLContext*)Py_NewRef(self->ctx); +/*[clinic input] +@critical_section +@getter +_ssl._SSLSocket.context +[clinic start generated code]*/ + +static PyObject * +_ssl__SSLSocket_context_get_impl(PySSLSocket *self) +/*[clinic end generated code: output=d23e82f72f32e3d7 input=25aa82e4d9fa344a]*/ +{ + return Py_NewRef(self->ctx); } -static int PySSL_set_context(PySSLSocket *self, PyObject *value, - void *closure) { +/*[clinic input] +@critical_section +@setter +_ssl._SSLSocket.context +[clinic start generated code]*/ +static int +_ssl__SSLSocket_context_set_impl(PySSLSocket *self, 
PyObject *value) +/*[clinic end generated code: output=6b0a6cc5cf33d9fe input=48ece77724fd9dd4]*/ +{ if (PyObject_TypeCheck(value, self->ctx->state->PySSLContext_Type)) { Py_SETREF(self->ctx, (PySSLContext *)Py_NewRef(value)); SSL_set_SSL_CTX(self->ssl, self->ctx->ctx); @@ -2175,37 +2196,47 @@ static int PySSL_set_context(PySSLSocket *self, PyObject *value, return 0; } -PyDoc_STRVAR(PySSL_set_context_doc, -"_setter_context(ctx)\n\ -\ -This changes the context associated with the SSLSocket. This is typically\n\ -used from within a callback function set by the sni_callback\n\ -on the SSLContext to change the certificate information associated with the\n\ -SSLSocket before the cryptographic exchange handshake messages\n"); +/*[clinic input] +@critical_section +@getter +_ssl._SSLSocket.server_side +Whether this is a server-side socket. +[clinic start generated code]*/ static PyObject * -PySSL_get_server_side(PySSLSocket *self, void *c) +_ssl__SSLSocket_server_side_get_impl(PySSLSocket *self) +/*[clinic end generated code: output=ae51e372489148e3 input=b09b320510bc7cae]*/ { return PyBool_FromLong(self->socket_type == PY_SSL_SERVER); } -PyDoc_STRVAR(PySSL_get_server_side_doc, -"Whether this is a server-side socket."); +/*[clinic input] +@critical_section +@getter +_ssl._SSLSocket.server_hostname + +The currently set server hostname (for SNI). 
+[clinic start generated code]*/ static PyObject * -PySSL_get_server_hostname(PySSLSocket *self, void *c) +_ssl__SSLSocket_server_hostname_get_impl(PySSLSocket *self) +/*[clinic end generated code: output=1f40ea5a076de8e7 input=55d12a1dc6634b08]*/ { if (self->server_hostname == NULL) Py_RETURN_NONE; return Py_NewRef(self->server_hostname); } -PyDoc_STRVAR(PySSL_get_server_hostname_doc, -"The currently set server hostname (for SNI)."); +/*[clinic input] +@critical_section +@getter +_ssl._SSLSocket.owner +[clinic start generated code]*/ static PyObject * -PySSL_get_owner(PySSLSocket *self, void *c) +_ssl__SSLSocket_owner_get_impl(PySSLSocket *self) +/*[clinic end generated code: output=1f278cb930382927 input=bc2861ff3cf1402e]*/ { if (self->owner == NULL) { Py_RETURN_NONE; @@ -2217,8 +2248,15 @@ PySSL_get_owner(PySSLSocket *self, void *c) return owner; } +/*[clinic input] +@critical_section +@setter +_ssl._SSLSocket.owner +[clinic start generated code]*/ + static int -PySSL_set_owner(PySSLSocket *self, PyObject *value, void *c) +_ssl__SSLSocket_owner_set_impl(PySSLSocket *self, PyObject *value) +/*[clinic end generated code: output=2e3924498f2b6cde input=875666fd32367a73]*/ { Py_XSETREF(self->owner, PyWeakref_NewRef(value, NULL)); if (self->owner == NULL) @@ -2226,10 +2264,6 @@ PySSL_set_owner(PySSLSocket *self, PyObject *value, void *c) return 0; } -PyDoc_STRVAR(PySSL_get_owner_doc, -"The Python-level owner of this object.\ -Passed as \"self\" in servername callback."); - static int PySSL_traverse(PySSLSocket *self, visitproc visit, void *arg) { @@ -2343,6 +2377,7 @@ PySSL_select(PySocketSockObject *s, int writing, PyTime_t timeout) } /*[clinic input] +@critical_section _ssl._SSLSocket.write b: Py_buffer / @@ -2354,7 +2389,7 @@ Returns the number of bytes written. 
static PyObject * _ssl__SSLSocket_write_impl(PySSLSocket *self, Py_buffer *b) -/*[clinic end generated code: output=aa7a6be5527358d8 input=77262d994fe5100a]*/ +/*[clinic end generated code: output=aa7a6be5527358d8 input=967b5feeae641a26]*/ { size_t count = 0; int retval; @@ -2452,6 +2487,7 @@ _ssl__SSLSocket_write_impl(PySSLSocket *self, Py_buffer *b) } /*[clinic input] +@critical_section _ssl._SSLSocket.pending Returns the number of already decrypted bytes available for read, pending on the connection. @@ -2459,7 +2495,7 @@ Returns the number of already decrypted bytes available for read, pending on the static PyObject * _ssl__SSLSocket_pending_impl(PySSLSocket *self) -/*[clinic end generated code: output=983d9fecdc308a83 input=2b77487d6dfd597f]*/ +/*[clinic end generated code: output=983d9fecdc308a83 input=32ab982a254e8866]*/ { int count = 0; _PySSLError err; @@ -2477,6 +2513,7 @@ _ssl__SSLSocket_pending_impl(PySSLSocket *self) } /*[clinic input] +@critical_section _ssl._SSLSocket.read size as len: Py_ssize_t [ @@ -2490,7 +2527,7 @@ Read up to size bytes from the SSL socket. static PyObject * _ssl__SSLSocket_read_impl(PySSLSocket *self, Py_ssize_t len, int group_right_1, Py_buffer *buffer) -/*[clinic end generated code: output=49b16e6406023734 input=ec48bf622be1c4a1]*/ +/*[clinic end generated code: output=49b16e6406023734 input=80ed30436df01a71]*/ { PyObject *dest = NULL; char *mem; @@ -2619,6 +2656,7 @@ _ssl__SSLSocket_read_impl(PySSLSocket *self, Py_ssize_t len, } /*[clinic input] +@critical_section _ssl._SSLSocket.shutdown Does the SSL shutdown handshake with the remote end. @@ -2626,7 +2664,7 @@ Does the SSL shutdown handshake with the remote end. 
static PyObject * _ssl__SSLSocket_shutdown_impl(PySSLSocket *self) -/*[clinic end generated code: output=ca1aa7ed9d25ca42 input=11d39e69b0a2bf4a]*/ +/*[clinic end generated code: output=ca1aa7ed9d25ca42 input=98d9635cd4e16514]*/ { _PySSLError err; int sockstate, nonblocking, ret; @@ -2738,6 +2776,7 @@ _ssl__SSLSocket_shutdown_impl(PySSLSocket *self) } /*[clinic input] +@critical_section _ssl._SSLSocket.get_channel_binding cb_type: str = "tls-unique" @@ -2751,7 +2790,7 @@ Only 'tls-unique' channel binding data from RFC 5929 is supported. static PyObject * _ssl__SSLSocket_get_channel_binding_impl(PySSLSocket *self, const char *cb_type) -/*[clinic end generated code: output=34bac9acb6a61d31 input=08b7e43b99c17d41]*/ +/*[clinic end generated code: output=34bac9acb6a61d31 input=e008004fc08744db]*/ { char buf[PySSL_CB_MAXLEN]; size_t len; @@ -2783,6 +2822,7 @@ _ssl__SSLSocket_get_channel_binding_impl(PySSLSocket *self, } /*[clinic input] +@critical_section _ssl._SSLSocket.verify_client_post_handshake Initiate TLS 1.3 post-handshake authentication @@ -2790,7 +2830,7 @@ Initiate TLS 1.3 post-handshake authentication static PyObject * _ssl__SSLSocket_verify_client_post_handshake_impl(PySSLSocket *self) -/*[clinic end generated code: output=532147f3b1341425 input=6bfa874810a3d889]*/ +/*[clinic end generated code: output=532147f3b1341425 input=42b5bb1f0981eda1]*/ { #if defined(PySSL_HAVE_POST_HS_AUTH) int err = SSL_verify_client_post_handshake(self->ssl); @@ -2806,8 +2846,16 @@ _ssl__SSLSocket_verify_client_post_handshake_impl(PySSLSocket *self) #endif } +/*[clinic input] +@critical_section +@getter +_ssl._SSLSocket.session +[clinic start generated code]*/ + static PyObject * -PySSL_get_session(PySSLSocket *self, void *closure) { +_ssl__SSLSocket_session_get_impl(PySSLSocket *self) +/*[clinic end generated code: output=a5cd5755b35da670 input=b9792df9255a9f63]*/ +{ /* get_session can return sessions from a server-side connection, * it does not check for handshake done or 
client socket. */ PySSLSession *pysess; @@ -2819,6 +2867,8 @@ PySSL_get_session(PySSLSocket *self, void *closure) { } pysess = PyObject_GC_New(PySSLSession, self->ctx->state->PySSLSession_Type); if (pysess == NULL) { + // It's not possible for another thread to access this, so + // we don't need to lock it. SSL_SESSION_free(session); return NULL; } @@ -2830,8 +2880,16 @@ PySSL_get_session(PySSLSocket *self, void *closure) { return (PyObject *)pysess; } -static int PySSL_set_session(PySSLSocket *self, PyObject *value, - void *closure) { +/*[clinic input] +@critical_section +@setter +_ssl._SSLSocket.session +[clinic start generated code]*/ + +static int +_ssl__SSLSocket_session_set_impl(PySSLSocket *self, PyObject *value) +/*[clinic end generated code: output=a3fa2ddd7c2d54a2 input=5fa5f921640db98b]*/ +{ PySSLSession *pysess; if (!Py_IS_TYPE(value, get_state_sock(self)->PySSLSession_Type)) { @@ -2855,6 +2913,7 @@ static int PySSL_set_session(PySSLSocket *self, PyObject *value, "Cannot set session after handshake."); return -1; } + if (SSL_set_session(self->ssl, pysess->session) == 0) { _setSSLError(get_state_sock(self), NULL, 0, __FILE__, __LINE__); return -1; @@ -2862,36 +2921,29 @@ static int PySSL_set_session(PySSLSocket *self, PyObject *value, return 0; } -PyDoc_STRVAR(PySSL_set_session_doc, -"_setter_session(session)\n\ -\ -Get / set SSLSession."); +/*[clinic input] +@critical_section +@getter +_ssl._SSLSocket.session_reused + +Was the client session reused during handshake? +[clinic start generated code]*/ static PyObject * -PySSL_get_session_reused(PySSLSocket *self, void *closure) { - if (SSL_session_reused(self->ssl)) { - Py_RETURN_TRUE; - } else { - Py_RETURN_FALSE; - } +_ssl__SSLSocket_session_reused_get_impl(PySSLSocket *self) +/*[clinic end generated code: output=c8916909bcb80893 input=cec8bfec73a4461e]*/ +{ + int res = SSL_session_reused(self->ssl); + return res ? 
Py_True : Py_False; } -PyDoc_STRVAR(PySSL_get_session_reused_doc, -"Was the client session reused during handshake?"); - static PyGetSetDef ssl_getsetlist[] = { - {"context", (getter) PySSL_get_context, - (setter) PySSL_set_context, PySSL_set_context_doc}, - {"server_side", (getter) PySSL_get_server_side, NULL, - PySSL_get_server_side_doc}, - {"server_hostname", (getter) PySSL_get_server_hostname, NULL, - PySSL_get_server_hostname_doc}, - {"owner", (getter) PySSL_get_owner, (setter) PySSL_set_owner, - PySSL_get_owner_doc}, - {"session", (getter) PySSL_get_session, - (setter) PySSL_set_session, PySSL_set_session_doc}, - {"session_reused", (getter) PySSL_get_session_reused, NULL, - PySSL_get_session_reused_doc}, + _SSL__SSLSOCKET_CONTEXT_GETSETDEF + _SSL__SSLSOCKET_SERVER_SIDE_GETSETDEF + _SSL__SSLSOCKET_SERVER_HOSTNAME_GETSETDEF + _SSL__SSLSOCKET_OWNER_GETSETDEF + _SSL__SSLSOCKET_SESSION_GETSETDEF + _SSL__SSLSOCKET_SESSION_REUSED_GETSETDEF {NULL}, /* sentinel */ }; @@ -2964,6 +3016,7 @@ _set_verify_mode(PySSLContext *self, enum py_ssl_cert_requirements n) } /*[clinic input] +@critical_section @classmethod _ssl._SSLContext.__new__ protocol as proto_version: int @@ -2972,7 +3025,7 @@ _ssl._SSLContext.__new__ static PyObject * _ssl__SSLContext_impl(PyTypeObject *type, int proto_version) -/*[clinic end generated code: output=2cf0d7a0741b6bd1 input=8d58a805b95fc534]*/ +/*[clinic end generated code: output=2cf0d7a0741b6bd1 input=6fc79e62ae9d143c]*/ { PySSLContext *self; uint64_t options; @@ -3042,6 +3095,10 @@ _ssl__SSLContext_impl(PyTypeObject *type, int proto_version) return NULL; } + // This is the constructor, no need to lock it, because + // no other thread can be touching this object yet. + // (Technically, we can't even lock if we wanted to, as the + // lock hasn't been initialized yet.) 
PySSL_BEGIN_ALLOW_THREADS ctx = SSL_CTX_new(method); PySSL_END_ALLOW_THREADS @@ -3208,6 +3265,7 @@ context_dealloc(PySSLContext *self) } /*[clinic input] +@critical_section _ssl._SSLContext.set_ciphers cipherlist: str / @@ -3215,7 +3273,7 @@ _ssl._SSLContext.set_ciphers static PyObject * _ssl__SSLContext_set_ciphers_impl(PySSLContext *self, const char *cipherlist) -/*[clinic end generated code: output=3a3162f3557c0f3f input=a7ac931b9f3ca7fc]*/ +/*[clinic end generated code: output=3a3162f3557c0f3f input=40b583cded5c6ff9]*/ { int ret = SSL_CTX_set_cipher_list(self->ctx, cipherlist); if (ret == 0) { @@ -3231,12 +3289,13 @@ _ssl__SSLContext_set_ciphers_impl(PySSLContext *self, const char *cipherlist) } /*[clinic input] +@critical_section _ssl._SSLContext.get_ciphers [clinic start generated code]*/ static PyObject * _ssl__SSLContext_get_ciphers_impl(PySSLContext *self) -/*[clinic end generated code: output=a56e4d68a406dfc4 input=a2aadc9af89b79c5]*/ +/*[clinic end generated code: output=a56e4d68a406dfc4 input=d7fff51631a260ae]*/ { SSL *ssl = NULL; STACK_OF(SSL_CIPHER) *sk = NULL; @@ -3311,6 +3370,7 @@ _selectALPN_cb(SSL *s, } /*[clinic input] +@critical_section _ssl._SSLContext._set_alpn_protocols protos: Py_buffer / @@ -3319,7 +3379,7 @@ _ssl._SSLContext._set_alpn_protocols static PyObject * _ssl__SSLContext__set_alpn_protocols_impl(PySSLContext *self, Py_buffer *protos) -/*[clinic end generated code: output=87599a7f76651a9b input=9bba964595d519be]*/ +/*[clinic end generated code: output=87599a7f76651a9b input=b5096b186e49287d]*/ { if ((size_t)protos->len > UINT_MAX) { PyErr_Format(PyExc_OverflowError, @@ -3329,25 +3389,34 @@ _ssl__SSLContext__set_alpn_protocols_impl(PySSLContext *self, PyMem_Free(self->alpn_protocols); self->alpn_protocols = PyMem_Malloc(protos->len); - if (!self->alpn_protocols) + if (!self->alpn_protocols) { return PyErr_NoMemory(); + } memcpy(self->alpn_protocols, protos->buf, protos->len); self->alpn_protocols_len = (unsigned int)protos->len; - - 
if (SSL_CTX_set_alpn_protos(self->ctx, self->alpn_protocols, self->alpn_protocols_len)) + if (SSL_CTX_set_alpn_protos(self->ctx, self->alpn_protocols, self->alpn_protocols_len)) { return PyErr_NoMemory(); + } SSL_CTX_set_alpn_select_cb(self->ctx, _selectALPN_cb, self); Py_RETURN_NONE; } +/*[clinic input] +@critical_section +@getter +_ssl._SSLContext.verify_mode +[clinic start generated code]*/ + static PyObject * -get_verify_mode(PySSLContext *self, void *c) +_ssl__SSLContext_verify_mode_get_impl(PySSLContext *self) +/*[clinic end generated code: output=3e788736cc7229bc input=7e3c7f4454121d0a]*/ { /* ignore SSL_VERIFY_CLIENT_ONCE and SSL_VERIFY_POST_HANDSHAKE */ int mask = (SSL_VERIFY_NONE | SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT); - switch (SSL_CTX_get_verify_mode(self->ctx) & mask) { + int verify_mode = SSL_CTX_get_verify_mode(self->ctx); + switch (verify_mode & mask) { case SSL_VERIFY_NONE: return PyLong_FromLong(PY_SSL_CERT_NONE); case SSL_VERIFY_PEER: @@ -3360,11 +3429,18 @@ get_verify_mode(PySSLContext *self, void *c) return NULL; } +/*[clinic input] +@critical_section +@setter +_ssl._SSLContext.verify_mode +[clinic start generated code]*/ + static int -set_verify_mode(PySSLContext *self, PyObject *arg, void *c) +_ssl__SSLContext_verify_mode_set_impl(PySSLContext *self, PyObject *value) +/*[clinic end generated code: output=d698e16c58db3118 input=3ee60057c3a22378]*/ { int n; - if (!PyArg_Parse(arg, "i", &n)) + if (!PyArg_Parse(value, "i", &n)) return -1; if (n == PY_SSL_CERT_NONE && self->check_hostname) { PyErr_SetString(PyExc_ValueError, @@ -3375,8 +3451,15 @@ set_verify_mode(PySSLContext *self, PyObject *arg, void *c) return _set_verify_mode(self, n); } +/*[clinic input] +@critical_section +@getter +_ssl._SSLContext.verify_flags +[clinic start generated code]*/ + static PyObject * -get_verify_flags(PySSLContext *self, void *c) +_ssl__SSLContext_verify_flags_get_impl(PySSLContext *self) +/*[clinic end generated code: output=fbbf8ba28ad6e56e 
input=c1ec36d610b3f391]*/ { X509_VERIFY_PARAM *param; unsigned long flags; @@ -3386,13 +3469,20 @@ get_verify_flags(PySSLContext *self, void *c) return PyLong_FromUnsignedLong(flags); } +/*[clinic input] +@critical_section +@setter +_ssl._SSLContext.verify_flags +[clinic start generated code]*/ + static int -set_verify_flags(PySSLContext *self, PyObject *arg, void *c) +_ssl__SSLContext_verify_flags_set_impl(PySSLContext *self, PyObject *value) +/*[clinic end generated code: output=a3e3b2a0ce6c2e99 input=b2a0c42583d4f34e]*/ { X509_VERIFY_PARAM *param; unsigned long new_flags, flags, set, clear; - if (!PyArg_Parse(arg, "k", &new_flags)) + if (!PyArg_Parse(value, "k", &new_flags)) return -1; param = SSL_CTX_get0_param(self->ctx); flags = X509_VERIFY_PARAM_get_flags(param); @@ -3500,8 +3590,15 @@ set_min_max_proto_version(PySSLContext *self, PyObject *arg, int what) return 0; } +/*[clinic input] +@critical_section +@getter +_ssl._SSLContext.minimum_version +[clinic start generated code]*/ + static PyObject * -get_minimum_version(PySSLContext *self, void *c) +_ssl__SSLContext_minimum_version_get_impl(PySSLContext *self) +/*[clinic end generated code: output=27fa8382276635ed input=6832821e7e974d40]*/ { int v = SSL_CTX_get_min_proto_version(self->ctx); if (v == 0) { @@ -3510,14 +3607,29 @@ get_minimum_version(PySSLContext *self, void *c) return PyLong_FromLong(v); } +/*[clinic input] +@critical_section +@setter +_ssl._SSLContext.minimum_version +[clinic start generated code]*/ + static int -set_minimum_version(PySSLContext *self, PyObject *arg, void *c) +_ssl__SSLContext_minimum_version_set_impl(PySSLContext *self, + PyObject *value) +/*[clinic end generated code: output=482e82f7372afb78 input=2c64724901a514b3]*/ { - return set_min_max_proto_version(self, arg, 0); + return set_min_max_proto_version(self, value, 0); } +/*[clinic input] +@critical_section +@getter +_ssl._SSLContext.maximum_version +[clinic start generated code]*/ + static PyObject * 
-get_maximum_version(PySSLContext *self, void *c) +_ssl__SSLContext_maximum_version_get_impl(PySSLContext *self) +/*[clinic end generated code: output=889249475112826a input=2b9e4c2d45f16b14]*/ { int v = SSL_CTX_get_max_proto_version(self->ctx); if (v == 0) { @@ -3526,24 +3638,51 @@ get_maximum_version(PySSLContext *self, void *c) return PyLong_FromLong(v); } +/*[clinic input] +@critical_section +@setter +_ssl._SSLContext.maximum_version +[clinic start generated code]*/ + static int -set_maximum_version(PySSLContext *self, PyObject *arg, void *c) +_ssl__SSLContext_maximum_version_set_impl(PySSLContext *self, + PyObject *value) +/*[clinic end generated code: output=4c0eed3042ca20d5 input=fe27e9fbbeb73c89]*/ { - return set_min_max_proto_version(self, arg, 1); + return set_min_max_proto_version(self, value, 1); } -#if defined(TLS1_3_VERSION) && !defined(OPENSSL_NO_TLS1_3) +/*[clinic input] +@critical_section +@getter +_ssl._SSLContext.num_tickets +[clinic start generated code]*/ + static PyObject * -get_num_tickets(PySSLContext *self, void *c) +_ssl__SSLContext_num_tickets_get_impl(PySSLContext *self) +/*[clinic end generated code: output=3d06d016318846c9 input=1dee26d75163c073]*/ { - return PyLong_FromSize_t(SSL_CTX_get_num_tickets(self->ctx)); + // Clinic seems to be misbehaving when the comment is wrapped with in directive +#if defined(TLS1_3_VERSION) && !defined(OPENSSL_NO_TLS1_3) + PyObject *res = PyLong_FromSize_t(SSL_CTX_get_num_tickets(self->ctx)); + return res; +#else + return 0; +#endif } +/*[clinic input] +@critical_section +@setter +_ssl._SSLContext.num_tickets +[clinic start generated code]*/ + static int -set_num_tickets(PySSLContext *self, PyObject *arg, void *c) +_ssl__SSLContext_num_tickets_set_impl(PySSLContext *self, PyObject *value) +/*[clinic end generated code: output=ced81b46f3beab09 input=6ef8067ac55607e7]*/ { long num; - if (!PyArg_Parse(arg, "l", &num)) + if (!PyArg_Parse(value, "l", &num)) return -1; if (num < 0) { 
PyErr_SetString(PyExc_ValueError, "value must be non-negative"); @@ -3561,27 +3700,44 @@ set_num_tickets(PySSLContext *self, PyObject *arg, void *c) return 0; } -PyDoc_STRVAR(PySSLContext_num_tickets_doc, -"Control the number of TLSv1.3 session tickets"); -#endif /* defined(TLS1_3_VERSION) */ +/*[clinic input] +@critical_section +@getter +_ssl._SSLContext.security_level +[clinic start generated code]*/ static PyObject * -get_security_level(PySSLContext *self, void *c) +_ssl__SSLContext_security_level_get_impl(PySSLContext *self) +/*[clinic end generated code: output=56ece09e6a9572d0 input=a0416598e07c3183]*/ { - return PyLong_FromLong(SSL_CTX_get_security_level(self->ctx)); + PyObject *res = PyLong_FromLong(SSL_CTX_get_security_level(self->ctx)); + return res; } -PyDoc_STRVAR(PySSLContext_security_level_doc, "The current security level"); + +/*[clinic input] +@critical_section +@getter +_ssl._SSLContext.options +[clinic start generated code]*/ static PyObject * -get_options(PySSLContext *self, void *c) +_ssl__SSLContext_options_get_impl(PySSLContext *self) +/*[clinic end generated code: output=3dfa6a74837f525b input=f5a2805c7cda6f25]*/ { uint64_t options = SSL_CTX_get_options(self->ctx); Py_BUILD_ASSERT(sizeof(unsigned long long) >= sizeof(options)); return PyLong_FromUnsignedLongLong(options); } +/*[clinic input] +@critical_section +@setter +_ssl._SSLContext.options +[clinic start generated code]*/ + static int -set_options(PySSLContext *self, PyObject *arg, void *c) +_ssl__SSLContext_options_set_impl(PySSLContext *self, PyObject *value) +/*[clinic end generated code: output=92ca34731ece5dbb input=2b94bf789e9ae5dd]*/ { PyObject *new_opts_obj; unsigned long long new_opts_arg; @@ -3591,7 +3747,7 @@ set_options(PySSLContext *self, PyObject *arg, void *c) SSL_OP_NO_TLSv1_1 | SSL_OP_NO_TLSv1_2 | SSL_OP_NO_TLSv1_3 ); - if (!PyArg_Parse(arg, "O!", &PyLong_Type, &new_opts_obj)) { + if (!PyArg_Parse(value, "O!", &PyLong_Type, &new_opts_obj)) { return -1; } new_opts_arg = 
PyLong_AsUnsignedLongLong(new_opts_obj); @@ -3620,19 +3776,33 @@ set_options(PySSLContext *self, PyObject *arg, void *c) return 0; } +/*[clinic input] +@critical_section +@getter +_ssl._SSLContext._host_flags +[clinic start generated code]*/ + static PyObject * -get_host_flags(PySSLContext *self, void *c) +_ssl__SSLContext__host_flags_get_impl(PySSLContext *self) +/*[clinic end generated code: output=0f9db6654ce32582 input=8e3c49499eefd0e5]*/ { return PyLong_FromUnsignedLong(self->hostflags); } +/*[clinic input] +@critical_section +@setter +_ssl._SSLContext._host_flags +[clinic start generated code]*/ + static int -set_host_flags(PySSLContext *self, PyObject *arg, void *c) +_ssl__SSLContext__host_flags_set_impl(PySSLContext *self, PyObject *value) +/*[clinic end generated code: output=1ed6f4027aaf2e3e input=28caf1fb9c32f6cb]*/ { X509_VERIFY_PARAM *param; unsigned int new_flags = 0; - if (!PyArg_Parse(arg, "I", &new_flags)) + if (!PyArg_Parse(value, "I", &new_flags)) return -1; param = SSL_CTX_get0_param(self->ctx); @@ -3641,20 +3811,35 @@ set_host_flags(PySSLContext *self, PyObject *arg, void *c) return 0; } +/*[clinic input] +@critical_section +@getter +_ssl._SSLContext.check_hostname +[clinic start generated code]*/ + static PyObject * -get_check_hostname(PySSLContext *self, void *c) +_ssl__SSLContext_check_hostname_get_impl(PySSLContext *self) +/*[clinic end generated code: output=e046d6eeefc76063 input=1b8341e705f9ecf5]*/ { return PyBool_FromLong(self->check_hostname); } +/*[clinic input] +@critical_section +@setter +_ssl._SSLContext.check_hostname +[clinic start generated code]*/ + static int -set_check_hostname(PySSLContext *self, PyObject *arg, void *c) +_ssl__SSLContext_check_hostname_set_impl(PySSLContext *self, PyObject *value) +/*[clinic end generated code: output=0e767b4784e7dc3f input=e6a771cb5919f74d]*/ { int check_hostname; - if (!PyArg_Parse(arg, "p", &check_hostname)) + if (!PyArg_Parse(value, "p", &check_hostname)) return -1; + int verify_mode = 
check_hostname ? SSL_CTX_get_verify_mode(self->ctx) : 0; if (check_hostname && - SSL_CTX_get_verify_mode(self->ctx) == SSL_VERIFY_NONE) { + verify_mode == SSL_VERIFY_NONE) { /* check_hostname = True sets verify_mode = CERT_REQUIRED */ if (_set_verify_mode(self, PY_SSL_CERT_REQUIRED) == -1) { return -1; @@ -3694,8 +3879,16 @@ set_post_handshake_auth(PySSLContext *self, PyObject *arg, void *c) { } #endif +/*[clinic input] +@critical_section +@getter +_ssl._SSLContext.protocol +[clinic start generated code]*/ + static PyObject * -get_protocol(PySSLContext *self, void *c) { +_ssl__SSLContext_protocol_get_impl(PySSLContext *self) +/*[clinic end generated code: output=a9a48febc16cee22 input=c9f5fa1a2bd4b8a8]*/ +{ return PyLong_FromLong(self->protocol); } @@ -3809,6 +4002,7 @@ _password_callback(char *buf, int size, int rwflag, void *userdata) } /*[clinic input] +@critical_section _ssl._SSLContext.load_cert_chain certfile: object keyfile: object = None @@ -3819,7 +4013,7 @@ _ssl._SSLContext.load_cert_chain static PyObject * _ssl__SSLContext_load_cert_chain_impl(PySSLContext *self, PyObject *certfile, PyObject *keyfile, PyObject *password) -/*[clinic end generated code: output=9480bc1c380e2095 input=30bc7e967ea01a58]*/ +/*[clinic end generated code: output=9480bc1c380e2095 input=6c7c5e8b73e4264b]*/ { PyObject *certfile_bytes = NULL, *keyfile_bytes = NULL; pem_password_cb *orig_passwd_cb = SSL_CTX_get_default_passwd_cb(self->ctx); @@ -4013,6 +4207,7 @@ _add_ca_certs(PySSLContext *self, const void *data, Py_ssize_t len, /*[clinic input] +@critical_section _ssl._SSLContext.load_verify_locations cafile: object = None capath: object = None @@ -4025,7 +4220,7 @@ _ssl__SSLContext_load_verify_locations_impl(PySSLContext *self, PyObject *cafile, PyObject *capath, PyObject *cadata) -/*[clinic end generated code: output=454c7e41230ca551 input=42ecfe258233e194]*/ +/*[clinic end generated code: output=454c7e41230ca551 input=b178852b41618414]*/ { PyObject *cafile_bytes = NULL, 
*capath_bytes = NULL; const char *cafile_buf = NULL, *capath_buf = NULL; @@ -4141,6 +4336,7 @@ _ssl__SSLContext_load_verify_locations_impl(PySSLContext *self, } /*[clinic input] +@critical_section _ssl._SSLContext.load_dh_params path as filepath: object / @@ -4148,8 +4344,8 @@ _ssl._SSLContext.load_dh_params [clinic start generated code]*/ static PyObject * -_ssl__SSLContext_load_dh_params(PySSLContext *self, PyObject *filepath) -/*[clinic end generated code: output=1c8e57a38e055af0 input=c8871f3c796ae1d6]*/ +_ssl__SSLContext_load_dh_params_impl(PySSLContext *self, PyObject *filepath) +/*[clinic end generated code: output=dd74b3c524dd2723 input=832769a0734b8c4d]*/ { FILE *f; DH *dh; @@ -4182,6 +4378,7 @@ _ssl__SSLContext_load_dh_params(PySSLContext *self, PyObject *filepath) } /*[clinic input] +@critical_section _ssl._SSLContext._wrap_socket sock: object(subclass_of="get_state_ctx(self)->Sock_Type") server_side: bool @@ -4196,7 +4393,7 @@ static PyObject * _ssl__SSLContext__wrap_socket_impl(PySSLContext *self, PyObject *sock, int server_side, PyObject *hostname_obj, PyObject *owner, PyObject *session) -/*[clinic end generated code: output=f103f238633940b4 input=700ca8fedff53994]*/ +/*[clinic end generated code: output=f103f238633940b4 input=eceadcee4434a06f]*/ { char *hostname = NULL; PyObject *res; @@ -4218,6 +4415,7 @@ _ssl__SSLContext__wrap_socket_impl(PySSLContext *self, PyObject *sock, } /*[clinic input] +@critical_section _ssl._SSLContext._wrap_bio incoming: object(subclass_of="get_state_ctx(self)->PySSLMemoryBIO_Type", type="PySSLMemoryBIO *") outgoing: object(subclass_of="get_state_ctx(self)->PySSLMemoryBIO_Type", type="PySSLMemoryBIO *") @@ -4234,7 +4432,7 @@ _ssl__SSLContext__wrap_bio_impl(PySSLContext *self, PySSLMemoryBIO *incoming, PySSLMemoryBIO *outgoing, int server_side, PyObject *hostname_obj, PyObject *owner, PyObject *session) -/*[clinic end generated code: output=5c5d6d9b41f99332 input=a9205d097fd45a82]*/ +/*[clinic end generated code: 
output=5c5d6d9b41f99332 input=58277fc962a60182]*/ { char *hostname = NULL; PyObject *res; @@ -4255,12 +4453,13 @@ _ssl__SSLContext__wrap_bio_impl(PySSLContext *self, PySSLMemoryBIO *incoming, } /*[clinic input] +@critical_section _ssl._SSLContext.session_stats [clinic start generated code]*/ static PyObject * _ssl__SSLContext_session_stats_impl(PySSLContext *self) -/*[clinic end generated code: output=0d96411c42893bfb input=7e0a81fb11102c8b]*/ +/*[clinic end generated code: output=0d96411c42893bfb input=db62af53004127a4]*/ { int r; PyObject *value, *stats = PyDict_New(); @@ -4299,12 +4498,13 @@ _ssl__SSLContext_session_stats_impl(PySSLContext *self) } /*[clinic input] +@critical_section _ssl._SSLContext.set_default_verify_paths [clinic start generated code]*/ static PyObject * _ssl__SSLContext_set_default_verify_paths_impl(PySSLContext *self) -/*[clinic end generated code: output=0bee74e6e09deaaa input=35f3408021463d74]*/ +/*[clinic end generated code: output=0bee74e6e09deaaa input=939a88e78f634119]*/ { int rc; Py_BEGIN_ALLOW_THREADS @@ -4318,6 +4518,7 @@ _ssl__SSLContext_set_default_verify_paths_impl(PySSLContext *self) } /*[clinic input] +@critical_section _ssl._SSLContext.set_ecdh_curve name: object / @@ -4325,8 +4526,8 @@ _ssl._SSLContext.set_ecdh_curve [clinic start generated code]*/ static PyObject * -_ssl__SSLContext_set_ecdh_curve(PySSLContext *self, PyObject *name) -/*[clinic end generated code: output=23022c196e40d7d2 input=c2bafb6f6e34726b]*/ +_ssl__SSLContext_set_ecdh_curve_impl(PySSLContext *self, PyObject *name) +/*[clinic end generated code: output=01081151ce0ecc45 input=039df032e666870e]*/ { PyObject *name_bytes; int nid; @@ -4460,8 +4661,15 @@ _servername_callback(SSL *s, int *al, void *args) return ret; } +/*[clinic input] +@critical_section +@getter +_ssl._SSLContext.sni_callback +[clinic start generated code]*/ + static PyObject * -get_sni_callback(PySSLContext *self, void *c) +_ssl__SSLContext_sni_callback_get_impl(PySSLContext *self) 
+/*[clinic end generated code: output=961e6575cdfaf036 input=22dd28c31fdc4318]*/ { PyObject *cb = self->set_sni_cb; if (cb == NULL) { @@ -4470,8 +4678,15 @@ get_sni_callback(PySSLContext *self, void *c) return Py_NewRef(cb); } +/*[clinic input] +@critical_section +@setter +_ssl._SSLContext.sni_callback +[clinic start generated code]*/ + static int -set_sni_callback(PySSLContext *self, PyObject *arg, void *c) +_ssl__SSLContext_sni_callback_set_impl(PySSLContext *self, PyObject *value) +/*[clinic end generated code: output=b32736c6b891f61a input=c3c4ff33540b3c85]*/ { if (self->protocol == PY_SSL_VERSION_TLS_CLIENT) { PyErr_SetString(PyExc_ValueError, @@ -4479,17 +4694,17 @@ set_sni_callback(PySSLContext *self, PyObject *arg, void *c) return -1; } Py_CLEAR(self->set_sni_cb); - if (arg == Py_None) { + if (value == Py_None) { SSL_CTX_set_tlsext_servername_callback(self->ctx, NULL); } else { - if (!PyCallable_Check(arg)) { + if (!PyCallable_Check(value)) { SSL_CTX_set_tlsext_servername_callback(self->ctx, NULL); PyErr_SetString(PyExc_TypeError, "not a callable object"); return -1; } - self->set_sni_cb = Py_NewRef(arg); + self->set_sni_cb = Py_NewRef(value); SSL_CTX_set_tlsext_servername_callback(self->ctx, _servername_callback); SSL_CTX_set_tlsext_servername_arg(self->ctx, self); } @@ -4540,14 +4755,8 @@ X509_STORE_get1_objects(X509_STORE *store) } #endif -PyDoc_STRVAR(PySSLContext_sni_callback_doc, -"Set a callback that will be called when a server name is provided by the SSL/TLS client in the SNI extension.\n\ -\n\ -If the argument is None then the callback is disabled. The method is called\n\ -with the SSLSocket, the server name as a string, and the SSLContext object.\n\ -See RFC 6066 for details of the SNI extension."); - /*[clinic input] +@critical_section _ssl._SSLContext.cert_store_stats Returns quantities of loaded X.509 certificates. @@ -4561,7 +4770,7 @@ been used at least once. 
static PyObject * _ssl__SSLContext_cert_store_stats_impl(PySSLContext *self) -/*[clinic end generated code: output=5f356f4d9cca874d input=eb40dd0f6d0e40cf]*/ +/*[clinic end generated code: output=5f356f4d9cca874d input=d13c6e3f2b48539b]*/ { X509_STORE *store; STACK_OF(X509_OBJECT) *objs; @@ -4598,6 +4807,7 @@ _ssl__SSLContext_cert_store_stats_impl(PySSLContext *self) } /*[clinic input] +@critical_section _ssl._SSLContext.get_ca_certs binary_form: bool = False @@ -4612,7 +4822,7 @@ been used at least once. static PyObject * _ssl__SSLContext_get_ca_certs_impl(PySSLContext *self, int binary_form) -/*[clinic end generated code: output=0d58f148f37e2938 input=6887b5a09b7f9076]*/ +/*[clinic end generated code: output=0d58f148f37e2938 input=eb0592909c9ad6e7]*/ { X509_STORE *store; STACK_OF(X509_OBJECT) *objs; @@ -4738,6 +4948,7 @@ static unsigned int psk_client_callback(SSL *s, #endif /*[clinic input] +@critical_section _ssl._SSLContext.set_psk_client_callback callback: object @@ -4746,7 +4957,7 @@ _ssl._SSLContext.set_psk_client_callback static PyObject * _ssl__SSLContext_set_psk_client_callback_impl(PySSLContext *self, PyObject *callback) -/*[clinic end generated code: output=0aba86f6ed75119e input=7627bae0e5ee7635]*/ +/*[clinic end generated code: output=0aba86f6ed75119e input=1e436eea625cfc35]*/ { #ifndef OPENSSL_NO_PSK if (self->protocol == PY_SSL_VERSION_TLS_SERVER) { @@ -4846,6 +5057,7 @@ static unsigned int psk_server_callback(SSL *s, #endif /*[clinic input] +@critical_section _ssl._SSLContext.set_psk_server_callback callback: object identity_hint: str(accept={str, NoneType}) = None @@ -4856,7 +5068,7 @@ static PyObject * _ssl__SSLContext_set_psk_server_callback_impl(PySSLContext *self, PyObject *callback, const char *identity_hint) -/*[clinic end generated code: output=1f4d6a4e09a92b03 input=65d4b6022aa85ea3]*/ +/*[clinic end generated code: output=1f4d6a4e09a92b03 input=5f79d932458284a7]*/ { #ifndef OPENSSL_NO_PSK if (self->protocol == PY_SSL_VERSION_TLS_CLIENT) 
{ @@ -4901,26 +5113,19 @@ _ssl__SSLContext_set_psk_server_callback_impl(PySSLContext *self, static PyGetSetDef context_getsetlist[] = { - {"check_hostname", (getter) get_check_hostname, - (setter) set_check_hostname, NULL}, - {"_host_flags", (getter) get_host_flags, - (setter) set_host_flags, NULL}, - {"minimum_version", (getter) get_minimum_version, - (setter) set_minimum_version, NULL}, - {"maximum_version", (getter) get_maximum_version, - (setter) set_maximum_version, NULL}, + _SSL__SSLCONTEXT_CHECK_HOSTNAME_GETSETDEF + _SSL__SSLCONTEXT__HOST_FLAGS_GETSETDEF + _SSL__SSLCONTEXT_MINIMUM_VERSION_GETSETDEF + _SSL__SSLCONTEXT_MAXIMUM_VERSION_GETSETDEF {"keylog_filename", (getter) _PySSLContext_get_keylog_filename, (setter) _PySSLContext_set_keylog_filename, NULL}, {"_msg_callback", (getter) _PySSLContext_get_msg_callback, (setter) _PySSLContext_set_msg_callback, NULL}, - {"sni_callback", (getter) get_sni_callback, - (setter) set_sni_callback, PySSLContext_sni_callback_doc}, + _SSL__SSLCONTEXT_SNI_CALLBACK_GETSETDEF #if defined(TLS1_3_VERSION) && !defined(OPENSSL_NO_TLS1_3) - {"num_tickets", (getter) get_num_tickets, - (setter) set_num_tickets, PySSLContext_num_tickets_doc}, + _SSL__SSLCONTEXT_NUM_TICKETS_GETSETDEF #endif - {"options", (getter) get_options, - (setter) set_options, NULL}, + _SSL__SSLCONTEXT_OPTIONS_GETSETDEF {"post_handshake_auth", (getter) get_post_handshake_auth, #if defined(PySSL_HAVE_POST_HS_AUTH) (setter) set_post_handshake_auth, @@ -4928,14 +5133,10 @@ static PyGetSetDef context_getsetlist[] = { NULL, #endif NULL}, - {"protocol", (getter) get_protocol, - NULL, NULL}, - {"verify_flags", (getter) get_verify_flags, - (setter) set_verify_flags, NULL}, - {"verify_mode", (getter) get_verify_mode, - (setter) set_verify_mode, NULL}, - {"security_level", (getter) get_security_level, - NULL, PySSLContext_security_level_doc}, + _SSL__SSLCONTEXT_PROTOCOL_GETSETDEF + _SSL__SSLCONTEXT_VERIFY_FLAGS_GETSETDEF + _SSL__SSLCONTEXT_VERIFY_MODE_GETSETDEF + 
_SSL__SSLCONTEXT_SECURITY_LEVEL_GETSETDEF {NULL}, /* sentinel */ }; @@ -4982,6 +5183,7 @@ static PyType_Spec PySSLContext_spec = { */ /*[clinic input] +@critical_section @classmethod _ssl.MemoryBIO.__new__ @@ -4989,7 +5191,7 @@ _ssl.MemoryBIO.__new__ static PyObject * _ssl_MemoryBIO_impl(PyTypeObject *type) -/*[clinic end generated code: output=8820a58db78330ac input=26d22e4909ecb1b5]*/ +/*[clinic end generated code: output=8820a58db78330ac input=87f146cf30af454e]*/ { BIO *bio; PySSLMemoryBIO *self; @@ -5034,26 +5236,36 @@ memory_bio_dealloc(PySSLMemoryBIO *self) Py_DECREF(tp); } +/*[clinic input] +@critical_section +@getter +_ssl.MemoryBIO.pending +[clinic start generated code]*/ + static PyObject * -memory_bio_get_pending(PySSLMemoryBIO *self, void *c) +_ssl_MemoryBIO_pending_get_impl(PySSLMemoryBIO *self) +/*[clinic end generated code: output=19236a32a51ac8ff input=c0b6d14eba107f6a]*/ { - return PyLong_FromSize_t(BIO_ctrl_pending(self->bio)); + size_t res = BIO_ctrl_pending(self->bio); + return PyLong_FromSize_t(res); } -PyDoc_STRVAR(PySSL_memory_bio_pending_doc, -"The number of bytes pending in the memory BIO."); +/*[clinic input] +@critical_section +@getter +_ssl.MemoryBIO.eof +[clinic start generated code]*/ static PyObject * -memory_bio_get_eof(PySSLMemoryBIO *self, void *c) +_ssl_MemoryBIO_eof_get_impl(PySSLMemoryBIO *self) +/*[clinic end generated code: output=c255a9ea16e31b92 input=0f5c6be69752e04c]*/ { - return PyBool_FromLong((BIO_ctrl_pending(self->bio) == 0) - && self->eof_written); + size_t pending = BIO_ctrl_pending(self->bio); + return PyBool_FromLong((pending == 0) && self->eof_written); } -PyDoc_STRVAR(PySSL_memory_bio_eof_doc, -"Whether the memory BIO is at EOF."); - /*[clinic input] +@critical_section _ssl.MemoryBIO.read size as len: int = -1 / @@ -5068,7 +5280,7 @@ distinguish between the two. 
static PyObject * _ssl_MemoryBIO_read_impl(PySSLMemoryBIO *self, int len) -/*[clinic end generated code: output=a657aa1e79cd01b3 input=574d7be06a902366]*/ +/*[clinic end generated code: output=a657aa1e79cd01b3 input=21046f2d7dac3a90]*/ { int avail, nbytes; PyObject *result; @@ -5098,6 +5310,7 @@ _ssl_MemoryBIO_read_impl(PySSLMemoryBIO *self, int len) } /*[clinic input] +@critical_section _ssl.MemoryBIO.write b: Py_buffer / @@ -5109,7 +5322,7 @@ Returns the number of bytes written. static PyObject * _ssl_MemoryBIO_write_impl(PySSLMemoryBIO *self, Py_buffer *b) -/*[clinic end generated code: output=156ec59110d75935 input=e45757b3e17c4808]*/ +/*[clinic end generated code: output=156ec59110d75935 input=107da3f5fba26b37]*/ { int nbytes; @@ -5139,6 +5352,7 @@ _ssl_MemoryBIO_write_impl(PySSLMemoryBIO *self, Py_buffer *b) } /*[clinic input] +@critical_section _ssl.MemoryBIO.write_eof Write an EOF marker to the memory BIO. @@ -5148,7 +5362,7 @@ When all data has been read, the "eof" property will be True. 
static PyObject * _ssl_MemoryBIO_write_eof_impl(PySSLMemoryBIO *self) -/*[clinic end generated code: output=d4106276ccd1ed34 input=56a945f1d29e8bd6]*/ +/*[clinic end generated code: output=d4106276ccd1ed34 input=1e914231b1c5900a]*/ { self->eof_written = 1; /* After an EOF is written, a zero return from read() should be a real EOF @@ -5160,10 +5374,8 @@ _ssl_MemoryBIO_write_eof_impl(PySSLMemoryBIO *self) } static PyGetSetDef memory_bio_getsetlist[] = { - {"pending", (getter) memory_bio_get_pending, NULL, - PySSL_memory_bio_pending_doc}, - {"eof", (getter) memory_bio_get_eof, NULL, - PySSL_memory_bio_eof_doc}, + _SSL_MEMORYBIO_PENDING_GETSETDEF + _SSL_MEMORYBIO_EOF_GETSETDEF {NULL}, /* sentinel */ }; @@ -5283,8 +5495,16 @@ PySSLSession_clear(PySSLSession *self) } +/*[clinic input] +@critical_section +@getter +_ssl.SSLSession.time +[clinic start generated code]*/ + static PyObject * -PySSLSession_get_time(PySSLSession *self, void *closure) { +_ssl_SSLSession_time_get_impl(PySSLSession *self) +/*[clinic end generated code: output=4b887b9299de9be4 input=8d1e4afd09103279]*/ +{ #if OPENSSL_VERSION_NUMBER >= 0x30300000L return _PyLong_FromTime_t(SSL_SESSION_get_time_ex(self->session)); #else @@ -5292,65 +5512,72 @@ PySSLSession_get_time(PySSLSession *self, void *closure) { #endif } -PyDoc_STRVAR(PySSLSession_get_time_doc, -"Session creation time (seconds since epoch)."); - +/*[clinic input] +@critical_section +@getter +_ssl.SSLSession.timeout +[clinic start generated code]*/ static PyObject * -PySSLSession_get_timeout(PySSLSession *self, void *closure) { - return PyLong_FromLong(SSL_SESSION_get_timeout(self->session)); +_ssl_SSLSession_timeout_get_impl(PySSLSession *self) +/*[clinic end generated code: output=82339c148ab2f7d1 input=ae5e84a9d85df60d]*/ +{ + long timeout = SSL_SESSION_get_timeout(self->session); + PyObject *res = PyLong_FromLong(timeout); + return res; } -PyDoc_STRVAR(PySSLSession_get_timeout_doc, -"Session timeout (delta in seconds)."); - +/*[clinic input] 
+@critical_section +@getter +_ssl.SSLSession.ticket_lifetime_hint +[clinic start generated code]*/ static PyObject * -PySSLSession_get_ticket_lifetime_hint(PySSLSession *self, void *closure) { +_ssl_SSLSession_ticket_lifetime_hint_get_impl(PySSLSession *self) +/*[clinic end generated code: output=c8b6db498136c275 input=d0e06942ddd8d07f]*/ +{ unsigned long hint = SSL_SESSION_get_ticket_lifetime_hint(self->session); return PyLong_FromUnsignedLong(hint); } -PyDoc_STRVAR(PySSLSession_get_ticket_lifetime_hint_doc, -"Ticket life time hint."); - +/*[clinic input] +@critical_section +@getter +_ssl.SSLSession.id +[clinic start generated code]*/ static PyObject * -PySSLSession_get_session_id(PySSLSession *self, void *closure) { +_ssl_SSLSession_id_get_impl(PySSLSession *self) +/*[clinic end generated code: output=c532fb96b10c5adf input=e7322372cf6325dd]*/ + +{ const unsigned char *id; unsigned int len; id = SSL_SESSION_get_id(self->session, &len); return PyBytes_FromStringAndSize((const char *)id, len); } -PyDoc_STRVAR(PySSLSession_get_session_id_doc, -"Session id"); - +/*[clinic input] +@critical_section +@getter +_ssl.SSLSession.has_ticket +[clinic start generated code]*/ static PyObject * -PySSLSession_get_has_ticket(PySSLSession *self, void *closure) { - if (SSL_SESSION_has_ticket(self->session)) { - Py_RETURN_TRUE; - } else { - Py_RETURN_FALSE; - } +_ssl_SSLSession_has_ticket_get_impl(PySSLSession *self) +/*[clinic end generated code: output=aa3ccfc40b10b96d input=1a48ae8955fa9601]*/ +{ + int res = SSL_SESSION_has_ticket(self->session); + return res ? 
Py_True : Py_False; } -PyDoc_STRVAR(PySSLSession_get_has_ticket_doc, -"Does the session contain a ticket?"); - - static PyGetSetDef PySSLSession_getsetlist[] = { - {"has_ticket", (getter) PySSLSession_get_has_ticket, NULL, - PySSLSession_get_has_ticket_doc}, - {"id", (getter) PySSLSession_get_session_id, NULL, - PySSLSession_get_session_id_doc}, - {"ticket_lifetime_hint", (getter) PySSLSession_get_ticket_lifetime_hint, - NULL, PySSLSession_get_ticket_lifetime_hint_doc}, - {"time", (getter) PySSLSession_get_time, NULL, - PySSLSession_get_time_doc}, - {"timeout", (getter) PySSLSession_get_timeout, NULL, - PySSLSession_get_timeout_doc}, + _SSL_SSLSESSION_HAS_TICKET_GETSETDEF + _SSL_SSLSESSION_ID_GETSETDEF + _SSL_SSLSESSION_TICKET_LIFETIME_HINT_GETSETDEF + _SSL_SSLSESSION_TIME_GETSETDEF + _SSL_SSLSESSION_TIMEOUT_GETSETDEF {NULL}, /* sentinel */ }; @@ -5375,6 +5602,7 @@ static PyType_Spec PySSLSession_spec = { /* helper routines for seeding the SSL PRNG */ /*[clinic input] +@critical_section _ssl.RAND_add string as view: Py_buffer(accept={str, buffer}) entropy: double @@ -5388,7 +5616,7 @@ string. See RFC 4086. static PyObject * _ssl_RAND_add_impl(PyObject *module, Py_buffer *view, double entropy) -/*[clinic end generated code: output=e6dd48df9c9024e9 input=5c33017422828f5c]*/ +/*[clinic end generated code: output=e6dd48df9c9024e9 input=313cb73b34db31d5]*/ { const char *buf; Py_ssize_t len, written; @@ -5444,6 +5672,7 @@ PySSL_RAND(PyObject *module, int len, int pseudo) } /*[clinic input] +@critical_section _ssl.RAND_bytes n: int / @@ -5453,13 +5682,14 @@ Generate n cryptographically strong pseudo-random bytes. 
static PyObject * _ssl_RAND_bytes_impl(PyObject *module, int n) -/*[clinic end generated code: output=977da635e4838bc7 input=678ddf2872dfebfc]*/ +/*[clinic end generated code: output=977da635e4838bc7 input=2e78ce1e86336776]*/ { return PySSL_RAND(module, n, 0); } /*[clinic input] +@critical_section _ssl.RAND_status Returns True if the OpenSSL PRNG has been seeded with enough data and False if not. @@ -5470,12 +5700,13 @@ using the ssl() function. static PyObject * _ssl_RAND_status_impl(PyObject *module) -/*[clinic end generated code: output=7e0aaa2d39fdc1ad input=d5ae5aea52f36e01]*/ +/*[clinic end generated code: output=7e0aaa2d39fdc1ad input=636fb5659ea2e727]*/ { return PyBool_FromLong(RAND_status()); } /*[clinic input] +@critical_section _ssl.get_default_verify_paths Return search paths and environment vars that are used by SSLContext's set_default_verify_paths() to load default CAs. @@ -5485,7 +5716,7 @@ The values are 'cert_file_env', 'cert_file', 'cert_dir_env', 'cert_dir'. static PyObject * _ssl_get_default_verify_paths_impl(PyObject *module) -/*[clinic end generated code: output=e5b62a466271928b input=5210c953d98c3eb5]*/ +/*[clinic end generated code: output=e5b62a466271928b input=c6ae00bc04eb2b6e]*/ { PyObject *ofile_env = NULL; PyObject *ofile = NULL; @@ -5534,6 +5765,7 @@ asn1obj2py(_sslmodulestate *state, ASN1_OBJECT *obj) } /*[clinic input] +@critical_section _ssl.txt2obj txt: str name: bool = False @@ -5546,7 +5778,7 @@ long name are also matched. 
static PyObject * _ssl_txt2obj_impl(PyObject *module, const char *txt, int name) -/*[clinic end generated code: output=c38e3991347079c1 input=1c1e7d0aa7c48602]*/ +/*[clinic end generated code: output=c38e3991347079c1 input=c99b134d70173c5e]*/ { PyObject *result = NULL; ASN1_OBJECT *obj; @@ -5562,6 +5794,7 @@ _ssl_txt2obj_impl(PyObject *module, const char *txt, int name) } /*[clinic input] +@critical_section _ssl.nid2obj nid: int / @@ -5571,7 +5804,7 @@ Lookup NID, short name, long name and OID of an ASN1_OBJECT by NID. static PyObject * _ssl_nid2obj_impl(PyObject *module, int nid) -/*[clinic end generated code: output=4a98ab691cd4f84a input=51787a3bee7d8f98]*/ +/*[clinic end generated code: output=4a98ab691cd4f84a input=1b1170506fa83a53]*/ { PyObject *result = NULL; ASN1_OBJECT *obj; @@ -5723,6 +5956,7 @@ ssl_collect_certificates(const char *store_name) } /*[clinic input] +@critical_section _ssl.enum_certificates store_name: str @@ -5737,7 +5971,7 @@ a set of OIDs or the boolean True. static PyObject * _ssl_enum_certificates_impl(PyObject *module, const char *store_name) -/*[clinic end generated code: output=5134dc8bb3a3c893 input=915f60d70461ea4e]*/ +/*[clinic end generated code: output=5134dc8bb3a3c893 input=263c22e6c6988cf3]*/ { HCERTSTORE hCollectionStore = NULL; PCCERT_CONTEXT pCertCtx = NULL; @@ -5822,6 +6056,7 @@ _ssl_enum_certificates_impl(PyObject *module, const char *store_name) } /*[clinic input] +@critical_section _ssl.enum_crls store_name: str @@ -5835,7 +6070,7 @@ X509_ASN_ENCODING or PKCS_7_ASN_ENCODING. 
static PyObject * _ssl_enum_crls_impl(PyObject *module, const char *store_name) -/*[clinic end generated code: output=bce467f60ccd03b6 input=a1f1d7629f1c5d3d]*/ +/*[clinic end generated code: output=bce467f60ccd03b6 input=51a1b1059e55ce43]*/ { HCERTSTORE hCollectionStore = NULL; PCCRL_CONTEXT pCrlCtx = NULL; diff --git a/Modules/clinic/_ssl.c.h b/Modules/clinic/_ssl.c.h index e8d1342ed35e66..9d5b70dfad553d 100644 --- a/Modules/clinic/_ssl.c.h +++ b/Modules/clinic/_ssl.c.h @@ -6,6 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_ssl__SSLSocket_do_handshake__doc__, @@ -22,7 +23,13 @@ _ssl__SSLSocket_do_handshake_impl(PySSLSocket *self); static PyObject * _ssl__SSLSocket_do_handshake(PySSLSocket *self, PyObject *Py_UNUSED(ignored)) { - return _ssl__SSLSocket_do_handshake_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_do_handshake_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__test_decode_cert__doc__, @@ -88,7 +95,9 @@ _ssl__SSLSocket_getpeercert(PySSLSocket *self, PyObject *const *args, Py_ssize_t goto exit; } skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl__SSLSocket_getpeercert_impl(self, binary_mode); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -108,7 +117,13 @@ _ssl__SSLSocket_get_verified_chain_impl(PySSLSocket *self); static PyObject * _ssl__SSLSocket_get_verified_chain(PySSLSocket *self, PyObject *Py_UNUSED(ignored)) { - return _ssl__SSLSocket_get_verified_chain_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_get_verified_chain_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__SSLSocket_get_unverified_chain__doc__, @@ -125,7 
+140,13 @@ _ssl__SSLSocket_get_unverified_chain_impl(PySSLSocket *self); static PyObject * _ssl__SSLSocket_get_unverified_chain(PySSLSocket *self, PyObject *Py_UNUSED(ignored)) { - return _ssl__SSLSocket_get_unverified_chain_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_get_unverified_chain_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__SSLSocket_shared_ciphers__doc__, @@ -142,7 +163,13 @@ _ssl__SSLSocket_shared_ciphers_impl(PySSLSocket *self); static PyObject * _ssl__SSLSocket_shared_ciphers(PySSLSocket *self, PyObject *Py_UNUSED(ignored)) { - return _ssl__SSLSocket_shared_ciphers_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_shared_ciphers_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__SSLSocket_cipher__doc__, @@ -159,7 +186,13 @@ _ssl__SSLSocket_cipher_impl(PySSLSocket *self); static PyObject * _ssl__SSLSocket_cipher(PySSLSocket *self, PyObject *Py_UNUSED(ignored)) { - return _ssl__SSLSocket_cipher_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_cipher_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__SSLSocket_version__doc__, @@ -176,7 +209,13 @@ _ssl__SSLSocket_version_impl(PySSLSocket *self); static PyObject * _ssl__SSLSocket_version(PySSLSocket *self, PyObject *Py_UNUSED(ignored)) { - return _ssl__SSLSocket_version_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_version_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__SSLSocket_selected_alpn_protocol__doc__, @@ -193,7 +232,13 @@ _ssl__SSLSocket_selected_alpn_protocol_impl(PySSLSocket *self); static PyObject * _ssl__SSLSocket_selected_alpn_protocol(PySSLSocket *self, PyObject 
*Py_UNUSED(ignored)) { - return _ssl__SSLSocket_selected_alpn_protocol_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_selected_alpn_protocol_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__SSLSocket_compression__doc__, @@ -213,6 +258,176 @@ _ssl__SSLSocket_compression(PySSLSocket *self, PyObject *Py_UNUSED(ignored)) return _ssl__SSLSocket_compression_impl(self); } +#if defined(_ssl__SSLSocket_context_HAS_DOCSTR) +# define _ssl__SSLSocket_context_DOCSTR _ssl__SSLSocket_context__doc__ +#else +# define _ssl__SSLSocket_context_DOCSTR NULL +#endif +#if defined(_SSL__SSLSOCKET_CONTEXT_GETSETDEF) +# undef _SSL__SSLSOCKET_CONTEXT_GETSETDEF +# define _SSL__SSLSOCKET_CONTEXT_GETSETDEF {"context", (getter)_ssl__SSLSocket_context_get, (setter)_ssl__SSLSocket_context_set, _ssl__SSLSocket_context_DOCSTR}, +#else +# define _SSL__SSLSOCKET_CONTEXT_GETSETDEF {"context", (getter)_ssl__SSLSocket_context_get, NULL, _ssl__SSLSocket_context_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLSocket_context_get_impl(PySSLSocket *self); + +static PyObject * +_ssl__SSLSocket_context_get(PySSLSocket *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_context_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_SSL__SSLSOCKET_CONTEXT_HAS_DOCSTR) +# define _ssl__SSLSocket_context_DOCSTR _ssl__SSLSocket_context__doc__ +#else +# define _ssl__SSLSocket_context_DOCSTR NULL +#endif +#if defined(_SSL__SSLSOCKET_CONTEXT_GETSETDEF) +# undef _SSL__SSLSOCKET_CONTEXT_GETSETDEF +# define _SSL__SSLSOCKET_CONTEXT_GETSETDEF {"context", (getter)_ssl__SSLSocket_context_get, (setter)_ssl__SSLSocket_context_set, _ssl__SSLSocket_context_DOCSTR}, +#else +# define _SSL__SSLSOCKET_CONTEXT_GETSETDEF {"context", NULL, (setter)_ssl__SSLSocket_context_set, NULL}, +#endif + +static int 
+_ssl__SSLSocket_context_set_impl(PySSLSocket *self, PyObject *value); + +static int +_ssl__SSLSocket_context_set(PySSLSocket *self, PyObject *value, void *Py_UNUSED(context)) +{ + int return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_context_set_impl(self, value); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_ssl__SSLSocket_server_side__doc__, +"Whether this is a server-side socket."); +#define _ssl__SSLSocket_server_side_HAS_DOCSTR + +#if defined(_ssl__SSLSocket_server_side_HAS_DOCSTR) +# define _ssl__SSLSocket_server_side_DOCSTR _ssl__SSLSocket_server_side__doc__ +#else +# define _ssl__SSLSocket_server_side_DOCSTR NULL +#endif +#if defined(_SSL__SSLSOCKET_SERVER_SIDE_GETSETDEF) +# undef _SSL__SSLSOCKET_SERVER_SIDE_GETSETDEF +# define _SSL__SSLSOCKET_SERVER_SIDE_GETSETDEF {"server_side", (getter)_ssl__SSLSocket_server_side_get, (setter)_ssl__SSLSocket_server_side_set, _ssl__SSLSocket_server_side_DOCSTR}, +#else +# define _SSL__SSLSOCKET_SERVER_SIDE_GETSETDEF {"server_side", (getter)_ssl__SSLSocket_server_side_get, NULL, _ssl__SSLSocket_server_side_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLSocket_server_side_get_impl(PySSLSocket *self); + +static PyObject * +_ssl__SSLSocket_server_side_get(PySSLSocket *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_server_side_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_ssl__SSLSocket_server_hostname__doc__, +"The currently set server hostname (for SNI)."); +#define _ssl__SSLSocket_server_hostname_HAS_DOCSTR + +#if defined(_ssl__SSLSocket_server_hostname_HAS_DOCSTR) +# define _ssl__SSLSocket_server_hostname_DOCSTR _ssl__SSLSocket_server_hostname__doc__ +#else +# define _ssl__SSLSocket_server_hostname_DOCSTR NULL +#endif +#if defined(_SSL__SSLSOCKET_SERVER_HOSTNAME_GETSETDEF) +# undef _SSL__SSLSOCKET_SERVER_HOSTNAME_GETSETDEF +# 
define _SSL__SSLSOCKET_SERVER_HOSTNAME_GETSETDEF {"server_hostname", (getter)_ssl__SSLSocket_server_hostname_get, (setter)_ssl__SSLSocket_server_hostname_set, _ssl__SSLSocket_server_hostname_DOCSTR}, +#else +# define _SSL__SSLSOCKET_SERVER_HOSTNAME_GETSETDEF {"server_hostname", (getter)_ssl__SSLSocket_server_hostname_get, NULL, _ssl__SSLSocket_server_hostname_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLSocket_server_hostname_get_impl(PySSLSocket *self); + +static PyObject * +_ssl__SSLSocket_server_hostname_get(PySSLSocket *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_server_hostname_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl__SSLSocket_owner_HAS_DOCSTR) +# define _ssl__SSLSocket_owner_DOCSTR _ssl__SSLSocket_owner__doc__ +#else +# define _ssl__SSLSocket_owner_DOCSTR NULL +#endif +#if defined(_SSL__SSLSOCKET_OWNER_GETSETDEF) +# undef _SSL__SSLSOCKET_OWNER_GETSETDEF +# define _SSL__SSLSOCKET_OWNER_GETSETDEF {"owner", (getter)_ssl__SSLSocket_owner_get, (setter)_ssl__SSLSocket_owner_set, _ssl__SSLSocket_owner_DOCSTR}, +#else +# define _SSL__SSLSOCKET_OWNER_GETSETDEF {"owner", (getter)_ssl__SSLSocket_owner_get, NULL, _ssl__SSLSocket_owner_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLSocket_owner_get_impl(PySSLSocket *self); + +static PyObject * +_ssl__SSLSocket_owner_get(PySSLSocket *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_owner_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_SSL__SSLSOCKET_OWNER_HAS_DOCSTR) +# define _ssl__SSLSocket_owner_DOCSTR _ssl__SSLSocket_owner__doc__ +#else +# define _ssl__SSLSocket_owner_DOCSTR NULL +#endif +#if defined(_SSL__SSLSOCKET_OWNER_GETSETDEF) +# undef _SSL__SSLSOCKET_OWNER_GETSETDEF +# define _SSL__SSLSOCKET_OWNER_GETSETDEF {"owner", 
(getter)_ssl__SSLSocket_owner_get, (setter)_ssl__SSLSocket_owner_set, _ssl__SSLSocket_owner_DOCSTR}, +#else +# define _SSL__SSLSOCKET_OWNER_GETSETDEF {"owner", NULL, (setter)_ssl__SSLSocket_owner_set, NULL}, +#endif + +static int +_ssl__SSLSocket_owner_set_impl(PySSLSocket *self, PyObject *value); + +static int +_ssl__SSLSocket_owner_set(PySSLSocket *self, PyObject *value, void *Py_UNUSED(context)) +{ + int return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_owner_set_impl(self, value); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + PyDoc_STRVAR(_ssl__SSLSocket_write__doc__, "write($self, b, /)\n" "--\n" @@ -236,7 +451,9 @@ _ssl__SSLSocket_write(PySSLSocket *self, PyObject *arg) if (PyObject_GetBuffer(arg, &b, PyBUF_SIMPLE) != 0) { goto exit; } + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl__SSLSocket_write_impl(self, &b); + Py_END_CRITICAL_SECTION(); exit: /* Cleanup for b */ @@ -262,7 +479,13 @@ _ssl__SSLSocket_pending_impl(PySSLSocket *self); static PyObject * _ssl__SSLSocket_pending(PySSLSocket *self, PyObject *Py_UNUSED(ignored)) { - return _ssl__SSLSocket_pending_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_pending_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__SSLSocket_read__doc__, @@ -300,7 +523,9 @@ _ssl__SSLSocket_read(PySSLSocket *self, PyObject *args) PyErr_SetString(PyExc_TypeError, "_ssl._SSLSocket.read requires 1 to 2 arguments"); goto exit; } + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl__SSLSocket_read_impl(self, len, group_right_1, &buffer); + Py_END_CRITICAL_SECTION(); exit: /* Cleanup for buffer */ @@ -326,7 +551,13 @@ _ssl__SSLSocket_shutdown_impl(PySSLSocket *self); static PyObject * _ssl__SSLSocket_shutdown(PySSLSocket *self, PyObject *Py_UNUSED(ignored)) { - return _ssl__SSLSocket_shutdown_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + 
return_value = _ssl__SSLSocket_shutdown_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__SSLSocket_get_channel_binding__doc__, @@ -400,7 +631,9 @@ _ssl__SSLSocket_get_channel_binding(PySSLSocket *self, PyObject *const *args, Py goto exit; } skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl__SSLSocket_get_channel_binding_impl(self, cb_type); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -421,117 +654,708 @@ _ssl__SSLSocket_verify_client_post_handshake_impl(PySSLSocket *self); static PyObject * _ssl__SSLSocket_verify_client_post_handshake(PySSLSocket *self, PyObject *Py_UNUSED(ignored)) { - return _ssl__SSLSocket_verify_client_post_handshake_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_verify_client_post_handshake_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl__SSLSocket_session_HAS_DOCSTR) +# define _ssl__SSLSocket_session_DOCSTR _ssl__SSLSocket_session__doc__ +#else +# define _ssl__SSLSocket_session_DOCSTR NULL +#endif +#if defined(_SSL__SSLSOCKET_SESSION_GETSETDEF) +# undef _SSL__SSLSOCKET_SESSION_GETSETDEF +# define _SSL__SSLSOCKET_SESSION_GETSETDEF {"session", (getter)_ssl__SSLSocket_session_get, (setter)_ssl__SSLSocket_session_set, _ssl__SSLSocket_session_DOCSTR}, +#else +# define _SSL__SSLSOCKET_SESSION_GETSETDEF {"session", (getter)_ssl__SSLSocket_session_get, NULL, _ssl__SSLSocket_session_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLSocket_session_get_impl(PySSLSocket *self); + +static PyObject * +_ssl__SSLSocket_session_get(PySSLSocket *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_session_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_SSL__SSLSOCKET_SESSION_HAS_DOCSTR) +# define _ssl__SSLSocket_session_DOCSTR 
_ssl__SSLSocket_session__doc__ +#else +# define _ssl__SSLSocket_session_DOCSTR NULL +#endif +#if defined(_SSL__SSLSOCKET_SESSION_GETSETDEF) +# undef _SSL__SSLSOCKET_SESSION_GETSETDEF +# define _SSL__SSLSOCKET_SESSION_GETSETDEF {"session", (getter)_ssl__SSLSocket_session_get, (setter)_ssl__SSLSocket_session_set, _ssl__SSLSocket_session_DOCSTR}, +#else +# define _SSL__SSLSOCKET_SESSION_GETSETDEF {"session", NULL, (setter)_ssl__SSLSocket_session_set, NULL}, +#endif + +static int +_ssl__SSLSocket_session_set_impl(PySSLSocket *self, PyObject *value); + +static int +_ssl__SSLSocket_session_set(PySSLSocket *self, PyObject *value, void *Py_UNUSED(context)) +{ + int return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLSocket_session_set_impl(self, value); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_ssl__SSLSocket_session_reused__doc__, +"Was the client session reused during handshake?"); +#define _ssl__SSLSocket_session_reused_HAS_DOCSTR + +#if defined(_ssl__SSLSocket_session_reused_HAS_DOCSTR) +# define _ssl__SSLSocket_session_reused_DOCSTR _ssl__SSLSocket_session_reused__doc__ +#else +# define _ssl__SSLSocket_session_reused_DOCSTR NULL +#endif +#if defined(_SSL__SSLSOCKET_SESSION_REUSED_GETSETDEF) +# undef _SSL__SSLSOCKET_SESSION_REUSED_GETSETDEF +# define _SSL__SSLSOCKET_SESSION_REUSED_GETSETDEF {"session_reused", (getter)_ssl__SSLSocket_session_reused_get, (setter)_ssl__SSLSocket_session_reused_set, _ssl__SSLSocket_session_reused_DOCSTR}, +#else +# define _SSL__SSLSOCKET_SESSION_REUSED_GETSETDEF {"session_reused", (getter)_ssl__SSLSocket_session_reused_get, NULL, _ssl__SSLSocket_session_reused_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLSocket_session_reused_get_impl(PySSLSocket *self); + +static PyObject * +_ssl__SSLSocket_session_reused_get(PySSLSocket *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = 
_ssl__SSLSocket_session_reused_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +static PyObject * +_ssl__SSLContext_impl(PyTypeObject *type, int proto_version); + +static PyObject * +_ssl__SSLContext(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + PyTypeObject *base_tp = get_state_type(type)->PySSLContext_Type; + int proto_version; + + if ((type == base_tp || type->tp_init == base_tp->tp_init) && + !_PyArg_NoKeywords("_SSLContext", kwargs)) { + goto exit; + } + if (!_PyArg_CheckPositional("_SSLContext", PyTuple_GET_SIZE(args), 1, 1)) { + goto exit; + } + proto_version = PyLong_AsInt(PyTuple_GET_ITEM(args, 0)); + if (proto_version == -1 && PyErr_Occurred()) { + goto exit; + } + Py_BEGIN_CRITICAL_SECTION(type); + return_value = _ssl__SSLContext_impl(type, proto_version); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +PyDoc_STRVAR(_ssl__SSLContext_set_ciphers__doc__, +"set_ciphers($self, cipherlist, /)\n" +"--\n" +"\n"); + +#define _SSL__SSLCONTEXT_SET_CIPHERS_METHODDEF \ + {"set_ciphers", (PyCFunction)_ssl__SSLContext_set_ciphers, METH_O, _ssl__SSLContext_set_ciphers__doc__}, + +static PyObject * +_ssl__SSLContext_set_ciphers_impl(PySSLContext *self, const char *cipherlist); + +static PyObject * +_ssl__SSLContext_set_ciphers(PySSLContext *self, PyObject *arg) +{ + PyObject *return_value = NULL; + const char *cipherlist; + + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("set_ciphers", "argument", "str", arg); + goto exit; + } + Py_ssize_t cipherlist_length; + cipherlist = PyUnicode_AsUTF8AndSize(arg, &cipherlist_length); + if (cipherlist == NULL) { + goto exit; + } + if (strlen(cipherlist) != (size_t)cipherlist_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_set_ciphers_impl(self, cipherlist); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + 
+PyDoc_STRVAR(_ssl__SSLContext_get_ciphers__doc__, +"get_ciphers($self, /)\n" +"--\n" +"\n"); + +#define _SSL__SSLCONTEXT_GET_CIPHERS_METHODDEF \ + {"get_ciphers", (PyCFunction)_ssl__SSLContext_get_ciphers, METH_NOARGS, _ssl__SSLContext_get_ciphers__doc__}, + +static PyObject * +_ssl__SSLContext_get_ciphers_impl(PySSLContext *self); + +static PyObject * +_ssl__SSLContext_get_ciphers(PySSLContext *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_get_ciphers_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_ssl__SSLContext__set_alpn_protocols__doc__, +"_set_alpn_protocols($self, protos, /)\n" +"--\n" +"\n"); + +#define _SSL__SSLCONTEXT__SET_ALPN_PROTOCOLS_METHODDEF \ + {"_set_alpn_protocols", (PyCFunction)_ssl__SSLContext__set_alpn_protocols, METH_O, _ssl__SSLContext__set_alpn_protocols__doc__}, + +static PyObject * +_ssl__SSLContext__set_alpn_protocols_impl(PySSLContext *self, + Py_buffer *protos); + +static PyObject * +_ssl__SSLContext__set_alpn_protocols(PySSLContext *self, PyObject *arg) +{ + PyObject *return_value = NULL; + Py_buffer protos = {NULL, NULL}; + + if (PyObject_GetBuffer(arg, &protos, PyBUF_SIMPLE) != 0) { + goto exit; + } + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext__set_alpn_protocols_impl(self, &protos); + Py_END_CRITICAL_SECTION(); + +exit: + /* Cleanup for protos */ + if (protos.obj) { + PyBuffer_Release(&protos); + } + + return return_value; +} + +#if defined(_ssl__SSLContext_verify_mode_HAS_DOCSTR) +# define _ssl__SSLContext_verify_mode_DOCSTR _ssl__SSLContext_verify_mode__doc__ +#else +# define _ssl__SSLContext_verify_mode_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_VERIFY_MODE_GETSETDEF) +# undef _SSL__SSLCONTEXT_VERIFY_MODE_GETSETDEF +# define _SSL__SSLCONTEXT_VERIFY_MODE_GETSETDEF {"verify_mode", (getter)_ssl__SSLContext_verify_mode_get, (setter)_ssl__SSLContext_verify_mode_set, 
_ssl__SSLContext_verify_mode_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_VERIFY_MODE_GETSETDEF {"verify_mode", (getter)_ssl__SSLContext_verify_mode_get, NULL, _ssl__SSLContext_verify_mode_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLContext_verify_mode_get_impl(PySSLContext *self); + +static PyObject * +_ssl__SSLContext_verify_mode_get(PySSLContext *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_verify_mode_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_SSL__SSLCONTEXT_VERIFY_MODE_HAS_DOCSTR) +# define _ssl__SSLContext_verify_mode_DOCSTR _ssl__SSLContext_verify_mode__doc__ +#else +# define _ssl__SSLContext_verify_mode_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_VERIFY_MODE_GETSETDEF) +# undef _SSL__SSLCONTEXT_VERIFY_MODE_GETSETDEF +# define _SSL__SSLCONTEXT_VERIFY_MODE_GETSETDEF {"verify_mode", (getter)_ssl__SSLContext_verify_mode_get, (setter)_ssl__SSLContext_verify_mode_set, _ssl__SSLContext_verify_mode_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_VERIFY_MODE_GETSETDEF {"verify_mode", NULL, (setter)_ssl__SSLContext_verify_mode_set, NULL}, +#endif + +static int +_ssl__SSLContext_verify_mode_set_impl(PySSLContext *self, PyObject *value); + +static int +_ssl__SSLContext_verify_mode_set(PySSLContext *self, PyObject *value, void *Py_UNUSED(context)) +{ + int return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_verify_mode_set_impl(self, value); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl__SSLContext_verify_flags_HAS_DOCSTR) +# define _ssl__SSLContext_verify_flags_DOCSTR _ssl__SSLContext_verify_flags__doc__ +#else +# define _ssl__SSLContext_verify_flags_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_VERIFY_FLAGS_GETSETDEF) +# undef _SSL__SSLCONTEXT_VERIFY_FLAGS_GETSETDEF +# define _SSL__SSLCONTEXT_VERIFY_FLAGS_GETSETDEF {"verify_flags", 
(getter)_ssl__SSLContext_verify_flags_get, (setter)_ssl__SSLContext_verify_flags_set, _ssl__SSLContext_verify_flags_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_VERIFY_FLAGS_GETSETDEF {"verify_flags", (getter)_ssl__SSLContext_verify_flags_get, NULL, _ssl__SSLContext_verify_flags_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLContext_verify_flags_get_impl(PySSLContext *self); + +static PyObject * +_ssl__SSLContext_verify_flags_get(PySSLContext *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_verify_flags_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_SSL__SSLCONTEXT_VERIFY_FLAGS_HAS_DOCSTR) +# define _ssl__SSLContext_verify_flags_DOCSTR _ssl__SSLContext_verify_flags__doc__ +#else +# define _ssl__SSLContext_verify_flags_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_VERIFY_FLAGS_GETSETDEF) +# undef _SSL__SSLCONTEXT_VERIFY_FLAGS_GETSETDEF +# define _SSL__SSLCONTEXT_VERIFY_FLAGS_GETSETDEF {"verify_flags", (getter)_ssl__SSLContext_verify_flags_get, (setter)_ssl__SSLContext_verify_flags_set, _ssl__SSLContext_verify_flags_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_VERIFY_FLAGS_GETSETDEF {"verify_flags", NULL, (setter)_ssl__SSLContext_verify_flags_set, NULL}, +#endif + +static int +_ssl__SSLContext_verify_flags_set_impl(PySSLContext *self, PyObject *value); + +static int +_ssl__SSLContext_verify_flags_set(PySSLContext *self, PyObject *value, void *Py_UNUSED(context)) +{ + int return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_verify_flags_set_impl(self, value); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl__SSLContext_minimum_version_HAS_DOCSTR) +# define _ssl__SSLContext_minimum_version_DOCSTR _ssl__SSLContext_minimum_version__doc__ +#else +# define _ssl__SSLContext_minimum_version_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_MINIMUM_VERSION_GETSETDEF) +# undef 
_SSL__SSLCONTEXT_MINIMUM_VERSION_GETSETDEF +# define _SSL__SSLCONTEXT_MINIMUM_VERSION_GETSETDEF {"minimum_version", (getter)_ssl__SSLContext_minimum_version_get, (setter)_ssl__SSLContext_minimum_version_set, _ssl__SSLContext_minimum_version_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_MINIMUM_VERSION_GETSETDEF {"minimum_version", (getter)_ssl__SSLContext_minimum_version_get, NULL, _ssl__SSLContext_minimum_version_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLContext_minimum_version_get_impl(PySSLContext *self); + +static PyObject * +_ssl__SSLContext_minimum_version_get(PySSLContext *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_minimum_version_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_SSL__SSLCONTEXT_MINIMUM_VERSION_HAS_DOCSTR) +# define _ssl__SSLContext_minimum_version_DOCSTR _ssl__SSLContext_minimum_version__doc__ +#else +# define _ssl__SSLContext_minimum_version_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_MINIMUM_VERSION_GETSETDEF) +# undef _SSL__SSLCONTEXT_MINIMUM_VERSION_GETSETDEF +# define _SSL__SSLCONTEXT_MINIMUM_VERSION_GETSETDEF {"minimum_version", (getter)_ssl__SSLContext_minimum_version_get, (setter)_ssl__SSLContext_minimum_version_set, _ssl__SSLContext_minimum_version_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_MINIMUM_VERSION_GETSETDEF {"minimum_version", NULL, (setter)_ssl__SSLContext_minimum_version_set, NULL}, +#endif + +static int +_ssl__SSLContext_minimum_version_set_impl(PySSLContext *self, + PyObject *value); + +static int +_ssl__SSLContext_minimum_version_set(PySSLContext *self, PyObject *value, void *Py_UNUSED(context)) +{ + int return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_minimum_version_set_impl(self, value); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl__SSLContext_maximum_version_HAS_DOCSTR) +# define 
_ssl__SSLContext_maximum_version_DOCSTR _ssl__SSLContext_maximum_version__doc__ +#else +# define _ssl__SSLContext_maximum_version_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_MAXIMUM_VERSION_GETSETDEF) +# undef _SSL__SSLCONTEXT_MAXIMUM_VERSION_GETSETDEF +# define _SSL__SSLCONTEXT_MAXIMUM_VERSION_GETSETDEF {"maximum_version", (getter)_ssl__SSLContext_maximum_version_get, (setter)_ssl__SSLContext_maximum_version_set, _ssl__SSLContext_maximum_version_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_MAXIMUM_VERSION_GETSETDEF {"maximum_version", (getter)_ssl__SSLContext_maximum_version_get, NULL, _ssl__SSLContext_maximum_version_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLContext_maximum_version_get_impl(PySSLContext *self); + +static PyObject * +_ssl__SSLContext_maximum_version_get(PySSLContext *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_maximum_version_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_SSL__SSLCONTEXT_MAXIMUM_VERSION_HAS_DOCSTR) +# define _ssl__SSLContext_maximum_version_DOCSTR _ssl__SSLContext_maximum_version__doc__ +#else +# define _ssl__SSLContext_maximum_version_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_MAXIMUM_VERSION_GETSETDEF) +# undef _SSL__SSLCONTEXT_MAXIMUM_VERSION_GETSETDEF +# define _SSL__SSLCONTEXT_MAXIMUM_VERSION_GETSETDEF {"maximum_version", (getter)_ssl__SSLContext_maximum_version_get, (setter)_ssl__SSLContext_maximum_version_set, _ssl__SSLContext_maximum_version_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_MAXIMUM_VERSION_GETSETDEF {"maximum_version", NULL, (setter)_ssl__SSLContext_maximum_version_set, NULL}, +#endif + +static int +_ssl__SSLContext_maximum_version_set_impl(PySSLContext *self, + PyObject *value); + +static int +_ssl__SSLContext_maximum_version_set(PySSLContext *self, PyObject *value, void *Py_UNUSED(context)) +{ + int return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + 
return_value = _ssl__SSLContext_maximum_version_set_impl(self, value); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl__SSLContext_num_tickets_HAS_DOCSTR) +# define _ssl__SSLContext_num_tickets_DOCSTR _ssl__SSLContext_num_tickets__doc__ +#else +# define _ssl__SSLContext_num_tickets_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_NUM_TICKETS_GETSETDEF) +# undef _SSL__SSLCONTEXT_NUM_TICKETS_GETSETDEF +# define _SSL__SSLCONTEXT_NUM_TICKETS_GETSETDEF {"num_tickets", (getter)_ssl__SSLContext_num_tickets_get, (setter)_ssl__SSLContext_num_tickets_set, _ssl__SSLContext_num_tickets_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_NUM_TICKETS_GETSETDEF {"num_tickets", (getter)_ssl__SSLContext_num_tickets_get, NULL, _ssl__SSLContext_num_tickets_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLContext_num_tickets_get_impl(PySSLContext *self); + +static PyObject * +_ssl__SSLContext_num_tickets_get(PySSLContext *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_num_tickets_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_SSL__SSLCONTEXT_NUM_TICKETS_HAS_DOCSTR) +# define _ssl__SSLContext_num_tickets_DOCSTR _ssl__SSLContext_num_tickets__doc__ +#else +# define _ssl__SSLContext_num_tickets_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_NUM_TICKETS_GETSETDEF) +# undef _SSL__SSLCONTEXT_NUM_TICKETS_GETSETDEF +# define _SSL__SSLCONTEXT_NUM_TICKETS_GETSETDEF {"num_tickets", (getter)_ssl__SSLContext_num_tickets_get, (setter)_ssl__SSLContext_num_tickets_set, _ssl__SSLContext_num_tickets_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_NUM_TICKETS_GETSETDEF {"num_tickets", NULL, (setter)_ssl__SSLContext_num_tickets_set, NULL}, +#endif + +static int +_ssl__SSLContext_num_tickets_set_impl(PySSLContext *self, PyObject *value); + +static int +_ssl__SSLContext_num_tickets_set(PySSLContext *self, PyObject *value, void *Py_UNUSED(context)) +{ + int 
return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_num_tickets_set_impl(self, value); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl__SSLContext_security_level_HAS_DOCSTR) +# define _ssl__SSLContext_security_level_DOCSTR _ssl__SSLContext_security_level__doc__ +#else +# define _ssl__SSLContext_security_level_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_SECURITY_LEVEL_GETSETDEF) +# undef _SSL__SSLCONTEXT_SECURITY_LEVEL_GETSETDEF +# define _SSL__SSLCONTEXT_SECURITY_LEVEL_GETSETDEF {"security_level", (getter)_ssl__SSLContext_security_level_get, (setter)_ssl__SSLContext_security_level_set, _ssl__SSLContext_security_level_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_SECURITY_LEVEL_GETSETDEF {"security_level", (getter)_ssl__SSLContext_security_level_get, NULL, _ssl__SSLContext_security_level_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLContext_security_level_get_impl(PySSLContext *self); + +static PyObject * +_ssl__SSLContext_security_level_get(PySSLContext *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_security_level_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl__SSLContext_options_HAS_DOCSTR) +# define _ssl__SSLContext_options_DOCSTR _ssl__SSLContext_options__doc__ +#else +# define _ssl__SSLContext_options_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_OPTIONS_GETSETDEF) +# undef _SSL__SSLCONTEXT_OPTIONS_GETSETDEF +# define _SSL__SSLCONTEXT_OPTIONS_GETSETDEF {"options", (getter)_ssl__SSLContext_options_get, (setter)_ssl__SSLContext_options_set, _ssl__SSLContext_options_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_OPTIONS_GETSETDEF {"options", (getter)_ssl__SSLContext_options_get, NULL, _ssl__SSLContext_options_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLContext_options_get_impl(PySSLContext *self); + +static PyObject * 
+_ssl__SSLContext_options_get(PySSLContext *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_options_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_SSL__SSLCONTEXT_OPTIONS_HAS_DOCSTR) +# define _ssl__SSLContext_options_DOCSTR _ssl__SSLContext_options__doc__ +#else +# define _ssl__SSLContext_options_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_OPTIONS_GETSETDEF) +# undef _SSL__SSLCONTEXT_OPTIONS_GETSETDEF +# define _SSL__SSLCONTEXT_OPTIONS_GETSETDEF {"options", (getter)_ssl__SSLContext_options_get, (setter)_ssl__SSLContext_options_set, _ssl__SSLContext_options_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_OPTIONS_GETSETDEF {"options", NULL, (setter)_ssl__SSLContext_options_set, NULL}, +#endif + +static int +_ssl__SSLContext_options_set_impl(PySSLContext *self, PyObject *value); + +static int +_ssl__SSLContext_options_set(PySSLContext *self, PyObject *value, void *Py_UNUSED(context)) +{ + int return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_options_set_impl(self, value); + Py_END_CRITICAL_SECTION(); + + return return_value; } +#if defined(_ssl__SSLContext__host_flags_HAS_DOCSTR) +# define _ssl__SSLContext__host_flags_DOCSTR _ssl__SSLContext__host_flags__doc__ +#else +# define _ssl__SSLContext__host_flags_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT__HOST_FLAGS_GETSETDEF) +# undef _SSL__SSLCONTEXT__HOST_FLAGS_GETSETDEF +# define _SSL__SSLCONTEXT__HOST_FLAGS_GETSETDEF {"_host_flags", (getter)_ssl__SSLContext__host_flags_get, (setter)_ssl__SSLContext__host_flags_set, _ssl__SSLContext__host_flags_DOCSTR}, +#else +# define _SSL__SSLCONTEXT__HOST_FLAGS_GETSETDEF {"_host_flags", (getter)_ssl__SSLContext__host_flags_get, NULL, _ssl__SSLContext__host_flags_DOCSTR}, +#endif + static PyObject * -_ssl__SSLContext_impl(PyTypeObject *type, int proto_version); 
+_ssl__SSLContext__host_flags_get_impl(PySSLContext *self); static PyObject * -_ssl__SSLContext(PyTypeObject *type, PyObject *args, PyObject *kwargs) +_ssl__SSLContext__host_flags_get(PySSLContext *self, void *Py_UNUSED(context)) { PyObject *return_value = NULL; - PyTypeObject *base_tp = get_state_type(type)->PySSLContext_Type; - int proto_version; - if ((type == base_tp || type->tp_init == base_tp->tp_init) && - !_PyArg_NoKeywords("_SSLContext", kwargs)) { - goto exit; - } - if (!_PyArg_CheckPositional("_SSLContext", PyTuple_GET_SIZE(args), 1, 1)) { - goto exit; - } - proto_version = PyLong_AsInt(PyTuple_GET_ITEM(args, 0)); - if (proto_version == -1 && PyErr_Occurred()) { - goto exit; - } - return_value = _ssl__SSLContext_impl(type, proto_version); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext__host_flags_get_impl(self); + Py_END_CRITICAL_SECTION(); -exit: return return_value; } -PyDoc_STRVAR(_ssl__SSLContext_set_ciphers__doc__, -"set_ciphers($self, cipherlist, /)\n" -"--\n" -"\n"); +#if defined(_SSL__SSLCONTEXT__HOST_FLAGS_HAS_DOCSTR) +# define _ssl__SSLContext__host_flags_DOCSTR _ssl__SSLContext__host_flags__doc__ +#else +# define _ssl__SSLContext__host_flags_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT__HOST_FLAGS_GETSETDEF) +# undef _SSL__SSLCONTEXT__HOST_FLAGS_GETSETDEF +# define _SSL__SSLCONTEXT__HOST_FLAGS_GETSETDEF {"_host_flags", (getter)_ssl__SSLContext__host_flags_get, (setter)_ssl__SSLContext__host_flags_set, _ssl__SSLContext__host_flags_DOCSTR}, +#else +# define _SSL__SSLCONTEXT__HOST_FLAGS_GETSETDEF {"_host_flags", NULL, (setter)_ssl__SSLContext__host_flags_set, NULL}, +#endif -#define _SSL__SSLCONTEXT_SET_CIPHERS_METHODDEF \ - {"set_ciphers", (PyCFunction)_ssl__SSLContext_set_ciphers, METH_O, _ssl__SSLContext_set_ciphers__doc__}, +static int +_ssl__SSLContext__host_flags_set_impl(PySSLContext *self, PyObject *value); + +static int +_ssl__SSLContext__host_flags_set(PySSLContext *self, PyObject *value, void 
*Py_UNUSED(context)) +{ + int return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext__host_flags_set_impl(self, value); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl__SSLContext_check_hostname_HAS_DOCSTR) +# define _ssl__SSLContext_check_hostname_DOCSTR _ssl__SSLContext_check_hostname__doc__ +#else +# define _ssl__SSLContext_check_hostname_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_CHECK_HOSTNAME_GETSETDEF) +# undef _SSL__SSLCONTEXT_CHECK_HOSTNAME_GETSETDEF +# define _SSL__SSLCONTEXT_CHECK_HOSTNAME_GETSETDEF {"check_hostname", (getter)_ssl__SSLContext_check_hostname_get, (setter)_ssl__SSLContext_check_hostname_set, _ssl__SSLContext_check_hostname_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_CHECK_HOSTNAME_GETSETDEF {"check_hostname", (getter)_ssl__SSLContext_check_hostname_get, NULL, _ssl__SSLContext_check_hostname_DOCSTR}, +#endif static PyObject * -_ssl__SSLContext_set_ciphers_impl(PySSLContext *self, const char *cipherlist); +_ssl__SSLContext_check_hostname_get_impl(PySSLContext *self); static PyObject * -_ssl__SSLContext_set_ciphers(PySSLContext *self, PyObject *arg) +_ssl__SSLContext_check_hostname_get(PySSLContext *self, void *Py_UNUSED(context)) { PyObject *return_value = NULL; - const char *cipherlist; - if (!PyUnicode_Check(arg)) { - _PyArg_BadArgument("set_ciphers", "argument", "str", arg); - goto exit; - } - Py_ssize_t cipherlist_length; - cipherlist = PyUnicode_AsUTF8AndSize(arg, &cipherlist_length); - if (cipherlist == NULL) { - goto exit; - } - if (strlen(cipherlist) != (size_t)cipherlist_length) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - goto exit; - } - return_value = _ssl__SSLContext_set_ciphers_impl(self, cipherlist); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_check_hostname_get_impl(self); + Py_END_CRITICAL_SECTION(); -exit: return return_value; } -PyDoc_STRVAR(_ssl__SSLContext_get_ciphers__doc__, -"get_ciphers($self, /)\n" -"--\n" 
-"\n"); - -#define _SSL__SSLCONTEXT_GET_CIPHERS_METHODDEF \ - {"get_ciphers", (PyCFunction)_ssl__SSLContext_get_ciphers, METH_NOARGS, _ssl__SSLContext_get_ciphers__doc__}, +#if defined(_SSL__SSLCONTEXT_CHECK_HOSTNAME_HAS_DOCSTR) +# define _ssl__SSLContext_check_hostname_DOCSTR _ssl__SSLContext_check_hostname__doc__ +#else +# define _ssl__SSLContext_check_hostname_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_CHECK_HOSTNAME_GETSETDEF) +# undef _SSL__SSLCONTEXT_CHECK_HOSTNAME_GETSETDEF +# define _SSL__SSLCONTEXT_CHECK_HOSTNAME_GETSETDEF {"check_hostname", (getter)_ssl__SSLContext_check_hostname_get, (setter)_ssl__SSLContext_check_hostname_set, _ssl__SSLContext_check_hostname_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_CHECK_HOSTNAME_GETSETDEF {"check_hostname", NULL, (setter)_ssl__SSLContext_check_hostname_set, NULL}, +#endif -static PyObject * -_ssl__SSLContext_get_ciphers_impl(PySSLContext *self); +static int +_ssl__SSLContext_check_hostname_set_impl(PySSLContext *self, PyObject *value); -static PyObject * -_ssl__SSLContext_get_ciphers(PySSLContext *self, PyObject *Py_UNUSED(ignored)) +static int +_ssl__SSLContext_check_hostname_set(PySSLContext *self, PyObject *value, void *Py_UNUSED(context)) { - return _ssl__SSLContext_get_ciphers_impl(self); -} + int return_value; -PyDoc_STRVAR(_ssl__SSLContext__set_alpn_protocols__doc__, -"_set_alpn_protocols($self, protos, /)\n" -"--\n" -"\n"); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_check_hostname_set_impl(self, value); + Py_END_CRITICAL_SECTION(); -#define _SSL__SSLCONTEXT__SET_ALPN_PROTOCOLS_METHODDEF \ - {"_set_alpn_protocols", (PyCFunction)_ssl__SSLContext__set_alpn_protocols, METH_O, _ssl__SSLContext__set_alpn_protocols__doc__}, + return return_value; +} + +#if defined(_ssl__SSLContext_protocol_HAS_DOCSTR) +# define _ssl__SSLContext_protocol_DOCSTR _ssl__SSLContext_protocol__doc__ +#else +# define _ssl__SSLContext_protocol_DOCSTR NULL +#endif +#if 
defined(_SSL__SSLCONTEXT_PROTOCOL_GETSETDEF) +# undef _SSL__SSLCONTEXT_PROTOCOL_GETSETDEF +# define _SSL__SSLCONTEXT_PROTOCOL_GETSETDEF {"protocol", (getter)_ssl__SSLContext_protocol_get, (setter)_ssl__SSLContext_protocol_set, _ssl__SSLContext_protocol_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_PROTOCOL_GETSETDEF {"protocol", (getter)_ssl__SSLContext_protocol_get, NULL, _ssl__SSLContext_protocol_DOCSTR}, +#endif static PyObject * -_ssl__SSLContext__set_alpn_protocols_impl(PySSLContext *self, - Py_buffer *protos); +_ssl__SSLContext_protocol_get_impl(PySSLContext *self); static PyObject * -_ssl__SSLContext__set_alpn_protocols(PySSLContext *self, PyObject *arg) +_ssl__SSLContext_protocol_get(PySSLContext *self, void *Py_UNUSED(context)) { PyObject *return_value = NULL; - Py_buffer protos = {NULL, NULL}; - if (PyObject_GetBuffer(arg, &protos, PyBUF_SIMPLE) != 0) { - goto exit; - } - return_value = _ssl__SSLContext__set_alpn_protocols_impl(self, &protos); - -exit: - /* Cleanup for protos */ - if (protos.obj) { - PyBuffer_Release(&protos); - } + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_protocol_get_impl(self); + Py_END_CRITICAL_SECTION(); return return_value; } @@ -599,7 +1423,9 @@ _ssl__SSLContext_load_cert_chain(PySSLContext *self, PyObject *const *args, Py_s } password = args[2]; skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl__SSLContext_load_cert_chain_impl(self, certfile, keyfile, password); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -675,7 +1501,9 @@ _ssl__SSLContext_load_verify_locations(PySSLContext *self, PyObject *const *args } cadata = args[2]; skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl__SSLContext_load_verify_locations_impl(self, cafile, capath, cadata); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -689,6 +1517,21 @@ PyDoc_STRVAR(_ssl__SSLContext_load_dh_params__doc__, #define _SSL__SSLCONTEXT_LOAD_DH_PARAMS_METHODDEF \ {"load_dh_params", 
(PyCFunction)_ssl__SSLContext_load_dh_params, METH_O, _ssl__SSLContext_load_dh_params__doc__}, +static PyObject * +_ssl__SSLContext_load_dh_params_impl(PySSLContext *self, PyObject *filepath); + +static PyObject * +_ssl__SSLContext_load_dh_params(PySSLContext *self, PyObject *filepath) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_load_dh_params_impl(self, filepath); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + PyDoc_STRVAR(_ssl__SSLContext__wrap_socket__doc__, "_wrap_socket($self, /, sock, server_side, server_hostname=None, *,\n" " owner=None, session=None)\n" @@ -774,7 +1617,9 @@ _ssl__SSLContext__wrap_socket(PySSLContext *self, PyObject *const *args, Py_ssiz } session = args[4]; skip_optional_kwonly: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl__SSLContext__wrap_socket_impl(self, sock, server_side, hostname_obj, owner, session); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -872,7 +1717,9 @@ _ssl__SSLContext__wrap_bio(PySSLContext *self, PyObject *const *args, Py_ssize_t } session = args[5]; skip_optional_kwonly: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl__SSLContext__wrap_bio_impl(self, incoming, outgoing, server_side, hostname_obj, owner, session); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -892,7 +1739,13 @@ _ssl__SSLContext_session_stats_impl(PySSLContext *self); static PyObject * _ssl__SSLContext_session_stats(PySSLContext *self, PyObject *Py_UNUSED(ignored)) { - return _ssl__SSLContext_session_stats_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_session_stats_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__SSLContext_set_default_verify_paths__doc__, @@ -909,7 +1762,13 @@ _ssl__SSLContext_set_default_verify_paths_impl(PySSLContext *self); static PyObject * _ssl__SSLContext_set_default_verify_paths(PySSLContext *self, PyObject 
*Py_UNUSED(ignored)) { - return _ssl__SSLContext_set_default_verify_paths_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_set_default_verify_paths_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__SSLContext_set_ecdh_curve__doc__, @@ -920,6 +1779,75 @@ PyDoc_STRVAR(_ssl__SSLContext_set_ecdh_curve__doc__, #define _SSL__SSLCONTEXT_SET_ECDH_CURVE_METHODDEF \ {"set_ecdh_curve", (PyCFunction)_ssl__SSLContext_set_ecdh_curve, METH_O, _ssl__SSLContext_set_ecdh_curve__doc__}, +static PyObject * +_ssl__SSLContext_set_ecdh_curve_impl(PySSLContext *self, PyObject *name); + +static PyObject * +_ssl__SSLContext_set_ecdh_curve(PySSLContext *self, PyObject *name) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_set_ecdh_curve_impl(self, name); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl__SSLContext_sni_callback_HAS_DOCSTR) +# define _ssl__SSLContext_sni_callback_DOCSTR _ssl__SSLContext_sni_callback__doc__ +#else +# define _ssl__SSLContext_sni_callback_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_SNI_CALLBACK_GETSETDEF) +# undef _SSL__SSLCONTEXT_SNI_CALLBACK_GETSETDEF +# define _SSL__SSLCONTEXT_SNI_CALLBACK_GETSETDEF {"sni_callback", (getter)_ssl__SSLContext_sni_callback_get, (setter)_ssl__SSLContext_sni_callback_set, _ssl__SSLContext_sni_callback_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_SNI_CALLBACK_GETSETDEF {"sni_callback", (getter)_ssl__SSLContext_sni_callback_get, NULL, _ssl__SSLContext_sni_callback_DOCSTR}, +#endif + +static PyObject * +_ssl__SSLContext_sni_callback_get_impl(PySSLContext *self); + +static PyObject * +_ssl__SSLContext_sni_callback_get(PySSLContext *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_sni_callback_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return 
return_value; +} + +#if defined(_SSL__SSLCONTEXT_SNI_CALLBACK_HAS_DOCSTR) +# define _ssl__SSLContext_sni_callback_DOCSTR _ssl__SSLContext_sni_callback__doc__ +#else +# define _ssl__SSLContext_sni_callback_DOCSTR NULL +#endif +#if defined(_SSL__SSLCONTEXT_SNI_CALLBACK_GETSETDEF) +# undef _SSL__SSLCONTEXT_SNI_CALLBACK_GETSETDEF +# define _SSL__SSLCONTEXT_SNI_CALLBACK_GETSETDEF {"sni_callback", (getter)_ssl__SSLContext_sni_callback_get, (setter)_ssl__SSLContext_sni_callback_set, _ssl__SSLContext_sni_callback_DOCSTR}, +#else +# define _SSL__SSLCONTEXT_SNI_CALLBACK_GETSETDEF {"sni_callback", NULL, (setter)_ssl__SSLContext_sni_callback_set, NULL}, +#endif + +static int +_ssl__SSLContext_sni_callback_set_impl(PySSLContext *self, PyObject *value); + +static int +_ssl__SSLContext_sni_callback_set(PySSLContext *self, PyObject *value, void *Py_UNUSED(context)) +{ + int return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_sni_callback_set_impl(self, value); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + PyDoc_STRVAR(_ssl__SSLContext_cert_store_stats__doc__, "cert_store_stats($self, /)\n" "--\n" @@ -941,7 +1869,13 @@ _ssl__SSLContext_cert_store_stats_impl(PySSLContext *self); static PyObject * _ssl__SSLContext_cert_store_stats(PySSLContext *self, PyObject *Py_UNUSED(ignored)) { - return _ssl__SSLContext_cert_store_stats_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl__SSLContext_cert_store_stats_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl__SSLContext_get_ca_certs__doc__, @@ -1007,7 +1941,9 @@ _ssl__SSLContext_get_ca_certs(PySSLContext *self, PyObject *const *args, Py_ssiz goto exit; } skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl__SSLContext_get_ca_certs_impl(self, binary_form); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -1062,7 +1998,9 @@ _ssl__SSLContext_set_psk_client_callback(PySSLContext 
*self, PyObject *const *ar goto exit; } callback = args[0]; + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl__SSLContext_set_psk_client_callback_impl(self, callback); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -1142,7 +2080,9 @@ _ssl__SSLContext_set_psk_server_callback(PySSLContext *self, PyObject *const *ar goto exit; } skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl__SSLContext_set_psk_server_callback_impl(self, callback, identity_hint); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -1165,12 +2105,68 @@ _ssl_MemoryBIO(PyTypeObject *type, PyObject *args, PyObject *kwargs) !_PyArg_NoKeywords("MemoryBIO", kwargs)) { goto exit; } + Py_BEGIN_CRITICAL_SECTION(type); return_value = _ssl_MemoryBIO_impl(type); + Py_END_CRITICAL_SECTION(); exit: return return_value; } +#if defined(_ssl_MemoryBIO_pending_HAS_DOCSTR) +# define _ssl_MemoryBIO_pending_DOCSTR _ssl_MemoryBIO_pending__doc__ +#else +# define _ssl_MemoryBIO_pending_DOCSTR NULL +#endif +#if defined(_SSL_MEMORYBIO_PENDING_GETSETDEF) +# undef _SSL_MEMORYBIO_PENDING_GETSETDEF +# define _SSL_MEMORYBIO_PENDING_GETSETDEF {"pending", (getter)_ssl_MemoryBIO_pending_get, (setter)_ssl_MemoryBIO_pending_set, _ssl_MemoryBIO_pending_DOCSTR}, +#else +# define _SSL_MEMORYBIO_PENDING_GETSETDEF {"pending", (getter)_ssl_MemoryBIO_pending_get, NULL, _ssl_MemoryBIO_pending_DOCSTR}, +#endif + +static PyObject * +_ssl_MemoryBIO_pending_get_impl(PySSLMemoryBIO *self); + +static PyObject * +_ssl_MemoryBIO_pending_get(PySSLMemoryBIO *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl_MemoryBIO_pending_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl_MemoryBIO_eof_HAS_DOCSTR) +# define _ssl_MemoryBIO_eof_DOCSTR _ssl_MemoryBIO_eof__doc__ +#else +# define _ssl_MemoryBIO_eof_DOCSTR NULL +#endif +#if defined(_SSL_MEMORYBIO_EOF_GETSETDEF) +# undef 
_SSL_MEMORYBIO_EOF_GETSETDEF +# define _SSL_MEMORYBIO_EOF_GETSETDEF {"eof", (getter)_ssl_MemoryBIO_eof_get, (setter)_ssl_MemoryBIO_eof_set, _ssl_MemoryBIO_eof_DOCSTR}, +#else +# define _SSL_MEMORYBIO_EOF_GETSETDEF {"eof", (getter)_ssl_MemoryBIO_eof_get, NULL, _ssl_MemoryBIO_eof_DOCSTR}, +#endif + +static PyObject * +_ssl_MemoryBIO_eof_get_impl(PySSLMemoryBIO *self); + +static PyObject * +_ssl_MemoryBIO_eof_get(PySSLMemoryBIO *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl_MemoryBIO_eof_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + PyDoc_STRVAR(_ssl_MemoryBIO_read__doc__, "read($self, size=-1, /)\n" "--\n" @@ -1205,7 +2201,9 @@ _ssl_MemoryBIO_read(PySSLMemoryBIO *self, PyObject *const *args, Py_ssize_t narg goto exit; } skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl_MemoryBIO_read_impl(self, len); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -1234,7 +2232,9 @@ _ssl_MemoryBIO_write(PySSLMemoryBIO *self, PyObject *arg) if (PyObject_GetBuffer(arg, &b, PyBUF_SIMPLE) != 0) { goto exit; } + Py_BEGIN_CRITICAL_SECTION(self); return_value = _ssl_MemoryBIO_write_impl(self, &b); + Py_END_CRITICAL_SECTION(); exit: /* Cleanup for b */ @@ -1262,7 +2262,148 @@ _ssl_MemoryBIO_write_eof_impl(PySSLMemoryBIO *self); static PyObject * _ssl_MemoryBIO_write_eof(PySSLMemoryBIO *self, PyObject *Py_UNUSED(ignored)) { - return _ssl_MemoryBIO_write_eof_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl_MemoryBIO_write_eof_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl_SSLSession_time_HAS_DOCSTR) +# define _ssl_SSLSession_time_DOCSTR _ssl_SSLSession_time__doc__ +#else +# define _ssl_SSLSession_time_DOCSTR NULL +#endif +#if defined(_SSL_SSLSESSION_TIME_GETSETDEF) +# undef _SSL_SSLSESSION_TIME_GETSETDEF +# define _SSL_SSLSESSION_TIME_GETSETDEF {"time", 
(getter)_ssl_SSLSession_time_get, (setter)_ssl_SSLSession_time_set, _ssl_SSLSession_time_DOCSTR}, +#else +# define _SSL_SSLSESSION_TIME_GETSETDEF {"time", (getter)_ssl_SSLSession_time_get, NULL, _ssl_SSLSession_time_DOCSTR}, +#endif + +static PyObject * +_ssl_SSLSession_time_get_impl(PySSLSession *self); + +static PyObject * +_ssl_SSLSession_time_get(PySSLSession *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl_SSLSession_time_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl_SSLSession_timeout_HAS_DOCSTR) +# define _ssl_SSLSession_timeout_DOCSTR _ssl_SSLSession_timeout__doc__ +#else +# define _ssl_SSLSession_timeout_DOCSTR NULL +#endif +#if defined(_SSL_SSLSESSION_TIMEOUT_GETSETDEF) +# undef _SSL_SSLSESSION_TIMEOUT_GETSETDEF +# define _SSL_SSLSESSION_TIMEOUT_GETSETDEF {"timeout", (getter)_ssl_SSLSession_timeout_get, (setter)_ssl_SSLSession_timeout_set, _ssl_SSLSession_timeout_DOCSTR}, +#else +# define _SSL_SSLSESSION_TIMEOUT_GETSETDEF {"timeout", (getter)_ssl_SSLSession_timeout_get, NULL, _ssl_SSLSession_timeout_DOCSTR}, +#endif + +static PyObject * +_ssl_SSLSession_timeout_get_impl(PySSLSession *self); + +static PyObject * +_ssl_SSLSession_timeout_get(PySSLSession *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl_SSLSession_timeout_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl_SSLSession_ticket_lifetime_hint_HAS_DOCSTR) +# define _ssl_SSLSession_ticket_lifetime_hint_DOCSTR _ssl_SSLSession_ticket_lifetime_hint__doc__ +#else +# define _ssl_SSLSession_ticket_lifetime_hint_DOCSTR NULL +#endif +#if defined(_SSL_SSLSESSION_TICKET_LIFETIME_HINT_GETSETDEF) +# undef _SSL_SSLSESSION_TICKET_LIFETIME_HINT_GETSETDEF +# define _SSL_SSLSESSION_TICKET_LIFETIME_HINT_GETSETDEF {"ticket_lifetime_hint", 
(getter)_ssl_SSLSession_ticket_lifetime_hint_get, (setter)_ssl_SSLSession_ticket_lifetime_hint_set, _ssl_SSLSession_ticket_lifetime_hint_DOCSTR}, +#else +# define _SSL_SSLSESSION_TICKET_LIFETIME_HINT_GETSETDEF {"ticket_lifetime_hint", (getter)_ssl_SSLSession_ticket_lifetime_hint_get, NULL, _ssl_SSLSession_ticket_lifetime_hint_DOCSTR}, +#endif + +static PyObject * +_ssl_SSLSession_ticket_lifetime_hint_get_impl(PySSLSession *self); + +static PyObject * +_ssl_SSLSession_ticket_lifetime_hint_get(PySSLSession *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl_SSLSession_ticket_lifetime_hint_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl_SSLSession_id_HAS_DOCSTR) +# define _ssl_SSLSession_id_DOCSTR _ssl_SSLSession_id__doc__ +#else +# define _ssl_SSLSession_id_DOCSTR NULL +#endif +#if defined(_SSL_SSLSESSION_ID_GETSETDEF) +# undef _SSL_SSLSESSION_ID_GETSETDEF +# define _SSL_SSLSESSION_ID_GETSETDEF {"id", (getter)_ssl_SSLSession_id_get, (setter)_ssl_SSLSession_id_set, _ssl_SSLSession_id_DOCSTR}, +#else +# define _SSL_SSLSESSION_ID_GETSETDEF {"id", (getter)_ssl_SSLSession_id_get, NULL, _ssl_SSLSession_id_DOCSTR}, +#endif + +static PyObject * +_ssl_SSLSession_id_get_impl(PySSLSession *self); + +static PyObject * +_ssl_SSLSession_id_get(PySSLSession *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl_SSLSession_id_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#if defined(_ssl_SSLSession_has_ticket_HAS_DOCSTR) +# define _ssl_SSLSession_has_ticket_DOCSTR _ssl_SSLSession_has_ticket__doc__ +#else +# define _ssl_SSLSession_has_ticket_DOCSTR NULL +#endif +#if defined(_SSL_SSLSESSION_HAS_TICKET_GETSETDEF) +# undef _SSL_SSLSESSION_HAS_TICKET_GETSETDEF +# define _SSL_SSLSESSION_HAS_TICKET_GETSETDEF {"has_ticket", (getter)_ssl_SSLSession_has_ticket_get, 
(setter)_ssl_SSLSession_has_ticket_set, _ssl_SSLSession_has_ticket_DOCSTR}, +#else +# define _SSL_SSLSESSION_HAS_TICKET_GETSETDEF {"has_ticket", (getter)_ssl_SSLSession_has_ticket_get, NULL, _ssl_SSLSession_has_ticket_DOCSTR}, +#endif + +static PyObject * +_ssl_SSLSession_has_ticket_get_impl(PySSLSession *self); + +static PyObject * +_ssl_SSLSession_has_ticket_get(PySSLSession *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _ssl_SSLSession_has_ticket_get_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl_RAND_add__doc__, @@ -1315,7 +2456,9 @@ _ssl_RAND_add(PyObject *module, PyObject *const *args, Py_ssize_t nargs) goto exit; } } + Py_BEGIN_CRITICAL_SECTION(module); return_value = _ssl_RAND_add_impl(module, &view, entropy); + Py_END_CRITICAL_SECTION(); exit: /* Cleanup for view */ @@ -1348,7 +2491,9 @@ _ssl_RAND_bytes(PyObject *module, PyObject *arg) if (n == -1 && PyErr_Occurred()) { goto exit; } + Py_BEGIN_CRITICAL_SECTION(module); return_value = _ssl_RAND_bytes_impl(module, n); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -1372,7 +2517,13 @@ _ssl_RAND_status_impl(PyObject *module); static PyObject * _ssl_RAND_status(PyObject *module, PyObject *Py_UNUSED(ignored)) { - return _ssl_RAND_status_impl(module); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(module); + return_value = _ssl_RAND_status_impl(module); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_ssl_get_default_verify_paths__doc__, @@ -1392,7 +2543,13 @@ _ssl_get_default_verify_paths_impl(PyObject *module); static PyObject * _ssl_get_default_verify_paths(PyObject *module, PyObject *Py_UNUSED(ignored)) { - return _ssl_get_default_verify_paths_impl(module); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(module); + return_value = _ssl_get_default_verify_paths_impl(module); + Py_END_CRITICAL_SECTION(); + + return return_value; } 
PyDoc_STRVAR(_ssl_txt2obj__doc__, @@ -1469,7 +2626,9 @@ _ssl_txt2obj(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto exit; } skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(module); return_value = _ssl_txt2obj_impl(module, txt, name); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -1497,7 +2656,9 @@ _ssl_nid2obj(PyObject *module, PyObject *arg) if (nid == -1 && PyErr_Occurred()) { goto exit; } + Py_BEGIN_CRITICAL_SECTION(module); return_value = _ssl_nid2obj_impl(module, nid); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -1572,7 +2733,9 @@ _ssl_enum_certificates(PyObject *module, PyObject *const *args, Py_ssize_t nargs PyErr_SetString(PyExc_ValueError, "embedded null character"); goto exit; } + Py_BEGIN_CRITICAL_SECTION(module); return_value = _ssl_enum_certificates_impl(module, store_name); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -1648,7 +2811,9 @@ _ssl_enum_crls(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje PyErr_SetString(PyExc_ValueError, "embedded null character"); goto exit; } + Py_BEGIN_CRITICAL_SECTION(module); return_value = _ssl_enum_crls_impl(module, store_name); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -1663,4 +2828,4 @@ _ssl_enum_crls(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje #ifndef _SSL_ENUM_CRLS_METHODDEF #define _SSL_ENUM_CRLS_METHODDEF #endif /* !defined(_SSL_ENUM_CRLS_METHODDEF) */ -/*[clinic end generated code: output=28a22f2b09d631cb input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c1489122072a9f5e input=a9049054013a1b77]*/ From 8f5e39d5c885318e3128a3e84464c098b5f79a79 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Sat, 19 Oct 2024 14:46:57 -0700 Subject: [PATCH 044/106] gh-125378: Trigger a repeat for the full multi-line statement for empty line command (#125717) --- Lib/pdb.py | 1 + Lib/test/test_pdb.py | 16 +++++++++++++--- ...024-10-19-01-30-40.gh-issue-125378.WTosxX.rst | 1 + 3 files changed, 15 
insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-19-01-30-40.gh-issue-125378.WTosxX.rst diff --git a/Lib/pdb.py b/Lib/pdb.py index cd7a7042fa6987..832213abbb98e6 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -755,6 +755,7 @@ def default(self, line): else: line = line.rstrip('\r\n') buffer += '\n' + line + self.lastcmd = buffer save_stdout = sys.stdout save_stdin = sys.stdin save_displayhook = sys.displayhook diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index 7e6f276d355a14..1ea93ed037005d 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -2448,7 +2448,12 @@ def test_pdb_multiline_statement(): ... 'def f(x):', ... ' return x * 2', ... '', - ... 'f(2)', + ... 'val = 2', + ... 'if val > 0:', + ... ' val = f(val)', + ... '', + ... '', # empty line should repeat the multi-line statement + ... 'val', ... 'c' ... ]): ... test_function() @@ -2457,8 +2462,13 @@ def test_pdb_multiline_statement(): (Pdb) def f(x): ... return x * 2 ... - (Pdb) f(2) - 4 + (Pdb) val = 2 + (Pdb) if val > 0: + ... val = f(val) + ... + (Pdb) + (Pdb) val + 8 (Pdb) c """ diff --git a/Misc/NEWS.d/next/Library/2024-10-19-01-30-40.gh-issue-125378.WTosxX.rst b/Misc/NEWS.d/next/Library/2024-10-19-01-30-40.gh-issue-125378.WTosxX.rst new file mode 100644 index 00000000000000..dc76889d3b210a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-19-01-30-40.gh-issue-125378.WTosxX.rst @@ -0,0 +1 @@ +Fixed the bug in :mod:`pdb` where after a multi-line command, an empty line repeats the first line of the multi-line command, instead of the full command. 
From 14cafe1a108cf0be73a27a0001003b5897eec8f0 Mon Sep 17 00:00:00 2001 From: Tom Most Date: Sat, 19 Oct 2024 17:48:06 -0700 Subject: [PATCH 045/106] Doc: Fix pluralization in os.process_cpu_count() documentation (#125678) --- Doc/library/os.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 33dd58febd9a5e..081d7a6f97b079 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -5579,7 +5579,7 @@ Miscellaneous System Information If :option:`-X cpu_count <-X>` is given or :envvar:`PYTHON_CPU_COUNT` is set, :func:`process_cpu_count` returns the overridden value *n*. - See also the :func:`sched_getaffinity` functions. + See also the :func:`sched_getaffinity` function. .. versionadded:: 3.13 From e924bb667a19ee1812d6c7592a37dd37346dda04 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 20 Oct 2024 03:10:17 +0200 Subject: [PATCH 046/106] gh-125698: Replace EXEEXT with EXE_SUFFIX (#125699) --- Makefile.pre.in | 2 +- configure | 22 +++++++++++----------- configure.ac | 20 ++++++++++---------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index fb6f22d57397db..9c313c8029fb68 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -176,7 +176,7 @@ EXPORTSYMS= @EXPORTSYMS@ EXPORTSFROM= @EXPORTSFROM@ # Executable suffix (.exe on Windows and Mac OS X) -EXE= @EXEEXT@ +EXE= @EXE_SUFFIX@ BUILDEXE= @BUILDEXEEXT@ # Name of the patch file to apply for app store compliance diff --git a/configure b/configure index b11f41d5379958..ab35f69f73011f 100755 --- a/configure +++ b/configure @@ -7296,11 +7296,11 @@ then : withval=$with_suffix; case $with_suffix in #( no) : - EXEEXT= ;; #( + EXE_SUFFIX= ;; #( yes) : - EXEEXT=.exe ;; #( + EXE_SUFFIX=.exe ;; #( *) : - EXEEXT=$with_suffix + EXE_SUFFIX=$with_suffix ;; esac @@ -7308,20 +7308,20 @@ else $as_nop case $ac_sys_system/$ac_sys_emscripten_target in #( Emscripten/browser*) : - EXEEXT=.js ;; #( + EXE_SUFFIX=.js ;; 
#( Emscripten/node*) : - EXEEXT=.js ;; #( + EXE_SUFFIX=.js ;; #( WASI/*) : - EXEEXT=.wasm ;; #( + EXE_SUFFIX=.wasm ;; #( *) : - EXEEXT= + EXE_SUFFIX= ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $EXEEXT" >&5 -printf "%s\n" "$EXEEXT" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $EXE_SUFFIX" >&5 +printf "%s\n" "$EXE_SUFFIX" >&6; } # Test whether we're running on a non-case-sensitive system, in which # case we give a warning if no ext is given @@ -7332,7 +7332,7 @@ if test ! -d CaseSensitiveTestDir; then mkdir CaseSensitiveTestDir fi -if test -d casesensitivetestdir && test -z "$EXEEXT" +if test -d casesensitivetestdir && test -z "$EXE_SUFFIX" then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -7340,7 +7340,7 @@ printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - BUILDEXEEXT=$EXEEXT + BUILDEXEEXT=$EXE_SUFFIX fi rmdir CaseSensitiveTestDir diff --git a/configure.ac b/configure.ac index d5bc739c34c90f..9e50eae6a7ece8 100644 --- a/configure.ac +++ b/configure.ac @@ -1325,19 +1325,19 @@ AC_ARG_WITH([suffix], [AS_HELP_STRING([--with-suffix=SUFFIX], [set executable suffix to SUFFIX (default is empty, yes is mapped to '.exe')])], [ AS_CASE([$with_suffix], - [no], [EXEEXT=], - [yes], [EXEEXT=.exe], - [EXEEXT=$with_suffix] + [no], [EXE_SUFFIX=], + [yes], [EXE_SUFFIX=.exe], + [EXE_SUFFIX=$with_suffix] ) ], [ AS_CASE([$ac_sys_system/$ac_sys_emscripten_target], - [Emscripten/browser*], [EXEEXT=.js], - [Emscripten/node*], [EXEEXT=.js], - [WASI/*], [EXEEXT=.wasm], - [EXEEXT=] + [Emscripten/browser*], [EXE_SUFFIX=.js], + [Emscripten/node*], [EXE_SUFFIX=.js], + [WASI/*], [EXE_SUFFIX=.wasm], + [EXE_SUFFIX=] ) ]) -AC_MSG_RESULT([$EXEEXT]) +AC_MSG_RESULT([$EXE_SUFFIX]) # Test whether we're running on a non-case-sensitive system, in which # case we give a warning if no ext is given @@ -1347,13 +1347,13 @@ if test ! 
-d CaseSensitiveTestDir; then mkdir CaseSensitiveTestDir fi -if test -d casesensitivetestdir && test -z "$EXEEXT" +if test -d casesensitivetestdir && test -z "$EXE_SUFFIX" then AC_MSG_RESULT([yes]) BUILDEXEEXT=.exe else AC_MSG_RESULT([no]) - BUILDEXEEXT=$EXEEXT + BUILDEXEEXT=$EXE_SUFFIX fi rmdir CaseSensitiveTestDir From ed24702bd0f9925908ce48584c31dfad732208b2 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 20 Oct 2024 16:55:26 +0900 Subject: [PATCH 047/106] gh-125741: Update check_generated_files CI to use our published container image (gh-125744) --- .github/workflows/build.yml | 53 ++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ec7904c2e2cc73..a72d4a1bb97cb9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -40,6 +40,50 @@ jobs: if: fromJSON(needs.check_source.outputs.run-docs) uses: ./.github/workflows/reusable-docs.yml + check_autoconf_regen: + name: 'Check if Autoconf files are up to date' + # Don't use ubuntu-latest but a specific version to make the job + # reproducible: to get the same tools versions (autoconf, aclocal, ...) 
+ runs-on: ubuntu-24.04 + container: + image: ghcr.io/python/autoconf:2024.10.11.11293396815 + timeout-minutes: 60 + needs: check_source + if: needs.check_source.outputs.run_tests == 'true' + steps: + - name: Install Git + run: | + apt install git -yq + git config --global --add safe.directory "$GITHUB_WORKSPACE" + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + - name: Runner image version + run: echo "IMAGE_VERSION=${ImageVersion}" >> $GITHUB_ENV + - name: Check Autoconf and aclocal versions + run: | + grep "Generated by GNU Autoconf 2.71" configure + grep "aclocal 1.16.5" aclocal.m4 + grep -q "runstatedir" configure + grep -q "PKG_PROG_PKG_CONFIG" aclocal.m4 + - name: Regenerate autoconf files + # Same command used by Tools/build/regen-configure.sh ($AUTORECONF) + run: autoreconf -ivf -Werror + - name: Check for changes + run: | + git add -u + changes=$(git status --porcelain) + # Check for changes in regenerated files + if test -n "$changes"; then + echo "Generated files not up to date." + echo "Perhaps you forgot to run make regen-all or build.bat --regen. ;)" + echo "configure files must be regenerated with a specific version of autoconf." 
+ echo "$changes" + echo "" + git diff --staged || true + exit 1 + fi + check_generated_files: name: 'Check if generated files are up to date' # Don't use ubuntu-latest but a specific version to make the job @@ -69,19 +113,10 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2 with: save: false - - name: Check Autoconf and aclocal versions - run: | - grep "Generated by GNU Autoconf 2.71" configure - grep "aclocal 1.16.5" aclocal.m4 - grep -q "runstatedir" configure - grep -q "PKG_PROG_PKG_CONFIG" aclocal.m4 - name: Configure CPython run: | # Build Python with the libpython dynamic library ./configure --config-cache --with-pydebug --enable-shared - - name: Regenerate autoconf files - # Same command used by Tools/build/regen-configure.sh ($AUTORECONF) - run: autoreconf -ivf -Werror - name: Build CPython run: | make -j4 regen-all From b3c6b2c9e19ea84f617c13399c411044afbc3813 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 20 Oct 2024 23:08:01 +0200 Subject: [PATCH 048/106] gh-125698: Restore EXEEXT in configure and Make (#125758) This reverts commit e924bb667. 
--- Makefile.pre.in | 2 +- configure | 22 +++++++++++----------- configure.ac | 20 ++++++++++---------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index 9c313c8029fb68..fb6f22d57397db 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -176,7 +176,7 @@ EXPORTSYMS= @EXPORTSYMS@ EXPORTSFROM= @EXPORTSFROM@ # Executable suffix (.exe on Windows and Mac OS X) -EXE= @EXE_SUFFIX@ +EXE= @EXEEXT@ BUILDEXE= @BUILDEXEEXT@ # Name of the patch file to apply for app store compliance diff --git a/configure b/configure index ab35f69f73011f..b11f41d5379958 100755 --- a/configure +++ b/configure @@ -7296,11 +7296,11 @@ then : withval=$with_suffix; case $with_suffix in #( no) : - EXE_SUFFIX= ;; #( + EXEEXT= ;; #( yes) : - EXE_SUFFIX=.exe ;; #( + EXEEXT=.exe ;; #( *) : - EXE_SUFFIX=$with_suffix + EXEEXT=$with_suffix ;; esac @@ -7308,20 +7308,20 @@ else $as_nop case $ac_sys_system/$ac_sys_emscripten_target in #( Emscripten/browser*) : - EXE_SUFFIX=.js ;; #( + EXEEXT=.js ;; #( Emscripten/node*) : - EXE_SUFFIX=.js ;; #( + EXEEXT=.js ;; #( WASI/*) : - EXE_SUFFIX=.wasm ;; #( + EXEEXT=.wasm ;; #( *) : - EXE_SUFFIX= + EXEEXT= ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $EXE_SUFFIX" >&5 -printf "%s\n" "$EXE_SUFFIX" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $EXEEXT" >&5 +printf "%s\n" "$EXEEXT" >&6; } # Test whether we're running on a non-case-sensitive system, in which # case we give a warning if no ext is given @@ -7332,7 +7332,7 @@ if test ! 
-d CaseSensitiveTestDir; then mkdir CaseSensitiveTestDir fi -if test -d casesensitivetestdir && test -z "$EXE_SUFFIX" +if test -d casesensitivetestdir && test -z "$EXEEXT" then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -7340,7 +7340,7 @@ printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - BUILDEXEEXT=$EXE_SUFFIX + BUILDEXEEXT=$EXEEXT fi rmdir CaseSensitiveTestDir diff --git a/configure.ac b/configure.ac index 9e50eae6a7ece8..d5bc739c34c90f 100644 --- a/configure.ac +++ b/configure.ac @@ -1325,19 +1325,19 @@ AC_ARG_WITH([suffix], [AS_HELP_STRING([--with-suffix=SUFFIX], [set executable suffix to SUFFIX (default is empty, yes is mapped to '.exe')])], [ AS_CASE([$with_suffix], - [no], [EXE_SUFFIX=], - [yes], [EXE_SUFFIX=.exe], - [EXE_SUFFIX=$with_suffix] + [no], [EXEEXT=], + [yes], [EXEEXT=.exe], + [EXEEXT=$with_suffix] ) ], [ AS_CASE([$ac_sys_system/$ac_sys_emscripten_target], - [Emscripten/browser*], [EXE_SUFFIX=.js], - [Emscripten/node*], [EXE_SUFFIX=.js], - [WASI/*], [EXE_SUFFIX=.wasm], - [EXE_SUFFIX=] + [Emscripten/browser*], [EXEEXT=.js], + [Emscripten/node*], [EXEEXT=.js], + [WASI/*], [EXEEXT=.wasm], + [EXEEXT=] ) ]) -AC_MSG_RESULT([$EXE_SUFFIX]) +AC_MSG_RESULT([$EXEEXT]) # Test whether we're running on a non-case-sensitive system, in which # case we give a warning if no ext is given @@ -1347,13 +1347,13 @@ if test ! 
-d CaseSensitiveTestDir; then mkdir CaseSensitiveTestDir fi -if test -d casesensitivetestdir && test -z "$EXE_SUFFIX" +if test -d casesensitivetestdir && test -z "$EXEEXT" then AC_MSG_RESULT([yes]) BUILDEXEEXT=.exe else AC_MSG_RESULT([no]) - BUILDEXEEXT=$EXE_SUFFIX + BUILDEXEEXT=$EXEEXT fi rmdir CaseSensitiveTestDir From 9256be7ff0ab035cfd262127d893c9bc88b3c84c Mon Sep 17 00:00:00 2001 From: "RUANG (Roy James)" Date: Mon, 21 Oct 2024 08:36:27 +0800 Subject: [PATCH 049/106] gh-99030: Added documentation links for types and exceptions (GH-123857) * Added documentation links for types and exceptions * Shortened description sentences * Change content * Change documentation * Move seealso * Add a spaces --- Doc/library/builtins.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Doc/library/builtins.rst b/Doc/library/builtins.rst index 644344e7fef29a..c4979db52d2aed 100644 --- a/Doc/library/builtins.rst +++ b/Doc/library/builtins.rst @@ -7,10 +7,7 @@ -------------- This module provides direct access to all 'built-in' identifiers of Python; for -example, ``builtins.open`` is the full name for the built-in function -:func:`open`. See :ref:`built-in-funcs` and :ref:`built-in-consts` for -documentation. - +example, ``builtins.open`` is the full name for the built-in function :func:`open`. This module is not normally accessed explicitly by most applications, but can be useful in modules that provide objects with the same name as a built-in value, @@ -40,3 +37,10 @@ available as part of their globals. The value of ``__builtins__`` is normally either this module or the value of this module's :attr:`~object.__dict__` attribute. Since this is an implementation detail, it may not be used by alternate implementations of Python. + +.. 
seealso:: + + * :ref:`built-in-consts` + * :ref:`bltin-exceptions` + * :ref:`built-in-funcs` + * :ref:`bltin-types` From c5c21fee7ae1ea689a351caa454c98e716a6e537 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 21 Oct 2024 07:53:21 +0100 Subject: [PATCH 050/106] gh-125519: Improve traceback if `importlib.reload()` is called with a non-module object (#125520) --- Lib/importlib/__init__.py | 2 +- Lib/test/test_importlib/test_api.py | 15 +++++++++++++++ ...2024-10-15-14-01-03.gh-issue-125519.TqGh6a.rst | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-15-14-01-03.gh-issue-125519.TqGh6a.rst diff --git a/Lib/importlib/__init__.py b/Lib/importlib/__init__.py index f38fe5c1ab461a..a7d57561ead046 100644 --- a/Lib/importlib/__init__.py +++ b/Lib/importlib/__init__.py @@ -103,7 +103,7 @@ def reload(module): try: name = module.__name__ except AttributeError: - raise TypeError("reload() argument must be a module") + raise TypeError("reload() argument must be a module") from None if sys.modules.get(name) is not module: raise ImportError(f"module {name} not in sys.modules", name=name) diff --git a/Lib/test/test_importlib/test_api.py b/Lib/test/test_importlib/test_api.py index 973237c0791a3e..51ea5270b1a928 100644 --- a/Lib/test/test_importlib/test_api.py +++ b/Lib/test/test_importlib/test_api.py @@ -9,6 +9,7 @@ from test import support from test.support import import_helper from test.support import os_helper +import traceback import types import unittest @@ -354,6 +355,20 @@ def test_module_missing_spec(self): with self.assertRaises(ModuleNotFoundError): self.init.reload(module) + def test_reload_traceback_with_non_str(self): + # gh-125519 + with support.captured_stdout() as stdout: + try: + self.init.reload("typing") + except TypeError as exc: + traceback.print_exception(exc, file=stdout) + else: + self.fail("Expected TypeError to be raised") + printed_traceback = stdout.getvalue() + self.assertIn("TypeError", 
printed_traceback) + self.assertNotIn("AttributeError", printed_traceback) + self.assertNotIn("module.__spec__.name", printed_traceback) + (Frozen_ReloadTests, Source_ReloadTests diff --git a/Misc/NEWS.d/next/Library/2024-10-15-14-01-03.gh-issue-125519.TqGh6a.rst b/Misc/NEWS.d/next/Library/2024-10-15-14-01-03.gh-issue-125519.TqGh6a.rst new file mode 100644 index 00000000000000..e6062625104590 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-15-14-01-03.gh-issue-125519.TqGh6a.rst @@ -0,0 +1,2 @@ +Improve traceback if :func:`importlib.reload` is called with an object that +is not a module. Patch by Alex Waygood. From ded105a62b9d78717f8dc64652e3903190b585dd Mon Sep 17 00:00:00 2001 From: ember91 <31469580+ember91@users.noreply.github.com> Date: Mon, 21 Oct 2024 10:44:18 +0200 Subject: [PATCH 051/106] Doc: Fix typos (#125728) --- Doc/c-api/init.rst | 2 +- Doc/c-api/long.rst | 2 +- Doc/c-api/monitoring.rst | 2 +- Doc/library/concurrent.futures.rst | 2 +- Doc/library/importlib.metadata.rst | 2 +- Doc/using/windows.rst | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index ffc5b4223ba589..412a41ce02cfd7 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -2418,7 +2418,7 @@ Example usage:: In the above example, :c:macro:`Py_SETREF` calls :c:macro:`Py_DECREF`, which can call arbitrary code through an object's deallocation function. The critical -section API avoids potentital deadlocks due to reentrancy and lock ordering +section API avoids potential deadlocks due to reentrancy and lock ordering by allowing the runtime to temporarily suspend the critical section if the code triggered by the finalizer blocks and calls :c:func:`PyEval_SaveThread`. diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 6d3463fe25a614..9ff3e5265004a1 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -511,7 +511,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. 
free(bignum); *flags* is either ``-1`` (``Py_ASNATIVEBYTES_DEFAULTS``) to select defaults - that behave most like a C cast, or a combintation of the other flags in + that behave most like a C cast, or a combination of the other flags in the table below. Note that ``-1`` cannot be combined with other flags. diff --git a/Doc/c-api/monitoring.rst b/Doc/c-api/monitoring.rst index 285ddb2889a67f..51d866cfd47469 100644 --- a/Doc/c-api/monitoring.rst +++ b/Doc/c-api/monitoring.rst @@ -147,7 +147,7 @@ would typically correspond to a python function. The ``version`` argument is a pointer to a value which should be allocated by the user together with ``state_array`` and initialized to 0, - and then set only by :c:func:`!PyMonitoring_EnterScope` itelf. It allows this + and then set only by :c:func:`!PyMonitoring_EnterScope` itself. It allows this function to determine whether event states have changed since the previous call, and to return quickly if they have not. diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index 45a73705f10e92..48e027152a9851 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -208,7 +208,7 @@ ThreadPoolExecutor Example 'http://www.cnn.com/', 'http://europe.wsj.com/', 'http://www.bbc.co.uk/', - 'http://nonexistant-subdomain.python.org/'] + 'http://nonexistent-subdomain.python.org/'] # Retrieve a single page and report the URL and contents def load_url(url, timeout): diff --git a/Doc/library/importlib.metadata.rst b/Doc/library/importlib.metadata.rst index 85d5a2d684d6eb..ddfc5c06d825c0 100644 --- a/Doc/library/importlib.metadata.rst +++ b/Doc/library/importlib.metadata.rst @@ -559,7 +559,7 @@ path. 
``DatabaseDistribution``, then, would look something like:: - class DatabaseDistribution(importlib.metadata.Distributon): + class DatabaseDistribution(importlib.metadata.Distribution): def __init__(self, record): self.record = record diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index 20d872d7639219..daaf8822af1161 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -838,8 +838,8 @@ The short form of the argument (``-3``) only ever selects from core Python releases, and not other distributions. However, the longer form (``-V:3``) will select from any. -The Company is matched on the full string, case-insenitive. The Tag is matched -oneither the full string, or a prefix, provided the next character is a dot or a +The Company is matched on the full string, case-insensitive. The Tag is matched +on either the full string, or a prefix, provided the next character is a dot or a hyphen. This allows ``-V:3.1`` to match ``3.1-32``, but not ``3.10``. Tags are sorted using numerical ordering (``3.10`` is newer than ``3.1``), but are compared using text (``-V:3.01`` does not match ``3.1``). From 7d88140d5299bd086434840db66ede8ccd01a688 Mon Sep 17 00:00:00 2001 From: Y5 <124019959+y5c4l3@users.noreply.github.com> Date: Mon, 21 Oct 2024 17:35:54 +0800 Subject: [PATCH 052/106] gh-125313: Prefer `sys.base_*` paths in `Py_Get(Exec)Prefix` deprecation notes (#125317) Prefer `sys.base_*` paths in `Py_Get(Exec)Prefix` deprecation notes. Signed-off-by: y5c4l3 --- Doc/c-api/init.rst | 11 +++++++---- Doc/deprecations/c-api-pending-removal-in-3.15.rst | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 412a41ce02cfd7..6e881590131cab 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -625,7 +625,7 @@ Process-wide parameters returned string points into static storage; the caller should not modify its value. 
This corresponds to the :makevar:`prefix` variable in the top-level :file:`Makefile` and the :option:`--prefix` argument to the :program:`configure` - script at build time. The value is available to Python code as ``sys.prefix``. + script at build time. The value is available to Python code as ``sys.base_prefix``. It is only useful on Unix. See also the next function. This function should not be called before :c:func:`Py_Initialize`, otherwise @@ -635,7 +635,8 @@ Process-wide parameters It now returns ``NULL`` if called before :c:func:`Py_Initialize`. .. deprecated-removed:: 3.13 3.15 - Get :data:`sys.prefix` instead. + Get :data:`sys.base_prefix` instead, or :data:`sys.prefix` if + :ref:`virtual environments <venv-def>` need to be handled. .. c:function:: wchar_t* Py_GetExecPrefix() @@ -648,7 +649,8 @@ Process-wide parameters should not modify its value. This corresponds to the :makevar:`exec_prefix` variable in the top-level :file:`Makefile` and the ``--exec-prefix`` argument to the :program:`configure` script at build time. The value is - available to Python code as ``sys.exec_prefix``. It is only useful on Unix. + available to Python code as ``sys.base_exec_prefix``. It is only useful on + Unix. Background: The exec-prefix differs from the prefix when platform dependent files (such as executables and shared libraries) are installed in a different @@ -679,7 +681,8 @@ Process-wide parameters It now returns ``NULL`` if called before :c:func:`Py_Initialize`. .. deprecated-removed:: 3.13 3.15 - Get :data:`sys.exec_prefix` instead. + Get :data:`sys.base_exec_prefix` instead, or :data:`sys.exec_prefix` if + :ref:`virtual environments <venv-def>` need to be handled. ..
c:function:: wchar_t* Py_GetProgramFullPath() diff --git a/Doc/deprecations/c-api-pending-removal-in-3.15.rst b/Doc/deprecations/c-api-pending-removal-in-3.15.rst index 1bb49e5b4874f2..0ce0f9c118c094 100644 --- a/Doc/deprecations/c-api-pending-removal-in-3.15.rst +++ b/Doc/deprecations/c-api-pending-removal-in-3.15.rst @@ -13,11 +13,11 @@ Pending removal in Python 3.15 * :c:func:`PySys_ResetWarnOptions`: Clear :data:`sys.warnoptions` and :data:`!warnings.filters` instead. * :c:func:`Py_GetExecPrefix`: - Get :data:`sys.exec_prefix` instead. + Get :data:`sys.base_exec_prefix` and :data:`sys.exec_prefix` instead. * :c:func:`Py_GetPath`: Get :data:`sys.path` instead. * :c:func:`Py_GetPrefix`: - Get :data:`sys.prefix` instead. + Get :data:`sys.base_prefix` and :data:`sys.prefix` instead. * :c:func:`Py_GetProgramFullPath`: Get :data:`sys.executable` instead. * :c:func:`Py_GetProgramName`: From f36d37bbafcee711c765a8cda9ac3ca00b8258c8 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:54:54 +0100 Subject: [PATCH 053/106] gh-125741: Update `build.yml` for the new check_autoconf_regen job (#125772) --- .github/workflows/build.yml | 2 ++ .github/workflows/posix-deps-apt.sh | 2 -- Tools/build/regen-configure.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a72d4a1bb97cb9..88898895d15ad0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -536,6 +536,7 @@ jobs: needs: - check_source # Transitive dependency, needed to access `run_tests` value - check-docs + - check_autoconf_regen - check_generated_files - build_macos - build_ubuntu @@ -571,6 +572,7 @@ jobs: ${{ needs.check_source.outputs.run_tests != 'true' && ' + check_autoconf_regen, check_generated_files, build_macos, build_ubuntu, diff --git a/.github/workflows/posix-deps-apt.sh b/.github/workflows/posix-deps-apt.sh index 
fb485bd4f82bd2..bfc5a0874281bd 100755 --- a/.github/workflows/posix-deps-apt.sh +++ b/.github/workflows/posix-deps-apt.sh @@ -1,11 +1,9 @@ #!/bin/sh apt-get update -# autoconf-archive is needed by autoreconf (check_generated_files job) apt-get -yq install \ build-essential \ pkg-config \ - autoconf-archive \ ccache \ gdb \ lcov \ diff --git a/Tools/build/regen-configure.sh b/Tools/build/regen-configure.sh index 1a24b07c3ff707..ee7c03e86999c1 100755 --- a/Tools/build/regen-configure.sh +++ b/Tools/build/regen-configure.sh @@ -2,7 +2,7 @@ set -e -x -# The check_generated_files job of .github/workflows/build.yml must kept in +# The check_autoconf_regen job of .github/workflows/build.yml must kept in # sync with this script. Use the same container image than the job so the job # doesn't need to run autoreconf in a container. IMAGE="ghcr.io/python/autoconf:2024.10.11.11293396815" From 0cd21406bf84b3b4927a8117024232774823aee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lum=C3=ADr=20=27Frenzy=27=20Balhar?= Date: Mon, 21 Oct 2024 13:14:02 +0200 Subject: [PATCH 054/106] gh-119311: Add missing magic number (3571) for 3.13.0b1 (#125771) Add missing magic number 3571 for 3.13b1 It was added after branching in https://github.com/python/cpython/commit/6394a72e99b342d980297ec437ecafea92a044c4#diff-efefe383b3a81d16150c280db0b64eed7569254299418f64cc0d749f8e16f3a4R475 --- Include/internal/pycore_magic_number.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h index a88ff2deeba941..4aa89f3cac8063 100644 --- a/Include/internal/pycore_magic_number.h +++ b/Include/internal/pycore_magic_number.h @@ -251,6 +251,7 @@ Known values: Python 3.13a1 3568 (Change semantics of END_FOR) Python 3.13a5 3569 (Specialize CONTAINS_OP) Python 3.13a6 3570 (Add __firstlineno__ class attribute) + Python 3.13b1 3571 (Fix miscompilation of private names in generic classes) Python 3.14a1 3600 (Add LOAD_COMMON_CONSTANT) Python 3.14a1 
3601 (Fix miscompilation of private names in generic classes) Python 3.14a1 3602 (Add LOAD_SPECIAL. Remove BEFORE_WITH and BEFORE_ASYNC_WITH) From 5989eb74463c26780632f17f221d6bf4c9372a01 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 21 Oct 2024 08:23:38 -0400 Subject: [PATCH 055/106] gh-125608: Trigger dictionary watchers when inline values change (#125611) Dictionary watchers on an object's attributes dictionary (`object.__dict__`) were not triggered when the managed dictionary used the object's inline values. --- Lib/test/test_capi/test_watchers.py | 17 +++++++++++++++ ...-10-16-19-28-23.gh-issue-125608.gTsU2g.rst | 3 +++ Objects/dictobject.c | 21 +++++++++++++------ 3 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-10-16-19-28-23.gh-issue-125608.gTsU2g.rst diff --git a/Lib/test/test_capi/test_watchers.py b/Lib/test/test_capi/test_watchers.py index 4bb764bf9d0963..e578a622a03487 100644 --- a/Lib/test/test_capi/test_watchers.py +++ b/Lib/test/test_capi/test_watchers.py @@ -97,6 +97,23 @@ def test_dealloc(self): del d self.assert_events(["dealloc"]) + def test_object_dict(self): + class MyObj: pass + o = MyObj() + + with self.watcher() as wid: + self.watch(wid, o.__dict__) + o.foo = "bar" + o.foo = "baz" + del o.foo + self.assert_events(["new:foo:bar", "mod:foo:baz", "del:foo"]) + + with self.watcher() as wid: + self.watch(wid, o.__dict__) + for _ in range(100): + o.foo = "bar" + self.assert_events(["new:foo:bar"] + ["mod:foo:bar"] * 99) + def test_unwatch(self): d = {} with self.watcher() as wid: diff --git a/Misc/NEWS.d/next/C_API/2024-10-16-19-28-23.gh-issue-125608.gTsU2g.rst b/Misc/NEWS.d/next/C_API/2024-10-16-19-28-23.gh-issue-125608.gTsU2g.rst new file mode 100644 index 00000000000000..e70f9f173957a2 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-10-16-19-28-23.gh-issue-125608.gTsU2g.rst @@ -0,0 +1,3 @@ +Fix a bug where dictionary watchers (e.g., :c:func:`PyDict_Watch`) on an +object's attribute 
dictionary (:attr:`~object.__dict__`) were not triggered +when the object's attributes were modified. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index b27599d2815c82..806096f5814062 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -6835,15 +6835,24 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, } PyObject *old_value = values->values[ix]; + if (old_value == NULL && value == NULL) { + PyErr_Format(PyExc_AttributeError, + "'%.100s' object has no attribute '%U'", + Py_TYPE(obj)->tp_name, name); + return -1; + } + + if (dict) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyDict_WatchEvent event = (old_value == NULL ? PyDict_EVENT_ADDED : + value == NULL ? PyDict_EVENT_DELETED : + PyDict_EVENT_MODIFIED); + _PyDict_NotifyEvent(interp, event, dict, name, value); + } + FT_ATOMIC_STORE_PTR_RELEASE(values->values[ix], Py_XNewRef(value)); if (old_value == NULL) { - if (value == NULL) { - PyErr_Format(PyExc_AttributeError, - "'%.100s' object has no attribute '%U'", - Py_TYPE(obj)->tp_name, name); - return -1; - } _PyDictValues_AddToInsertionOrder(values, ix); if (dict) { assert(dict->ma_values == values); From 3d1df3d84e5c75a52b6f1379cd7f2809fc50befa Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Mon, 21 Oct 2024 15:39:05 +0100 Subject: [PATCH 056/106] gh-125703: Correctly honour tracemalloc hooks on more PyDECREF specialized paths (#125712) --- Python/ceval.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index 55e5eba25eaa21..ca75646b585f07 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -99,6 +99,11 @@ } \ _Py_DECREF_STAT_INC(); \ if (--op->ob_refcnt == 0) { \ + struct _reftracer_runtime_state *tracer = &_PyRuntime.ref_tracer; \ + if (tracer->tracer_func != NULL) { \ + void* data = tracer->tracer_data; \ + tracer->tracer_func(op, PyRefTracer_DESTROY, data); \ + } \ destructor d = (destructor)(dealloc); \ d(op); \ } \ From 
5b7a872b26a9ba6c93d7c2109559a82d1c1612de Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Mon, 21 Oct 2024 08:43:08 -0700 Subject: [PATCH 057/106] gh-125590: Allow FrameLocalsProxy to delete and pop keys from extra locals (#125616) --- Lib/test/test_frame.py | 30 +++++++- ...-10-16-20-32-40.gh-issue-125590.stHzOP.rst | 1 + Objects/frameobject.c | 76 +++++++++++++++++-- 3 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-16-20-32-40.gh-issue-125590.stHzOP.rst diff --git a/Lib/test/test_frame.py b/Lib/test/test_frame.py index 32de8ed9a13f80..11f191700ccef0 100644 --- a/Lib/test/test_frame.py +++ b/Lib/test/test_frame.py @@ -397,15 +397,41 @@ def test_repr(self): def test_delete(self): x = 1 d = sys._getframe().f_locals - with self.assertRaises(TypeError): + + # This needs to be tested before f_extra_locals is created + with self.assertRaisesRegex(KeyError, 'non_exist'): + del d['non_exist'] + + with self.assertRaises(KeyError): + d.pop('non_exist') + + with self.assertRaisesRegex(ValueError, 'local variables'): del d['x'] with self.assertRaises(AttributeError): d.clear() - with self.assertRaises(AttributeError): + with self.assertRaises(ValueError): d.pop('x') + with self.assertRaises(ValueError): + d.pop('x', None) + + # 'm', 'n' is stored in f_extra_locals + d['m'] = 1 + d['n'] = 1 + + with self.assertRaises(KeyError): + d.pop('non_exist') + + del d['m'] + self.assertEqual(d.pop('n'), 1) + + self.assertNotIn('m', d) + self.assertNotIn('n', d) + + self.assertEqual(d.pop('n', 2), 2) + @support.cpython_only def test_sizeof(self): proxy = sys._getframe().f_locals diff --git a/Misc/NEWS.d/next/Library/2024-10-16-20-32-40.gh-issue-125590.stHzOP.rst b/Misc/NEWS.d/next/Library/2024-10-16-20-32-40.gh-issue-125590.stHzOP.rst new file mode 100644 index 00000000000000..dc6765ada641a9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-16-20-32-40.gh-issue-125590.stHzOP.rst @@ -0,0 +1 @@ +Allow ``FrameLocalsProxy`` to delete and 
pop if the key is not a fast variable. diff --git a/Objects/frameobject.c b/Objects/frameobject.c index f3a66ffc9aac8f..5ef48919a081be 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -5,6 +5,7 @@ #include "pycore_code.h" // CO_FAST_LOCAL, etc. #include "pycore_function.h" // _PyFunction_FromConstructor() #include "pycore_moduleobject.h" // _PyModule_GetDict() +#include "pycore_modsupport.h" // _PyArg_CheckPositional() #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_opcode_metadata.h" // _PyOpcode_Deopt, _PyOpcode_Caches @@ -158,16 +159,16 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) _PyStackRef *fast = _PyFrame_GetLocalsArray(frame->f_frame); PyCodeObject *co = _PyFrame_GetCode(frame->f_frame); - if (value == NULL) { - PyErr_SetString(PyExc_TypeError, "cannot remove variables from FrameLocalsProxy"); - return -1; - } - int i = framelocalsproxy_getkeyindex(frame, key, false); if (i == -2) { return -1; } if (i >= 0) { + if (value == NULL) { + PyErr_SetString(PyExc_ValueError, "cannot remove local variables from FrameLocalsProxy"); + return -1; + } + _Py_Executors_InvalidateDependency(PyInterpreterState_Get(), co, 1); _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); @@ -202,6 +203,10 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) PyObject *extra = frame->f_extra_locals; if (extra == NULL) { + if (value == NULL) { + _PyErr_SetKeyError(key); + return -1; + } extra = PyDict_New(); if (extra == NULL) { return -1; @@ -211,7 +216,11 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) assert(PyDict_Check(extra)); - return PyDict_SetItem(extra, key, value); + if (value == NULL) { + return PyDict_DelItem(extra, key); + } else { + return PyDict_SetItem(extra, key, value); + } } static int @@ -676,6 +685,59 @@ framelocalsproxy_setdefault(PyObject* self, PyObject *const *args, Py_ssize_t na return result; } +static PyObject* 
+framelocalsproxy_pop(PyObject* self, PyObject *const *args, Py_ssize_t nargs) +{ + if (!_PyArg_CheckPositional("pop", nargs, 1, 2)) { + return NULL; + } + + PyObject *key = args[0]; + PyObject *default_value = NULL; + + if (nargs == 2) { + default_value = args[1]; + } + + PyFrameObject *frame = ((PyFrameLocalsProxyObject*)self)->frame; + + int i = framelocalsproxy_getkeyindex(frame, key, false); + if (i == -2) { + return NULL; + } + + if (i >= 0) { + PyErr_SetString(PyExc_ValueError, "cannot remove local variables from FrameLocalsProxy"); + return NULL; + } + + PyObject *result = NULL; + + if (frame->f_extra_locals == NULL) { + if (default_value != NULL) { + return Py_XNewRef(default_value); + } else { + _PyErr_SetKeyError(key); + return NULL; + } + } + + if (PyDict_Pop(frame->f_extra_locals, key, &result) < 0) { + return NULL; + } + + if (result == NULL) { + if (default_value != NULL) { + return Py_XNewRef(default_value); + } else { + _PyErr_SetKeyError(key); + return NULL; + } + } + + return result; +} + static PyObject* framelocalsproxy_copy(PyObject *self, PyObject *Py_UNUSED(ignored)) { @@ -743,6 +805,8 @@ static PyMethodDef framelocalsproxy_methods[] = { NULL}, {"get", _PyCFunction_CAST(framelocalsproxy_get), METH_FASTCALL, NULL}, + {"pop", _PyCFunction_CAST(framelocalsproxy_pop), METH_FASTCALL, + NULL}, {"setdefault", _PyCFunction_CAST(framelocalsproxy_setdefault), METH_FASTCALL, NULL}, {NULL, NULL} /* sentinel */ From d67bf2d89ab57f94608d7d2cf949dc4a8749485d Mon Sep 17 00:00:00 2001 From: partev Date: Mon, 21 Oct 2024 12:18:10 -0400 Subject: [PATCH 058/106] gh-125766: Docs: minor rewording of installation on Linux section (GH-125743) --- Doc/using/unix.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/using/unix.rst b/Doc/using/unix.rst index 58838c28e6eb86..a2bcdab09a9282 100644 --- a/Doc/using/unix.rst +++ b/Doc/using/unix.rst @@ -17,12 +17,12 @@ On Linux Python comes preinstalled on most Linux distributions, and is 
available as a package on all others. However there are certain features you might want to use -that are not available on your distro's package. You can easily compile the +that are not available on your distro's package. You can compile the latest version of Python from source. -In the event that Python doesn't come preinstalled and isn't in the repositories as -well, you can easily make packages for your own distro. Have a look at the -following links: +In the event that the latest version of Python doesn't come preinstalled and isn't +in the repositories as well, you can make packages for your own distro. Have a +look at the following links: .. seealso:: From d880c83ff7fb2e464bc4f469d74cc3fc3eca082c Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Mon, 21 Oct 2024 12:46:37 -0400 Subject: [PATCH 059/106] Doc: C API: Move `tp_dealloc` paragraph to `tp_dealloc` section (#125737) It looks like commit 43cf44ddcce6b225f959ea2a53e4817244ca6054 (gh-31501) accidentally moved the paragraph to the `tp_finalize` section when the intent was to move it to the `tp_dealloc` section (according to the commit message). Also: * Convert the paragraph to a warning. * Apply the appropriate font style to `tp_dealloc`. * Unlinkify the first mention of `tp_dealloc` since the paragraph is already in the `tp_dealloc` section. --- Doc/c-api/typeobj.rst | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index da1b5092fbf787..0c59b3da0795cb 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -682,6 +682,19 @@ and :c:data:`PyType_Type` effectively act as defaults.) Py_DECREF(tp); } + .. warning:: + + In a garbage collected Python, :c:member:`!tp_dealloc` may be called from + any Python thread, not just the thread which created the object (if the + object becomes part of a refcount cycle, that cycle might be collected by + a garbage collection on any thread). 
This is not a problem for Python + API calls, since the thread on which :c:member:`!tp_dealloc` is called + will own the Global Interpreter Lock (GIL). However, if the object being + destroyed in turn destroys objects from some other C or C++ library, care + should be taken to ensure that destroying those objects on the thread + which called :c:member:`!tp_dealloc` will not violate any assumptions of + the library. + **Inheritance:** @@ -2109,17 +2122,6 @@ and :c:data:`PyType_Type` effectively act as defaults.) PyErr_Restore(error_type, error_value, error_traceback); } - Also, note that, in a garbage collected Python, - :c:member:`~PyTypeObject.tp_dealloc` may be called from - any Python thread, not just the thread which created the object (if the object - becomes part of a refcount cycle, that cycle might be collected by a garbage - collection on any thread). This is not a problem for Python API calls, since - the thread on which tp_dealloc is called will own the Global Interpreter Lock - (GIL). However, if the object being destroyed in turn destroys objects from some - other C or C++ library, care should be taken to ensure that destroying those - objects on the thread which called tp_dealloc will not violate any assumptions - of the library. - **Inheritance:** This field is inherited by subtypes. From 9b0bfba2a265b8108610b037945c004d8e81f2b4 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 21 Oct 2024 12:51:29 -0400 Subject: [PATCH 060/106] gh-124218: Use per-thread reference counting for globals and builtins (#125713) Use per-thread refcounting for the reference from function objects to the globals and builtins dictionaries. 
--- Include/cpython/dictobject.h | 4 +++- Include/internal/pycore_dict.h | 34 ++++++++++++++++++++++++++ Include/internal/pycore_object.h | 14 +++++++++++ Include/internal/pycore_uniqueid.h | 3 +++ Objects/dictobject.c | 18 ++++++++++++++ Objects/funcobject.c | 38 +++++++++++++++++++++++++----- Objects/moduleobject.c | 3 ++- Python/uniqueid.c | 12 +++++++--- 8 files changed, 115 insertions(+), 11 deletions(-) diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index b113c7fdcf6515..78473e54898fa5 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -17,7 +17,9 @@ typedef struct { /* This is a private field for CPython's internal use. * Bits 0-7 are for dict watchers. * Bits 8-11 are for the watched mutation counter (used by tier2 optimization) - * The remaining bits are not currently used. */ + * Bits 12-31 are currently unused + * Bits 32-63 are a unique id in the free threading build (used for per-thread refcounting) + */ uint64_t _ma_watcher_tag; PyDictKeysObject *ma_keys; diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 1920724c1d4f57..1d185559b3ef43 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -229,6 +229,8 @@ static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) { #define DICT_VERSION_INCREMENT (1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) #define DICT_WATCHER_MASK ((1 << DICT_MAX_WATCHERS) - 1) #define DICT_WATCHER_AND_MODIFICATION_MASK ((1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) - 1) +#define DICT_UNIQUE_ID_SHIFT (32) +#define DICT_UNIQUE_ID_MAX ((UINT64_C(1) << (64 - DICT_UNIQUE_ID_SHIFT)) - 1) PyAPI_FUNC(void) @@ -307,8 +309,40 @@ _PyInlineValuesSize(PyTypeObject *tp) int _PyDict_DetachFromObject(PyDictObject *dict, PyObject *obj); +// Enables per-thread ref counting on this dict in the free threading build +extern void _PyDict_EnablePerThreadRefcounting(PyObject *op); + PyDictObject 
*_PyObject_MaterializeManagedDict_LockHeld(PyObject *); +// See `_Py_INCREF_TYPE()` in pycore_object.h +#ifndef Py_GIL_DISABLED +# define _Py_INCREF_DICT Py_INCREF +# define _Py_DECREF_DICT Py_DECREF +#else +static inline Py_ssize_t +_PyDict_UniqueId(PyDictObject *mp) +{ + // Offset by one so that _ma_watcher_tag=0 represents an unassigned id + return (Py_ssize_t)(mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) - 1; +} + +static inline void +_Py_INCREF_DICT(PyObject *op) +{ + assert(PyDict_Check(op)); + Py_ssize_t id = _PyDict_UniqueId((PyDictObject *)op); + _Py_THREAD_INCREF_OBJECT(op, id); +} + +static inline void +_Py_DECREF_DICT(PyObject *op) +{ + assert(PyDict_Check(op)); + Py_ssize_t id = _PyDict_UniqueId((PyDictObject *)op); + _Py_THREAD_DECREF_OBJECT(op, id); +} +#endif + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 96f6d61e1c620b..c7af720b1ce43d 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -293,6 +293,20 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp); extern void _PyObject_FiniState(PyInterpreterState *interp); extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj); +// Macros used for per-thread reference counting in the free threading build. +// They resolve to normal Py_INCREF/DECREF calls in the default build. +// +// The macros are used for only a few references that would otherwise cause +// scaling bottlenecks in the free threading build: +// - The reference from an object to `ob_type`. +// - The reference from a function to `func_code`. +// - The reference from a function to `func_globals` and `func_builtins`. +// +// It's safe, but not performant or necessary, to use these macros for other +// references to code, type, or dict objects. It's also safe to mix their +// usage with normal Py_INCREF/DECREF calls. +// +// See also Include/internal/pycore_dict.h for _Py_INCREF_DICT/_Py_DECREF_DICT. 
#ifndef Py_GIL_DISABLED # define _Py_INCREF_TYPE Py_INCREF # define _Py_DECREF_TYPE Py_DECREF diff --git a/Include/internal/pycore_uniqueid.h b/Include/internal/pycore_uniqueid.h index ad5dd38ea08483..d3db49ddb78103 100644 --- a/Include/internal/pycore_uniqueid.h +++ b/Include/internal/pycore_uniqueid.h @@ -48,6 +48,9 @@ struct _Py_unique_id_pool { // Assigns the next id from the pool of ids. extern Py_ssize_t _PyObject_AssignUniqueId(PyObject *obj); +// Releases the allocated id back to the pool. +extern void _PyObject_ReleaseUniqueId(Py_ssize_t unique_id); + // Releases the allocated id back to the pool. extern void _PyObject_DisablePerThreadRefcounting(PyObject *obj); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 806096f5814062..c4e11a3e9c0bc7 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1636,6 +1636,24 @@ _PyDict_MaybeUntrack(PyObject *op) _PyObject_GC_UNTRACK(op); } +void +_PyDict_EnablePerThreadRefcounting(PyObject *op) +{ + assert(PyDict_Check(op)); +#ifdef Py_GIL_DISABLED + Py_ssize_t id = _PyObject_AssignUniqueId(op); + if ((uint64_t)id >= (uint64_t)DICT_UNIQUE_ID_MAX) { + _PyObject_ReleaseUniqueId(id); + return; + } + + PyDictObject *mp = (PyDictObject *)op; + assert((mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) == 0); + // Plus 1 so that _ma_watcher_tag=0 represents an unassigned id + mp->_ma_watcher_tag += ((uint64_t)id + 1) << DICT_UNIQUE_ID_SHIFT; +#endif +} + static inline int is_unusable_slot(Py_ssize_t ix) { diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 3cb247691386bf..44fb4ac0907d7b 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_ceval.h" // _PyEval_BuiltinsFromGlobals() +#include "pycore_dict.h" // _Py_INCREF_DICT() #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_object.h" // _PyObject_GC_UNTRACK() @@ -112,8 +113,15 @@ 
_PyFunction_FromConstructor(PyFrameConstructor *constr) Py_XDECREF(module); return NULL; } - op->func_globals = Py_NewRef(constr->fc_globals); - op->func_builtins = Py_NewRef(constr->fc_builtins); + _Py_INCREF_DICT(constr->fc_globals); + op->func_globals = constr->fc_globals; + if (PyDict_Check(constr->fc_builtins)) { + _Py_INCREF_DICT(constr->fc_builtins); + } + else { + Py_INCREF(constr->fc_builtins); + } + op->func_builtins = constr->fc_builtins; op->func_name = Py_NewRef(constr->fc_name); op->func_qualname = Py_NewRef(constr->fc_qualname); _Py_INCREF_CODE((PyCodeObject *)constr->fc_code); @@ -143,7 +151,7 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname { assert(globals != NULL); assert(PyDict_Check(globals)); - Py_INCREF(globals); + _Py_INCREF_DICT(globals); PyThreadState *tstate = _PyThreadState_GET(); @@ -184,7 +192,12 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname if (builtins == NULL) { goto error; } - Py_INCREF(builtins); + if (PyDict_Check(builtins)) { + _Py_INCREF_DICT(builtins); + } + else { + Py_INCREF(builtins); + } PyFunctionObject *op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type); if (op == NULL) { @@ -1057,8 +1070,21 @@ func_clear(PyObject *self) { PyFunctionObject *op = _PyFunction_CAST(self); func_clear_version(_PyInterpreterState_GET(), op); - Py_CLEAR(op->func_globals); - Py_CLEAR(op->func_builtins); + PyObject *globals = op->func_globals; + op->func_globals = NULL; + if (globals != NULL) { + _Py_DECREF_DICT(globals); + } + PyObject *builtins = op->func_builtins; + op->func_builtins = NULL; + if (builtins != NULL) { + if (PyDict_Check(builtins)) { + _Py_DECREF_DICT(builtins); + } + else { + Py_DECREF(builtins); + } + } Py_CLEAR(op->func_module); Py_CLEAR(op->func_defaults); Py_CLEAR(op->func_kwdefaults); diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index f63ae4e048bcd9..c06badd5f3edfe 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c 
@@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_call.h" // _PyObject_CallNoArgs() +#include "pycore_dict.h" // _PyDict_EnablePerThreadRefcounting() #include "pycore_fileutils.h" // _Py_wgetcwd #include "pycore_interp.h" // PyInterpreterState.importlib #include "pycore_long.h" // _PyLong_GetOne() @@ -105,7 +106,7 @@ new_module_notrack(PyTypeObject *mt) static void track_module(PyModuleObject *m) { - _PyObject_SetDeferredRefcount(m->md_dict); + _PyDict_EnablePerThreadRefcounting(m->md_dict); PyObject_GC_Track(m->md_dict); _PyObject_SetDeferredRefcount((PyObject *)m); diff --git a/Python/uniqueid.c b/Python/uniqueid.c index 0cbb35c6cd2f8b..b9f30713feeb57 100644 --- a/Python/uniqueid.c +++ b/Python/uniqueid.c @@ -1,5 +1,6 @@ #include "Python.h" +#include "pycore_dict.h" // _PyDict_UniqueId() #include "pycore_lock.h" // PyMutex_LockFlags() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_object.h" // _Py_IncRefTotal @@ -98,8 +99,8 @@ _PyObject_AssignUniqueId(PyObject *obj) return unique_id; } -static void -release_unique_id(Py_ssize_t unique_id) +void +_PyObject_ReleaseUniqueId(Py_ssize_t unique_id) { PyInterpreterState *interp = _PyInterpreterState_GET(); struct _Py_unique_id_pool *pool = &interp->unique_ids; @@ -128,6 +129,11 @@ clear_unique_id(PyObject *obj) id = co->_co_unique_id; co->_co_unique_id = -1; } + else if (PyDict_Check(obj)) { + PyDictObject *mp = (PyDictObject *)obj; + id = _PyDict_UniqueId(mp); + mp->_ma_watcher_tag &= ~(UINT64_MAX << DICT_UNIQUE_ID_SHIFT); + } return id; } @@ -136,7 +142,7 @@ _PyObject_DisablePerThreadRefcounting(PyObject *obj) { Py_ssize_t id = clear_unique_id(obj); if (id >= 0) { - release_unique_id(id); + _PyObject_ReleaseUniqueId(id); } } From 695814c6e97aad0ae2b116cedca3e77d25d5b968 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 21 Oct 2024 18:54:24 +0100 Subject: [PATCH 061/106] gh-119786: move interpreter doc from devguide to InternalDocs (#125715) 
--- InternalDocs/README.md | 31 ++- InternalDocs/code_objects.md | 5 + InternalDocs/generators.md | 9 + InternalDocs/interpreter.md | 364 +++++++++++++++++++++++++++++++++++ 4 files changed, 400 insertions(+), 9 deletions(-) create mode 100644 InternalDocs/code_objects.md create mode 100644 InternalDocs/generators.md create mode 100644 InternalDocs/interpreter.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 0a6ecf899458ed..48c893bde2a631 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -11,19 +11,32 @@ The core dev team attempts to keep this documentation up to date. If it is not, please report that through the [issue tracker](https://github.com/python/cpython/issues). -Index: ------ -[Guide to the parser](parser.md) +Compiling Python Source Code +--- -[Compiler Design](compiler.md) +- [Guide to the parser](parser.md) -[Frames](frames.md) +- [Compiler Design](compiler.md) -[Adaptive Instruction Families](adaptive.md) +Runtime Objects +--- -[The Source Code Locations Table](locations.md) +- [Code Objects (coming soon)](code_objects.md) -[Garbage collector design](garbage_collector.md) +- [The Source Code Locations Table](locations.md) -[Exception Handling](exception_handling.md) +- [Generators (coming soon)](generators.md) + +- [Frames](frames.md) + +Program Execution +--- + +- [The Interpreter](interpreter.md) + +- [Adaptive Instruction Families](adaptive.md) + +- [Garbage Collector Design](garbage_collector.md) + +- [Exception Handling](exception_handling.md) diff --git a/InternalDocs/code_objects.md b/InternalDocs/code_objects.md new file mode 100644 index 00000000000000..284a8b7aee5765 --- /dev/null +++ b/InternalDocs/code_objects.md @@ -0,0 +1,5 @@ + +Code objects +============ + +Coming soon. diff --git a/InternalDocs/generators.md b/InternalDocs/generators.md new file mode 100644 index 00000000000000..d53f0f9bdff4e4 --- /dev/null +++ b/InternalDocs/generators.md @@ -0,0 +1,9 @@ + +Generators +========== + +Coming soon. 
+ + diff --git a/InternalDocs/interpreter.md b/InternalDocs/interpreter.md new file mode 100644 index 00000000000000..dcfddc99370c0e --- /dev/null +++ b/InternalDocs/interpreter.md @@ -0,0 +1,364 @@ + +The bytecode interpreter +======================== + +Overview +-------- + +This document describes the workings and implementation of the bytecode +interpreter, the part of Python that executes compiled Python code. Its +entry point is in [Python/ceval.c](../Python/ceval.c). + +At a high level, the interpreter consists of a loop that iterates over the +bytecode instructions, executing each of them via a switch statement that +has a case implementing each opcode. This switch statement is generated +from the instruction definitions in [Python/bytecodes.c](../Python/bytecodes.c) +which are written in [a DSL](../Tools/cases_generator/interpreter_definition.md) +developed for this purpose. + +Recall that the [Python Compiler](compiler.md) produces a [`CodeObject`](code_objects.md), +which contains the bytecode instructions along with static data that is required to execute them, +such as the consts list, variable names, +[exception table](exception_handling.md#format-of-the-exception-table), and so on. + +When the interpreter's +[`PyEval_EvalCode()`](https://docs.python.org/3.14/c-api/veryhigh.html#c.PyEval_EvalCode) +function is called to execute a `CodeObject`, it constructs a [`Frame`](frames.md) and calls +[`_PyEval_EvalFrame()`](https://docs.python.org/3.14/c-api/veryhigh.html#c.PyEval_EvalCode) +to execute the code object in this frame. The frame holds the dynamic state of the +`CodeObject`'s execution, including the instruction pointer, the globals and builtins. +It also has a reference to the `CodeObject` itself. + +In addition to the frame, `_PyEval_EvalFrame()` also receives a +[`Thread State`](https://docs.python.org/3/c-api/init.html#c.PyThreadState) +object, `tstate`, which includes things like the exception state and the +recursion depth.
The thread state also provides access to the per-interpreter +state (`tstate->interp`), which has a pointer to the per-runtime (that is, +truly global) state (`tstate->interp->runtime`). + +Finally, `_PyEval_EvalFrame()` receives an integer argument `throwflag` +which, when nonzero, indicates that the interpreter should just raise the current exception +(this is used in the implementation of +[`gen.throw`](https://docs.python.org/3.14/reference/expressions.html#generator.throw)). + +By default, [`_PyEval_EvalFrame()`](https://docs.python.org/3.14/c-api/veryhigh.html#c.PyEval_EvalCode) +simply calls `_PyEval_EvalFrameDefault()` to execute the frame. However, as per +[`PEP 523`](https://peps.python.org/pep-0523/) this is configurable by setting +`interp->eval_frame`. In the following, we describe the default function, +`_PyEval_EvalFrameDefault()`. + + +Instruction decoding +-------------------- + +The first task of the interpreter is to decode the bytecode instructions. +Bytecode is stored as an array of 16-bit code units (`_Py_CODEUNIT`). +Each code unit contains an 8-bit `opcode` and an 8-bit argument (`oparg`), both unsigned. +In order to make the bytecode format independent of the machine byte order when stored on disk, +`opcode` is always the first byte and `oparg` is always the second byte. +Macros are used to extract the `opcode` and `oparg` from a code unit +(`_Py_OPCODE(word)` and `_Py_OPARG(word)`). +Some instructions (for example, `NOP` or `POP_TOP`) have no argument -- in this case +we ignore `oparg`. + +A simplified version of the interpreter's main loop looks like this: + +```c + _Py_CODEUNIT *first_instr = code->co_code_adaptive; + _Py_CODEUNIT *next_instr = first_instr; + while (1) { + _Py_CODEUNIT word = *next_instr++; + unsigned char opcode = _Py_OPCODE(word); + unsigned int oparg = _Py_OPARG(word); + switch (opcode) { + // ... A case for each opcode ...
+ } + } +``` + +This loop iterates over the instructions, decoding each into its `opcode` +and `oparg`, and then executes the switch case that implements this `opcode`. + +The instruction format supports 256 different opcodes, which is sufficient. +However, it also limits `oparg` to 8-bit values, which is too restrictive. +To overcome this, the `EXTENDED_ARG` opcode allows us to prefix any instruction +with one or more additional data bytes, which combine into a larger oparg. +For example, this sequence of code units: + + EXTENDED_ARG 1 + EXTENDED_ARG 0 + LOAD_CONST 2 + +would set `opcode` to `LOAD_CONST` and `oparg` to `65538` (that is, `0x1_00_02`). +The compiler should limit itself to at most three `EXTENDED_ARG` prefixes, to allow the +resulting `oparg` to fit in 32 bits, but the interpreter does not check this. + +In the following, a `code unit` is always two bytes, while an `instruction` is a +sequence of code units consisting of zero to three `EXTENDED_ARG` opcodes followed by +a primary opcode. + +The following loop, to be inserted just above the `switch` statement, will make the above +snippet decode a complete instruction: + +```c + while (opcode == EXTENDED_ARG) { + word = *next_instr++; + opcode = _Py_OPCODE(word); + oparg = (oparg << 8) | _Py_OPARG(word); + } +``` + +For various reasons we'll get to later (mostly efficiency, given that `EXTENDED_ARG` +is rare) the actual code is different. + +Jumps +===== + +Note that when the `switch` statement is reached, `next_instr` (the "instruction offset") +already points to the next instruction. +Thus, jump instructions can be implemented by manipulating `next_instr`: + +- A jump forward (`JUMP_FORWARD`) sets `next_instr += oparg`. +- A jump backward sets `next_instr -= oparg`. + +Inline cache entries +==================== + +Some (specialized or specializable) instructions have an associated "inline cache". 
+The inline cache consists of one or more two-byte entries included in the bytecode +array as additional words following the `opcode`/`oparg` pair. +The size of the inline cache for a particular instruction is fixed by its `opcode`. +Moreover, the inline cache size for all instructions in a +[family of specialized/specializable instructions](adaptive.md) +(for example, `LOAD_ATTR`, `LOAD_ATTR_SLOT`, `LOAD_ATTR_MODULE`) must all be +the same. Cache entries are reserved by the compiler and initialized with zeros. +Although they are represented by code units, cache entries do not conform to the +`opcode` / `oparg` format. + +If an instruction has an inline cache, the layout of its cache is described by +a `struct` definition in [`pycore_code.h`](../Include/internal/pycore_code.h). +This allows us to access the cache by casting `next_instr` to a pointer to this `struct`. +The size of such a `struct` must be independent of the machine architecture, word size +and alignment requirements. For a 32-bit field, the `struct` should use `_Py_CODEUNIT field[2]`. + +The instruction implementation is responsible for advancing `next_instr` past the inline cache. +For example, if an instruction's inline cache is four bytes (that is, two code units) in size, +the code for the instruction must contain `next_instr += 2;`. +This is equivalent to a relative forward jump by that many code units. +(In the interpreter definition DSL, this is coded as `JUMPBY(n)`, where `n` is the number +of code units to jump, typically given as a named constant.) + +Serializing non-zero cache entries would present a problem because the serialization +(`marshal`) format must be independent of the machine byte order. + +More information about the use of inline caches can be found in +[PEP 659](https://peps.python.org/pep-0659/#ancillary-data). + +The evaluation stack +-------------------- + +Most instructions read or write some data in the form of object references (`PyObject *`).
+The CPython bytecode interpreter is a stack machine, meaning that its instructions operate +by pushing data onto and popping it off the stack. +The stack forms part of the frame for the code object. Its maximum depth is calculated +by the compiler and stored in the `co_stacksize` field of the code object, so that the +stack can be pre-allocated as a contiguous array of `PyObject*` pointers when the frame +is created. + +The stack effects of each instruction are also exposed through the +[opcode metadata](../Include/internal/pycore_opcode_metadata.h) via two +functions that report how many stack elements the instruction consumes, +and how many it produces (`_PyOpcode_num_popped` and `_PyOpcode_num_pushed`). +For example, the `BINARY_OP` instruction pops two objects from the stack and pushes the +result back onto the stack. + +The stack grows up in memory; the operation `PUSH(x)` is equivalent to `*stack_pointer++ = x`, +whereas `x = POP()` means `x = *--stack_pointer`. +Overflow and underflow checks are active in debug mode, but are otherwise optimized away. + +At any point during execution, the stack level is knowable based on the instruction pointer +alone, and some properties of each item on the stack are also known. +In particular, only a few instructions may push a `NULL` onto the stack, and the positions +that may be `NULL` are known. +A few other instructions (`GET_ITER`, `FOR_ITER`) push or pop an object that is known to +be an iterator. + +Instruction sequences that do not allow statically knowing the stack depth are deemed illegal; +the bytecode compiler never generates such sequences. +For example, the following sequence is illegal, because it keeps pushing items on the stack: + + LOAD_FAST 0 + JUMP_BACKWARD 2 + +> [!NOTE] +> Do not confuse the evaluation stack with the call stack, which is used to implement calling +> and returning from functions.
+ +Error handling +-------------- + +When the implementation of an opcode raises an exception, it jumps to the +`exception_unwind` label in [Python/ceval.c](../Python/ceval.c). +The exception is then handled as described in the +[`exception handling documentation`](exception_handling.md#handling-exceptions). + +Python-to-Python calls +---------------------- + +The `_PyEval_EvalFrameDefault()` function is recursive, because sometimes +the interpreter calls some C function that calls back into the interpreter. +In 3.10 and before, this was the case even when a Python function called +another Python function: +The `CALL` opcode would call the `tp_call` dispatch function of the +callee, which would extract the code object, create a new frame for the call +stack, and then call back into the interpreter. This approach is very general +but consumes several C stack frames for each nested Python call, thereby +increasing the risk of an (unrecoverable) C stack overflow. + +Since 3.11, the `CALL` instruction special-cases function objects to "inline" +the call. When a call gets inlined, a new frame gets pushed onto the call +stack and the interpreter "jumps" to the start of the callee's bytecode. +When an inlined callee executes a `RETURN_VALUE` instruction, the frame is +popped off the call stack and the interpreter returns to its caller, +by popping a frame off the call stack and "jumping" to the return address. +There is a flag in the frame (`frame->is_entry`) that indicates whether +the frame was inlined (set if it wasn't). +If `RETURN_VALUE` finds this flag set, it performs the usual cleanup and +returns from `_PyEval_EvalFrameDefault()` altogether, to a C caller. + +A similar check is performed when an unhandled exception occurs. + +The call stack +-------------- + +Up through 3.10, the call stack was implemented as a singly-linked list of +[frame objects](frames.md). This was expensive because each call would require a +heap allocation for the stack frame. 
+ +Since 3.11, frames are no longer fully-fledged objects. Instead, a leaner internal +`_PyInterpreterFrame` structure is used, which is allocated using a custom allocator +function (`_PyThreadState_BumpFramePointer()`), which allocates and initializes a +frame structure. Usually a frame allocation is just a pointer bump, which improves +memory locality. + +Sometimes an actual `PyFrameObject` is needed, such as when Python code calls +`sys._getframe()` or an extension module calls +[`PyEval_GetFrame()`](https://docs.python.org/3/c-api/reflection.html#c.PyEval_GetFrame). +In this case we allocate a proper `PyFrameObject` and initialize it from the +`_PyInterpreterFrame`. + +Things get more complicated when generators are involved, since those do not +follow the push/pop model. This includes async functions, which are based on +the same mechanism. A generator object has space for a `_PyInterpreterFrame` +structure, including the variable-size part (used for locals and the eval stack). +When a generator (or async) function is first called, a special opcode +`RETURN_GENERATOR` is executed, which is responsible for creating the +generator object. The generator object's `_PyInterpreterFrame` is initialized +with a copy of the current stack frame. The current stack frame is then popped +off the frame stack and the generator object is returned. +(Details differ depending on the `is_entry` flag.) +When the generator is resumed, the interpreter pushes its `_PyInterpreterFrame` +onto the frame stack and resumes execution. +See also the [generators](generators.md) section. + + + + + +Introducing a new bytecode instruction +-------------------------------------- + +It is occasionally necessary to add a new opcode in order to implement +a new feature or change the way that existing features are compiled. +This section describes the changes required to do this. 
+ +First, you must choose a name for the bytecode, implement it in +[`Python/bytecodes.c`](../Python/bytecodes.c) and add a documentation +entry in [`Doc/library/dis.rst`](../Doc/library/dis.rst). +Then run `make regen-cases` to assign a number for it (see +[`Include/opcode_ids.h`](../Include/opcode_ids.h)) and regenerate a +number of files with the actual implementation of the bytecode in +[`Python/generated_cases.c.h`](../Python/generated_cases.c.h) and +metadata about it in additional files. + +With a new bytecode you must also change what is called the "magic number" for +.pyc files: bump the value of the variable `MAGIC_NUMBER` in +[`Lib/importlib/_bootstrap_external.py`](../Lib/importlib/_bootstrap_external.py). +Changing this number will lead to all .pyc files with the old `MAGIC_NUMBER` +to be recompiled by the interpreter on import. Whenever `MAGIC_NUMBER` is +changed, the ranges in the `magic_values` array in +[`PC/launcher.c`](../PC/launcher.c) may also need to be updated. Changes to +[`Lib/importlib/_bootstrap_external.py`](../Lib/importlib/_bootstrap_external.py) +will take effect only after running `make regen-importlib`. + +> [!NOTE] +> Running `make regen-importlib` before adding the new bytecode target to +> [`Python/bytecodes.c`](../Python/bytecodes.c) +> (followed by `make regen-cases`) will result in an error. You should only run +> `make regen-importlib` after the new bytecode target has been added. + +> [!NOTE] +> On Windows, running the `./build.bat` script will automatically +> regenerate the required files without requiring additional arguments. + +Finally, you need to introduce the use of the new bytecode. Update +[`Python/codegen.c`](../Python/codegen.c) to emit code with this bytecode. +Optimizations in [`Python/flowgraph.c`](../Python/flowgraph.c) may also +need to be updated. 
If the new opcode affects control flow or the block +stack, you may have to update the `frame_setlineno()` function in +[`Objects/frameobject.c`](../Objects/frameobject.c). It may also be necessary +to update [`Lib/dis.py`](../Lib/dis.py) if the new opcode interprets its +argument in a special way (like `FORMAT_VALUE` or `MAKE_FUNCTION`). + +If you make a change here that can affect the output of bytecode that +is already in existence and you do not change the magic number, make +sure to delete your old .py(c|o) files! Even though you will end up changing +the magic number if you change the bytecode, while you are debugging your work +you may be changing the bytecode output without constantly bumping up the +magic number. This can leave you with stale .pyc files that will not be +recreated. +Running `find . -name '*.py[co]' -exec rm -f '{}' +` should delete all .pyc +files you have, forcing new ones to be created and thus allowing you to test out your +new bytecode properly. Run `make regen-importlib` to update the +bytecode of frozen importlib files. You have to run `make` again after this +to recompile the generated C files. + +Additional resources +-------------------- + +* Brandt Bucher's talk about the specializing interpreter at PyCon US 2023.
+ [Slides](https://github.com/brandtbucher/brandtbucher/blob/master/2023/04/21/inside_cpython_311s_new_specializing_adaptive_interpreter.pdf) + [Video](https://www.youtube.com/watch?v=PGZPSWZSkJI&t=1470s) From de5a6c7c7d00ac37d66cba9849202b374e9cdfb7 Mon Sep 17 00:00:00 2001 From: mpage Date: Mon, 21 Oct 2024 11:08:13 -0700 Subject: [PATCH 062/106] gh-121459: Fix a couple of uses of `PyStackRef_FromPyObjectSteal` (#125711) * Fix usage of PyStackRef_FromPyObjectSteal in CALL_TUPLE_1 This was missed in gh-124894 * Fix usage of PyStackRef_FromPyObjectSteal in _CALL_STR_1 This was missed in gh-124894 * Regenerate code --- Python/bytecodes.c | 10 ++++++---- Python/executor_cases.c.h | 10 ++++++---- Python/generated_cases.c.h | 10 ++++++---- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c59a35c3e828ca..62e9b5ddd1584c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3629,11 +3629,12 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *res_o = PyObject_Str(arg_o); DEAD(null); DEAD(callable); PyStackRef_CLOSE(arg); - ERROR_IF(PyStackRef_IsNull(res), error); + ERROR_IF(res_o == NULL, error); + res = PyStackRef_FromPyObjectSteal(res_o); } macro(CALL_STR_1) = @@ -3650,11 +3651,12 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *res_o = PySequence_Tuple(arg_o); DEAD(null); DEAD(callable); PyStackRef_CLOSE(arg); - ERROR_IF(PyStackRef_IsNull(res), error); + ERROR_IF(res_o == NULL, error); + res = PyStackRef_FromPyObjectSteal(res_o); } macro(CALL_TUPLE_1) = diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 15a6c7bc1a7966..5df4986cd838b5 100644 --- a/Python/executor_cases.c.h +++ 
b/Python/executor_cases.c.h @@ -4299,10 +4299,11 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); + if (res_o == NULL) JUMP_TO_ERROR(); + res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); @@ -4331,10 +4332,11 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); + if (res_o == NULL) JUMP_TO_ERROR(); + res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a9290986c24f45..388031af87a79f 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2978,10 +2978,11 @@ DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type, CALL); STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) goto pop_3_error; + if (res_o == NULL) goto pop_3_error; + res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC { @@ -3028,10 +3029,11 @@ DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type, CALL); STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if 
(PyStackRef_IsNull(res)) goto pop_3_error; + if (res_o == NULL) goto pop_3_error; + res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC { From 5ca4e34bc1aab8321911aac6d5b2b9e75ff764d8 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Oct 2024 21:30:45 +0300 Subject: [PATCH 063/106] gh-125767: Fix pickling and copying of super objects (GH-125781) Previously, copying a super object returned a copy of the instance invoking super(). Pickling a super object could pickle the instance invoking super() or fail, depending on its type and protocol. Now deep copying returns a new super object and pickling pickles the super object. Shallow copying returns the same super object. --- Doc/library/functions.rst | 4 ++ Doc/whatsnew/3.14.rst | 4 ++ Lib/copy.py | 2 +- Lib/copyreg.py | 5 ++ Lib/test/test_super.py | 70 +++++++++++++++++++ ...-10-21-13-52-37.gh-issue-125767.0kK4lX.rst | 2 + 6 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-21-13-52-37.gh-issue-125767.0kK4lX.rst diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 0638df04c6ff40..290c63827ff766 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -2032,6 +2032,10 @@ are always available. They are listed here in alphabetical order. :func:`super`, see `guide to using super() `_. + .. versionchanged:: 3.14 + :class:`super` objects are now :mod:`pickleable ` and + :mod:`copyable `. + .. _func-tuple: .. class:: tuple() diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index ad841538ccc547..d52faa614db94e 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -190,6 +190,10 @@ Other language changes They raise an error if the argument is a string. (Contributed by Serhiy Storchaka in :gh:`84978`.) +* :class:`super` objects are now :mod:`pickleable ` and + :mod:`copyable `. + (Contributed by Serhiy Storchaka in :gh:`125767`.) 
+ New modules =========== diff --git a/Lib/copy.py b/Lib/copy.py index a79976d3a658f0..f27e109973cfb7 100644 --- a/Lib/copy.py +++ b/Lib/copy.py @@ -106,7 +106,7 @@ def _copy_immutable(x): bytes, frozenset, type, range, slice, property, types.BuiltinFunctionType, types.EllipsisType, types.NotImplementedType, types.FunctionType, types.CodeType, - weakref.ref): + weakref.ref, super): d[t] = _copy_immutable d[list] = list.copy diff --git a/Lib/copyreg.py b/Lib/copyreg.py index 578392409b403c..17c5dde67c887c 100644 --- a/Lib/copyreg.py +++ b/Lib/copyreg.py @@ -36,6 +36,11 @@ def pickle_union(obj): pickle(type(int | str), pickle_union) +def pickle_super(obj): + return super, (obj.__thisclass__, obj.__self__) + +pickle(super, pickle_super) + # Support for pickling new-style objects def _reconstructor(cls, base, state): diff --git a/Lib/test/test_super.py b/Lib/test/test_super.py index 1222ec6a3c4109..149016635522c3 100644 --- a/Lib/test/test_super.py +++ b/Lib/test/test_super.py @@ -1,5 +1,7 @@ """Unit tests for zero-argument super() & related machinery.""" +import copy +import pickle import textwrap import threading import unittest @@ -539,6 +541,74 @@ def work(): for thread in threads: thread.join() + def test_special_methods(self): + for e in E(), E: + s = super(C, e) + self.assertEqual(s.__reduce__, e.__reduce__) + self.assertEqual(s.__reduce_ex__, e.__reduce_ex__) + self.assertEqual(s.__getstate__, e.__getstate__) + self.assertFalse(hasattr(s, '__getnewargs__')) + self.assertFalse(hasattr(s, '__getnewargs_ex__')) + self.assertFalse(hasattr(s, '__setstate__')) + self.assertFalse(hasattr(s, '__copy__')) + self.assertFalse(hasattr(s, '__deepcopy__')) + + def test_pickling(self): + e = E() + e.x = 1 + s = super(C, e) + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto): + u = pickle.loads(pickle.dumps(s, proto)) + self.assertEqual(u.f(), s.f()) + self.assertIs(type(u), type(s)) + self.assertIs(type(u.__self__), E) + 
self.assertEqual(u.__self__.x, 1) + self.assertIs(u.__thisclass__, C) + self.assertIs(u.__self_class__, E) + + s = super(C, E) + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto): + u = pickle.loads(pickle.dumps(s, proto)) + self.assertEqual(u.cm(), s.cm()) + self.assertEqual(u.f, s.f) + self.assertIs(type(u), type(s)) + self.assertIs(u.__self__, E) + self.assertIs(u.__thisclass__, C) + self.assertIs(u.__self_class__, E) + + def test_shallow_copying(self): + s = super(C, E()) + self.assertIs(copy.copy(s), s) + s = super(C, E) + self.assertIs(copy.copy(s), s) + + def test_deep_copying(self): + e = E() + e.x = [1] + s = super(C, e) + u = copy.deepcopy(s) + self.assertEqual(u.f(), s.f()) + self.assertIs(type(u), type(s)) + self.assertIsNot(u, s) + self.assertIs(type(u.__self__), E) + self.assertIsNot(u.__self__, e) + self.assertIsNot(u.__self__.x, e.x) + self.assertEqual(u.__self__.x, [1]) + self.assertIs(u.__thisclass__, C) + self.assertIs(u.__self_class__, E) + + s = super(C, E) + u = copy.deepcopy(s) + self.assertEqual(u.cm(), s.cm()) + self.assertEqual(u.f, s.f) + self.assertIsNot(u, s) + self.assertIs(type(u), type(s)) + self.assertIs(u.__self__, E) + self.assertIs(u.__thisclass__, C) + self.assertIs(u.__self_class__, E) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-10-21-13-52-37.gh-issue-125767.0kK4lX.rst b/Misc/NEWS.d/next/Library/2024-10-21-13-52-37.gh-issue-125767.0kK4lX.rst new file mode 100644 index 00000000000000..bfda740a79d10e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-21-13-52-37.gh-issue-125767.0kK4lX.rst @@ -0,0 +1,2 @@ +:class:`super` objects are now :mod:`pickleable ` and +:mod:`copyable `. 
From dcc4fb2c9068f60353f0c0978948b7681f7745e6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Oct 2024 21:54:12 +0300 Subject: [PATCH 064/106] gh-124969: Make locale.nl_langinfo(locale.ALT_DIGITS) returning a string again (GH-125774) This is a follow up of GH-124974. Only Glibc needed a fix. Now the returned value is a string consisting of semicolon-separated symbols on all Posix platforms. --- Doc/library/locale.rst | 7 ++-- Lib/test/test__locale.py | 30 ++++++++++----- ...-10-21-12-06-55.gh-issue-124969.xiY8UP.rst | 2 + Modules/_localemodule.c | 38 +++++++++++-------- 4 files changed, 50 insertions(+), 27 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 5f3c4840b5cc70..f172a55080efc9 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -158,8 +158,7 @@ The :mod:`locale` module defines the following exception and functions: .. function:: nl_langinfo(option) - Return some locale-specific information as a string (or a tuple for - ``ALT_DIGITS``). This function is not + Return some locale-specific information as a string. This function is not available on all systems, and the set of possible options might also vary across platforms. The possible argument values are numbers, for which symbolic constants are available in the locale module. @@ -312,7 +311,9 @@ The :mod:`locale` module defines the following exception and functions: .. data:: ALT_DIGITS - Get a tuple of up to 100 strings used to represent the values 0 to 99. + Get a string consisting of up to 100 semicolon-separated symbols used + to represent the values 0 to 99 in a locale-specific way. + In most locales this is an empty string. 
The function temporarily sets the ``LC_CTYPE`` locale to the locale of the category that determines the requested value (``LC_TIME``, diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index e403c2a822788d..7e6e296c069abb 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -26,7 +26,10 @@ 'bs_BA', 'fr_LU', 'kl_GL', 'fa_IR', 'de_BE', 'sv_SE', 'it_CH', 'uk_UA', 'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', 'ps_AF', 'en_US', 'fr_FR.ISO8859-1', 'fr_FR.UTF-8', 'fr_FR.ISO8859-15@euro', - 'ru_RU.KOI8-R', 'ko_KR.eucKR'] + 'ru_RU.KOI8-R', 'ko_KR.eucKR', + 'ja_JP.UTF-8', 'lzh_TW.UTF-8', 'my_MM.UTF-8', 'or_IN.UTF-8', 'shn_MM.UTF-8', + 'ar_AE.UTF-8', 'bn_IN.UTF-8', 'mr_IN.UTF-8', 'th_TH.TIS620', +] def setUpModule(): global candidate_locales @@ -78,11 +81,13 @@ def accept(loc): 'C': (0, {}), 'en_US': (0, {}), 'fa_IR': (100, {0: '\u06f0\u06f0', 10: '\u06f1\u06f0', 99: '\u06f9\u06f9'}), - 'ja_JP': (100, {0: '\u3007', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}), + 'ja_JP': (100, {1: '\u4e00', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}), 'lzh_TW': (32, {0: '\u3007', 10: '\u5341', 31: '\u5345\u4e00'}), 'my_MM': (100, {0: '\u1040\u1040', 10: '\u1041\u1040', 99: '\u1049\u1049'}), 'or_IN': (100, {0: '\u0b66', 10: '\u0b67\u0b66', 99: '\u0b6f\u0b6f'}), 'shn_MM': (100, {0: '\u1090\u1090', 10: '\u1091\u1090', 99: '\u1099\u1099'}), + 'ar_AE': (100, {0: '\u0660', 10: '\u0661\u0660', 99: '\u0669\u0669'}), + 'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}), } if sys.platform == 'win32': @@ -199,21 +204,28 @@ def test_lc_numeric_basic(self): def test_alt_digits_nl_langinfo(self): # Test nl_langinfo(ALT_DIGITS) tested = False - for loc, (count, samples) in known_alt_digits.items(): + for loc in candidate_locales: with self.subTest(locale=loc): try: setlocale(LC_TIME, loc) except Error: self.skipTest(f'no locale {loc!r}') continue + with self.subTest(locale=loc): alt_digits = nl_langinfo(locale.ALT_DIGITS) - 
self.assertIsInstance(alt_digits, tuple) - if count and not alt_digits and support.is_apple: - self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on Apple platforms') - self.assertEqual(len(alt_digits), count) - for i in samples: - self.assertEqual(alt_digits[i], samples[i]) + self.assertIsInstance(alt_digits, str) + alt_digits = alt_digits.split(';') if alt_digits else [] + if alt_digits: + self.assertGreaterEqual(len(alt_digits), 10, alt_digits) + loc1 = loc.split('.', 1)[0] + if loc1 in known_alt_digits: + count, samples = known_alt_digits[loc1] + if count and not alt_digits: + self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on this platform') + self.assertEqual(len(alt_digits), count, alt_digits) + for i in samples: + self.assertEqual(alt_digits[i], samples[i]) tested = True if not tested: self.skipTest('no suitable locales') diff --git a/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst b/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst new file mode 100644 index 00000000000000..c44550184e0000 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst @@ -0,0 +1,2 @@ +``locale.nl_langinfo(locale.ALT_DIGITS)`` now returns a string again. The +returned value consists of up to 100 semicolon-separated symbols. diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 0daec646605775..2a789ea74d27da 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -667,28 +667,36 @@ _locale_nl_langinfo_impl(PyObject *module, int item) return NULL; } PyObject *pyresult; +#ifdef __GLIBC__ #ifdef ALT_DIGITS - if (item == ALT_DIGITS) { - /* The result is a sequence of up to 100 NUL-separated strings. */ - const char *s = result; + if (item == ALT_DIGITS && *result) { + /* According to the POSIX specification the result must be + * a sequence of up to 100 semicolon-separated strings. + * But in Glibc they are NUL-separated. 
*/ + Py_ssize_t i = 0; int count = 0; - for (; count < 100 && *s; count++) { - s += strlen(s) + 1; + for (; count < 100 && result[i]; count++) { + i += strlen(result + i) + 1; } - pyresult = PyTuple_New(count); - if (pyresult != NULL) { - for (int i = 0; i < count; i++) { - PyObject *unicode = PyUnicode_DecodeLocale(result, NULL); - if (unicode == NULL) { - Py_CLEAR(pyresult); - break; - } - PyTuple_SET_ITEM(pyresult, i, unicode); - result += strlen(result) + 1; + char *buf = PyMem_Malloc(i); + if (buf == NULL) { + PyErr_NoMemory(); + pyresult = NULL; + } + else { + memcpy(buf, result, i); + /* Replace all NULs with semicolons. */ + i = 0; + while (--count) { + i += strlen(buf + i); + buf[i++] = ';'; } + pyresult = PyUnicode_DecodeLocale(buf, NULL); + PyMem_Free(buf); } } else +#endif #endif { pyresult = PyUnicode_DecodeLocale(result, NULL); From 9dde4638e44639d45bd7d72e70a8d410995a585a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Oct 2024 22:17:39 +0300 Subject: [PATCH 065/106] gh-53203: Fix test_strptime on Solaris (GH-125785) Use fixed timezone. Skip roundtrip tests on locales with 2-digit year. --- Lib/test/test_strptime.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 09f6f656bfcb0d..9f5cfca9c7f124 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -515,12 +515,17 @@ def test_date_time_locale(self): # NB: Dates before 1969 do not roundtrip on some locales: # az_IR, bo_CN, bo_IN, dz_BT, eu_ES, eu_FR, fa_IR, or_IN. 
+ @support.run_with_tz('STD-1DST,M4.1.0,M10.1.0') @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'ar_AE', 'mfe_MU', 'yo_NG', 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', 'my_MM', 'shn_MM') def test_date_time_locale2(self): # Test %c directive + loc = locale.getlocale(locale.LC_TIME)[0] + if sys.platform.startswith('sunos'): + if loc in ('ar_AE',): + self.skipTest(f'locale {loc!r} may not work on this platform') self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) self.roundtrip('%c', slice(0, 6), (1800, 1, 1, 0, 0, 0, 0, 1, 0)) @@ -553,6 +558,10 @@ def test_date_locale(self): 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM') def test_date_locale2(self): # Test %x directive + loc = locale.getlocale(locale.LC_TIME)[0] + if sys.platform.startswith('sunos'): + if loc in ('en_US', 'de_DE', 'ar_AE'): + self.skipTest(f'locale {loc!r} may not work on this platform') self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) self.roundtrip('%x', slice(0, 3), (1800, 1, 1, 0, 0, 0, 0, 1, 0)) From 44f841f01af0fb038e142a07f15eda1ecdd5b08a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 21 Oct 2024 13:39:07 -0600 Subject: [PATCH 066/106] gh-125716: Raise an Exception If _globals_init() Fails In the _interpqueues Module (gh-125802) The fix applies to the _interpchannels module as well. I've also included a drive-by typo fix for _interpqueues. 
--- Modules/_interpchannelsmodule.c | 3 ++- Modules/_interpqueuesmodule.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index a8b4a8d76b0eaa..c52cde6da500f7 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -3482,7 +3482,8 @@ The 'interpreters' module provides a more convenient interface."); static int module_exec(PyObject *mod) { - if (_globals_init() != 0) { + int err = _globals_init(); + if (handle_channel_error(err, mod, -1)) { return -1; } diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index 55c43199ee4d79..aa70134d82b046 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -1312,7 +1312,7 @@ _queueid_xid_new(int64_t qid) struct _queueid_xid *data = PyMem_RawMalloc(sizeof(struct _queueid_xid)); if (data == NULL) { - _queues_incref(queues, qid); + _queues_decref(queues, qid); return NULL; } data->qid = qid; @@ -1894,7 +1894,8 @@ The 'interpreters' module provides a more convenient interface."); static int module_exec(PyObject *mod) { - if (_globals_init() != 0) { + int err = _globals_init(); + if (handle_queue_error(err, mod, -1)) { return -1; } From d48cc82ed25e26b02eb97c6263d95dcaa1e9111b Mon Sep 17 00:00:00 2001 From: Y5 <124019959+y5c4l3@users.noreply.github.com> Date: Tue, 22 Oct 2024 04:48:04 +0800 Subject: [PATCH 067/106] gh-124651: Quote template strings in `venv` activation scripts (GH-124712) This patch properly quotes template strings in `venv` activation scripts. This mitigates potential command injection. 
--- Lib/test/test_venv.py | 81 +++++++++++++++++++ Lib/venv/__init__.py | 42 ++++++++-- Lib/venv/scripts/common/activate | 10 +-- Lib/venv/scripts/common/activate.fish | 8 +- Lib/venv/scripts/nt/activate.bat | 6 +- Lib/venv/scripts/posix/activate.csh | 8 +- ...-09-28-02-03-04.gh-issue-124651.bLBGtH.rst | 1 + 7 files changed, 135 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py index 1ef08da326c18c..6b2127bd31e40a 100644 --- a/Lib/test/test_venv.py +++ b/Lib/test/test_venv.py @@ -17,6 +17,7 @@ import sys import sysconfig import tempfile +import shlex from test.support import (captured_stdout, captured_stderr, skip_if_broken_multiprocessing_synchronize, verbose, requires_subprocess, is_android, is_apple_mobile, @@ -110,6 +111,10 @@ def get_text_file_contents(self, *args, encoding='utf-8'): result = f.read() return result + def assertEndsWith(self, string, tail): + if not string.endswith(tail): + self.fail(f"String {string!r} does not end with {tail!r}") + class BasicTest(BaseTest): """Test venv module functionality.""" @@ -488,6 +493,82 @@ def test_executable_symlinks(self): 'import sys; print(sys.executable)']) self.assertEqual(out.strip(), envpy.encode()) + # gh-124651: test quoted strings + @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows') + def test_special_chars_bash(self): + """ + Test that the template strings are quoted properly (bash) + """ + rmtree(self.env_dir) + bash = shutil.which('bash') + if bash is None: + self.skipTest('bash required for this test') + env_name = '"\';&&$e|\'"' + env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) + builder = venv.EnvBuilder(clear=True) + builder.create(env_dir) + activate = os.path.join(env_dir, self.bindir, 'activate') + test_script = os.path.join(self.env_dir, 'test_special_chars.sh') + with open(test_script, "w") as f: + f.write(f'source 
{shlex.quote(activate)}\n' + 'python -c \'import sys; print(sys.executable)\'\n' + 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n' + 'deactivate\n') + out, err = check_output([bash, test_script]) + lines = out.splitlines() + self.assertTrue(env_name.encode() in lines[0]) + self.assertEndsWith(lines[1], env_name.encode()) + + # gh-124651: test quoted strings + @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows') + def test_special_chars_csh(self): + """ + Test that the template strings are quoted properly (csh) + """ + rmtree(self.env_dir) + csh = shutil.which('tcsh') or shutil.which('csh') + if csh is None: + self.skipTest('csh required for this test') + env_name = '"\';&&$e|\'"' + env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) + builder = venv.EnvBuilder(clear=True) + builder.create(env_dir) + activate = os.path.join(env_dir, self.bindir, 'activate.csh') + test_script = os.path.join(self.env_dir, 'test_special_chars.csh') + with open(test_script, "w") as f: + f.write(f'source {shlex.quote(activate)}\n' + 'python -c \'import sys; print(sys.executable)\'\n' + 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n' + 'deactivate\n') + out, err = check_output([csh, test_script]) + lines = out.splitlines() + self.assertTrue(env_name.encode() in lines[0]) + self.assertEndsWith(lines[1], env_name.encode()) + + # gh-124651: test quoted strings on Windows + @unittest.skipUnless(os.name == 'nt', 'only relevant on Windows') + def test_special_chars_windows(self): + """ + Test that the template strings are quoted properly on Windows + """ + rmtree(self.env_dir) + env_name = "'&&^$e" + env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) + builder = venv.EnvBuilder(clear=True) + builder.create(env_dir) + activate = os.path.join(env_dir, self.bindir, 'activate.bat') + test_batch = os.path.join(self.env_dir, 'test_special_chars.bat') + with open(test_batch, "w") as f: + f.write('@echo off\n' + 
f'"{activate}" & ' + f'{self.exe} -c "import sys; print(sys.executable)" & ' + f'{self.exe} -c "import os; print(os.environ[\'VIRTUAL_ENV\'])" & ' + 'deactivate') + out, err = check_output([test_batch]) + lines = out.splitlines() + self.assertTrue(env_name.encode() in lines[0]) + self.assertEndsWith(lines[1], env_name.encode()) + @unittest.skipUnless(os.name == 'nt', 'only relevant on Windows') def test_unicode_in_batch_file(self): """ diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py index a5d348ba4cf121..ca1af84e6705fe 100644 --- a/Lib/venv/__init__.py +++ b/Lib/venv/__init__.py @@ -11,6 +11,7 @@ import sys import sysconfig import types +import shlex CORE_VENV_DEPS = ('pip',) @@ -484,11 +485,41 @@ def replace_variables(self, text, context): :param context: The information for the environment creation request being processed. """ - text = text.replace('__VENV_DIR__', context.env_dir) - text = text.replace('__VENV_NAME__', context.env_name) - text = text.replace('__VENV_PROMPT__', context.prompt) - text = text.replace('__VENV_BIN_NAME__', context.bin_name) - text = text.replace('__VENV_PYTHON__', context.env_exe) + replacements = { + '__VENV_DIR__': context.env_dir, + '__VENV_NAME__': context.env_name, + '__VENV_PROMPT__': context.prompt, + '__VENV_BIN_NAME__': context.bin_name, + '__VENV_PYTHON__': context.env_exe, + } + + def quote_ps1(s): + """ + This should satisfy PowerShell quoting rules [1], unless the quoted + string is passed directly to Windows native commands [2]. 
+ [1]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_quoting_rules + [2]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_parsing#passing-arguments-that-contain-quote-characters + """ + s = s.replace("'", "''") + return f"'{s}'" + + def quote_bat(s): + return s + + # gh-124651: need to quote the template strings properly + quote = shlex.quote + script_path = context.script_path + if script_path.endswith('.ps1'): + quote = quote_ps1 + elif script_path.endswith('.bat'): + quote = quote_bat + else: + # fallbacks to POSIX shell compliant quote + quote = shlex.quote + + replacements = {key: quote(s) for key, s in replacements.items()} + for key, quoted in replacements.items(): + text = text.replace(key, quoted) return text def install_scripts(self, context, path): @@ -538,6 +569,7 @@ def skip_file(f): with open(srcfile, 'rb') as f: data = f.read() try: + context.script_path = srcfile new_data = ( self.replace_variables(data.decode('utf-8'), context) .encode('utf-8') diff --git a/Lib/venv/scripts/common/activate b/Lib/venv/scripts/common/activate index 44f137672e9d2e..70673a265d41f8 100644 --- a/Lib/venv/scripts/common/activate +++ b/Lib/venv/scripts/common/activate @@ -41,20 +41,20 @@ case "$(uname)" in CYGWIN*|MSYS*|MINGW*) # transform D:\path\to\venv to /d/path/to/venv on MSYS and MINGW # and to /cygdrive/d/path/to/venv on Cygwin - VIRTUAL_ENV=$(cygpath "__VENV_DIR__") + VIRTUAL_ENV=$(cygpath __VENV_DIR__) export VIRTUAL_ENV ;; *) # use the path as-is - export VIRTUAL_ENV="__VENV_DIR__" + export VIRTUAL_ENV=__VENV_DIR__ ;; esac _OLD_VIRTUAL_PATH="$PATH" -PATH="$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH" +PATH="$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH" export PATH -VIRTUAL_ENV_PROMPT="__VENV_PROMPT__" +VIRTUAL_ENV_PROMPT=__VENV_PROMPT__ export VIRTUAL_ENV_PROMPT # unset PYTHONHOME if set @@ -67,7 +67,7 @@ fi if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then _OLD_VIRTUAL_PS1="${PS1:-}" - 
PS1="(__VENV_PROMPT__) ${PS1:-}" + PS1="("__VENV_PROMPT__") ${PS1:-}" export PS1 fi diff --git a/Lib/venv/scripts/common/activate.fish b/Lib/venv/scripts/common/activate.fish index 25c42756789bbc..284a7469c99b57 100644 --- a/Lib/venv/scripts/common/activate.fish +++ b/Lib/venv/scripts/common/activate.fish @@ -33,11 +33,11 @@ end # Unset irrelevant variables. deactivate nondestructive -set -gx VIRTUAL_ENV "__VENV_DIR__" +set -gx VIRTUAL_ENV __VENV_DIR__ set -gx _OLD_VIRTUAL_PATH $PATH -set -gx PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__" $PATH -set -gx VIRTUAL_ENV_PROMPT "__VENV_PROMPT__" +set -gx PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__ $PATH +set -gx VIRTUAL_ENV_PROMPT __VENV_PROMPT__ # Unset PYTHONHOME if set. if set -q PYTHONHOME @@ -57,7 +57,7 @@ if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" set -l old_status $status # Output the venv prompt; color taken from the blue of the Python logo. - printf "%s(%s)%s " (set_color 4B8BBE) "__VENV_PROMPT__" (set_color normal) + printf "%s(%s)%s " (set_color 4B8BBE) __VENV_PROMPT__ (set_color normal) # Restore the return status of the previous command. echo "exit $old_status" | . 
diff --git a/Lib/venv/scripts/nt/activate.bat b/Lib/venv/scripts/nt/activate.bat index dd5ea8eb67b90a..35533e4b551155 100644 --- a/Lib/venv/scripts/nt/activate.bat +++ b/Lib/venv/scripts/nt/activate.bat @@ -8,7 +8,7 @@ if defined _OLD_CODEPAGE ( "%SystemRoot%\System32\chcp.com" 65001 > nul ) -set VIRTUAL_ENV=__VENV_DIR__ +set "VIRTUAL_ENV=__VENV_DIR__" if not defined PROMPT set PROMPT=$P$G @@ -24,8 +24,8 @@ set PYTHONHOME= if defined _OLD_VIRTUAL_PATH set PATH=%_OLD_VIRTUAL_PATH% if not defined _OLD_VIRTUAL_PATH set _OLD_VIRTUAL_PATH=%PATH% -set PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH% -set VIRTUAL_ENV_PROMPT=__VENV_PROMPT__ +set "PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH%" +set "VIRTUAL_ENV_PROMPT=__VENV_PROMPT__" :END if defined _OLD_CODEPAGE ( diff --git a/Lib/venv/scripts/posix/activate.csh b/Lib/venv/scripts/posix/activate.csh index b5db4a0f847e06..2a3fa835476ab9 100644 --- a/Lib/venv/scripts/posix/activate.csh +++ b/Lib/venv/scripts/posix/activate.csh @@ -9,17 +9,17 @@ alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PA # Unset irrelevant variables. deactivate nondestructive -setenv VIRTUAL_ENV "__VENV_DIR__" +setenv VIRTUAL_ENV __VENV_DIR__ set _OLD_VIRTUAL_PATH="$PATH" -setenv PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH" -setenv VIRTUAL_ENV_PROMPT "__VENV_PROMPT__" +setenv PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH" +setenv VIRTUAL_ENV_PROMPT __VENV_PROMPT__ set _OLD_VIRTUAL_PROMPT="$prompt" if (! 
"$?VIRTUAL_ENV_DISABLE_PROMPT") then - set prompt = "(__VENV_PROMPT__) $prompt:q" + set prompt = "("__VENV_PROMPT__") $prompt:q" endif alias pydoc python -m pydoc diff --git a/Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst b/Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst new file mode 100644 index 00000000000000..17fc9171390dd9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst @@ -0,0 +1 @@ +Properly quote template strings in :mod:`venv` activation scripts. From 4848b0b92ce2737cea08fa3b322fd0f0a671bb07 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 21 Oct 2024 15:49:58 -0600 Subject: [PATCH 068/106] gh-125716: Use A Global Mutex When Initializing Global State For The _interpqueues Module (gh-125803) This includes a drive-by cleanup in _queues_init() and _queues_fini(). This change also applies to the _interpchannels module. --- Modules/_interpchannelsmodule.c | 64 +++++++++++++++++------------ Modules/_interpqueuesmodule.c | 72 ++++++++++++++++++--------------- 2 files changed, 79 insertions(+), 57 deletions(-) diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index c52cde6da500f7..8e6b21db76e01c 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -28,6 +28,7 @@ This module has the following process-global state: _globals (static struct globals): + mutex (PyMutex) module_count (int) channels (struct _channels): numopen (int64_t) @@ -1349,21 +1350,29 @@ typedef struct _channels { static void _channels_init(_channels *channels, PyThread_type_lock mutex) { - channels->mutex = mutex; - channels->head = NULL; - channels->numopen = 0; - channels->next_id = 0; + assert(mutex != NULL); + assert(channels->mutex == NULL); + *channels = (_channels){ + .mutex = mutex, + .head = NULL, + .numopen = 0, + .next_id = 0, + }; } static void -_channels_fini(_channels *channels) +_channels_fini(_channels *channels, 
PyThread_type_lock *p_mutex) { + PyThread_type_lock mutex = channels->mutex; + assert(mutex != NULL); + + PyThread_acquire_lock(mutex, WAIT_LOCK); assert(channels->numopen == 0); assert(channels->head == NULL); - if (channels->mutex != NULL) { - PyThread_free_lock(channels->mutex); - channels->mutex = NULL; - } + *channels = (_channels){0}; + PyThread_release_lock(mutex); + + *p_mutex = mutex; } static int64_t @@ -2812,6 +2821,7 @@ set_channelend_types(PyObject *mod, PyTypeObject *send, PyTypeObject *recv) the data that we need to share between interpreters, so it cannot hold PyObject values. */ static struct globals { + PyMutex mutex; int module_count; _channels channels; } _globals = {0}; @@ -2819,32 +2829,36 @@ static struct globals { static int _globals_init(void) { - // XXX This isn't thread-safe. + PyMutex_Lock(&_globals.mutex); + assert(_globals.module_count >= 0); _globals.module_count++; - if (_globals.module_count > 1) { - // Already initialized. - return 0; - } - - assert(_globals.channels.mutex == NULL); - PyThread_type_lock mutex = PyThread_allocate_lock(); - if (mutex == NULL) { - return ERR_CHANNELS_MUTEX_INIT; + if (_globals.module_count == 1) { + // Called for the first time. + PyThread_type_lock mutex = PyThread_allocate_lock(); + if (mutex == NULL) { + _globals.module_count--; + PyMutex_Unlock(&_globals.mutex); + return ERR_CHANNELS_MUTEX_INIT; + } + _channels_init(&_globals.channels, mutex); } - _channels_init(&_globals.channels, mutex); + PyMutex_Unlock(&_globals.mutex); return 0; } static void _globals_fini(void) { - // XXX This isn't thread-safe. 
+ PyMutex_Lock(&_globals.mutex); + assert(_globals.module_count > 0); _globals.module_count--; - if (_globals.module_count > 0) { - return; + if (_globals.module_count == 0) { + PyThread_type_lock mutex; + _channels_fini(&_globals.channels, &mutex); + assert(mutex != NULL); + PyThread_free_lock(mutex); } - - _channels_fini(&_globals.channels); + PyMutex_Unlock(&_globals.mutex); } static _channels * diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index aa70134d82b046..297a1763a98ce6 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -845,28 +845,31 @@ typedef struct _queues { static void _queues_init(_queues *queues, PyThread_type_lock mutex) { - queues->mutex = mutex; - queues->head = NULL; - queues->count = 0; - queues->next_id = 1; + assert(mutex != NULL); + assert(queues->mutex == NULL); + *queues = (_queues){ + .mutex = mutex, + .head = NULL, + .count = 0, + .next_id = 1, + }; } static void -_queues_fini(_queues *queues) +_queues_fini(_queues *queues, PyThread_type_lock *p_mutex) { + PyThread_type_lock mutex = queues->mutex; + assert(mutex != NULL); + + PyThread_acquire_lock(mutex, WAIT_LOCK); if (queues->count > 0) { - PyThread_acquire_lock(queues->mutex, WAIT_LOCK); - assert((queues->count == 0) != (queues->head != NULL)); - _queueref *head = queues->head; - queues->head = NULL; - queues->count = 0; - PyThread_release_lock(queues->mutex); - _queuerefs_clear(head); - } - if (queues->mutex != NULL) { - PyThread_free_lock(queues->mutex); - queues->mutex = NULL; + assert(queues->head != NULL); + _queuerefs_clear(queues->head); } + *queues = (_queues){0}; + PyThread_release_lock(mutex); + + *p_mutex = mutex; } static int64_t @@ -1398,6 +1401,7 @@ _queueobj_shared(PyThreadState *tstate, PyObject *queueobj, the data that we need to share between interpreters, so it cannot hold PyObject values. 
*/ static struct globals { + PyMutex mutex; int module_count; _queues queues; } _globals = {0}; @@ -1405,32 +1409,36 @@ static struct globals { static int _globals_init(void) { - // XXX This isn't thread-safe. + PyMutex_Lock(&_globals.mutex); + assert(_globals.module_count >= 0); _globals.module_count++; - if (_globals.module_count > 1) { - // Already initialized. - return 0; - } - - assert(_globals.queues.mutex == NULL); - PyThread_type_lock mutex = PyThread_allocate_lock(); - if (mutex == NULL) { - return ERR_QUEUES_ALLOC; + if (_globals.module_count == 1) { + // Called for the first time. + PyThread_type_lock mutex = PyThread_allocate_lock(); + if (mutex == NULL) { + _globals.module_count--; + PyMutex_Unlock(&_globals.mutex); + return ERR_QUEUES_ALLOC; + } + _queues_init(&_globals.queues, mutex); } - _queues_init(&_globals.queues, mutex); + PyMutex_Unlock(&_globals.mutex); return 0; } static void _globals_fini(void) { - // XXX This isn't thread-safe. + PyMutex_Lock(&_globals.mutex); + assert(_globals.module_count > 0); _globals.module_count--; - if (_globals.module_count > 0) { - return; + if (_globals.module_count == 0) { + PyThread_type_lock mutex; + _queues_fini(&_globals.queues, &mutex); + assert(mutex != NULL); + PyThread_free_lock(mutex); } - - _queues_fini(&_globals.queues); + PyMutex_Unlock(&_globals.mutex); } static _queues * From d0bfff47fb2aea9272b56ac05984eaacc32379cc Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 21 Oct 2024 23:37:31 +0100 Subject: [PATCH 069/106] gh-119786: [doc] more consistent syntax in InternalDocs (#125815) --- InternalDocs/adaptive.md | 3 +- InternalDocs/compiler.md | 418 +++++++++++++---------------- InternalDocs/exception_handling.md | 28 +- InternalDocs/frames.md | 18 +- InternalDocs/garbage_collector.md | 128 ++++----- InternalDocs/parser.md | 214 +++++++-------- 6 files changed, 380 insertions(+), 429 deletions(-) diff --git a/InternalDocs/adaptive.md 
b/InternalDocs/adaptive.md index 09245730b271fa..4ae9e85b387f39 100644 --- a/InternalDocs/adaptive.md +++ b/InternalDocs/adaptive.md @@ -31,8 +31,7 @@ although these are not fundamental and may change: ## Example family -The `LOAD_GLOBAL` instruction (in -[Python/bytecodes.c](https://github.com/python/cpython/blob/main/Python/bytecodes.c)) +The `LOAD_GLOBAL` instruction (in [Python/bytecodes.c](../Python/bytecodes.c)) already has an adaptive family that serves as a relatively simple example. The `LOAD_GLOBAL` instruction performs adaptive specialization, diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index e9608977b0cbb3..0da4670c792cb5 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -7,17 +7,16 @@ Abstract In CPython, the compilation from source code to bytecode involves several steps: -1. Tokenize the source code - [Parser/lexer/](https://github.com/python/cpython/blob/main/Parser/lexer/) - and [Parser/tokenizer/](https://github.com/python/cpython/blob/main/Parser/tokenizer/). +1. Tokenize the source code [Parser/lexer/](../Parser/lexer/) + and [Parser/tokenizer/](../Parser/tokenizer/). 2. Parse the stream of tokens into an Abstract Syntax Tree - [Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c). + [Parser/parser.c](../Parser/parser.c). 3. Transform AST into an instruction sequence - [Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c). + [Python/compile.c](../Python/compile.c). 4. Construct a Control Flow Graph and apply optimizations to it - [Python/flowgraph.c](https://github.com/python/cpython/blob/main/Python/flowgraph.c). + [Python/flowgraph.c](../Python/flowgraph.c). 5. Emit bytecode based on the Control Flow Graph - [Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c). + [Python/assemble.c](../Python/assemble.c). This document outlines how these steps of the process work. 
@@ -36,12 +35,10 @@ of tokens rather than a stream of characters which is more common with PEG parsers. The grammar file for Python can be found in -[Grammar/python.gram](https://github.com/python/cpython/blob/main/Grammar/python.gram). -The definitions for literal tokens (such as ``:``, numbers, etc.) can be found in -[Grammar/Tokens](https://github.com/python/cpython/blob/main/Grammar/Tokens). -Various C files, including -[Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c) -are generated from these. +[Grammar/python.gram](../Grammar/python.gram). +The definitions for literal tokens (such as `:`, numbers, etc.) can be found in +[Grammar/Tokens](../Grammar/Tokens). Various C files, including +[Parser/parser.c](../Parser/parser.c) are generated from these. See Also: @@ -63,7 +60,7 @@ specification of the AST nodes is specified using the Zephyr Abstract Syntax Definition Language (ASDL) [^1], [^2]. The definition of the AST nodes for Python is found in the file -[Parser/Python.asdl](https://github.com/python/cpython/blob/main/Parser/Python.asdl). +[Parser/Python.asdl](../Parser/Python.asdl). Each AST node (representing statements, expressions, and several specialized types, like list comprehensions and exception handlers) is @@ -87,14 +84,14 @@ approach and syntax: The preceding example describes two different kinds of statements and an expression: function definitions, return statements, and yield expressions. -All three kinds are considered of type ``stmt`` as shown by ``|`` separating +All three kinds are considered of type `stmt` as shown by `|` separating the various kinds. They all take arguments of various kinds and amounts. -Modifiers on the argument type specify the number of values needed; ``?`` -means it is optional, ``*`` means 0 or more, while no modifier means only one -value for the argument and it is required. 
``FunctionDef``, for instance, -takes an ``identifier`` for the *name*, ``arguments`` for *args*, zero or more -``stmt`` arguments for *body*, and zero or more ``expr`` arguments for +Modifiers on the argument type specify the number of values needed; `?` +means it is optional, `*` means 0 or more, while no modifier means only one +value for the argument and it is required. `FunctionDef`, for instance, +takes an `identifier` for the *name*, `arguments` for *args*, zero or more +`stmt` arguments for *body*, and zero or more `expr` arguments for *decorators*. Do notice that something like 'arguments', which is a node type, is @@ -132,9 +129,9 @@ The statement definitions above generate the following C structure type: ``` Also generated are a series of constructor functions that allocate (in -this case) a ``stmt_ty`` struct with the appropriate initialization. The -``kind`` field specifies which component of the union is initialized. The -``FunctionDef()`` constructor function sets 'kind' to ``FunctionDef_kind`` and +this case) a `stmt_ty` struct with the appropriate initialization. The +`kind` field specifies which component of the union is initialized. The +`FunctionDef()` constructor function sets 'kind' to `FunctionDef_kind` and initializes the *name*, *args*, *body*, and *attributes* fields. See also @@ -156,13 +153,13 @@ In general, unless you are working on the critical core of the compiler, memory management can be completely ignored. But if you are working at either the very beginning of the compiler or the end, you need to care about how the arena works. All code relating to the arena is in either -[Include/internal/pycore_pyarena.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_pyarena.h) -or [Python/pyarena.c](https://github.com/python/cpython/blob/main/Python/pyarena.c). +[Include/internal/pycore_pyarena.h](../Include/internal/pycore_pyarena.h) +or [Python/pyarena.c](../Python/pyarena.c). -``PyArena_New()`` will create a new arena. 
The returned ``PyArena`` structure +`PyArena_New()` will create a new arena. The returned `PyArena` structure will store pointers to all memory given to it. This does the bookkeeping of what memory needs to be freed when the compiler is finished with the memory it -used. That freeing is done with ``PyArena_Free()``. This only needs to be +used. That freeing is done with `PyArena_Free()`. This only needs to be called in strategic areas where the compiler exits. As stated above, in general you should not have to worry about memory @@ -173,25 +170,25 @@ The only exception comes about when managing a PyObject. Since the rest of Python uses reference counting, there is extra support added to the arena to cleanup each PyObject that was allocated. These cases are very rare. However, if you've allocated a PyObject, you must tell -the arena about it by calling ``PyArena_AddPyObject()``. +the arena about it by calling `PyArena_AddPyObject()`. Source code to AST ================== The AST is generated from source code using the function -``_PyParser_ASTFromString()`` or ``_PyParser_ASTFromFile()`` -[Parser/peg_api.c](https://github.com/python/cpython/blob/main/Parser/peg_api.c). +`_PyParser_ASTFromString()` or `_PyParser_ASTFromFile()` +[Parser/peg_api.c](../Parser/peg_api.c). After some checks, a helper function in -[Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c) +[Parser/parser.c](../Parser/parser.c) begins applying production rules on the source code it receives; converting source code to tokens and matching these tokens recursively to their corresponding rule. The production rule's corresponding rule function is called on every match. These rule functions follow the format `xx_rule`. 
Where *xx* is the grammar rule that the function handles and is automatically derived from -[Grammar/python.gram](https://github.com/python/cpython/blob/main/Grammar/python.gram) by -[Tools/peg_generator/pegen/c_generator.py](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/c_generator.py). +[Grammar/python.gram](../Grammar/python.gram) by +[Tools/peg_generator/pegen/c_generator.py](../Tools/peg_generator/pegen/c_generator.py). Each rule function in turn creates an AST node as it goes along. It does this by allocating all the new nodes it needs, calling the proper AST node creation @@ -202,18 +199,15 @@ there are no more rules, an error is set and the parsing ends. The AST node creation helper functions have the name `_PyAST_{xx}` where *xx* is the AST node that the function creates. These are defined by the -ASDL grammar and contained in -[Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c) -(which is generated by -[Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py) -from -[Parser/Python.asdl](https://github.com/python/cpython/blob/main/Parser/Python.asdl)). -This all leads to a sequence of AST nodes stored in ``asdl_seq`` structs. +ASDL grammar and contained in [Python/Python-ast.c](../Python/Python-ast.c) +(which is generated by [Parser/asdl_c.py](../Parser/asdl_c.py) +from [Parser/Python.asdl](../Parser/Python.asdl)). +This all leads to a sequence of AST nodes stored in `asdl_seq` structs. To demonstrate everything explained so far, here's the rule function responsible for a simple named import statement such as -``import sys``. Note that error-checking and debugging code has been -omitted. Removed parts are represented by ``...``. +`import sys`. Note that error-checking and debugging code has been +omitted. Removed parts are represented by `...`. Furthermore, some comments have been added for explanation. These comments may not be present in the actual code. 
@@ -255,55 +249,52 @@ may not be present in the actual code. To improve backtracking performance, some rules (chosen by applying a -``(memo)`` flag in the grammar file) are memoized. Each rule function checks if +`(memo)` flag in the grammar file) are memoized. Each rule function checks if a memoized version exists and returns that if so, else it continues in the manner stated in the previous paragraphs. -There are macros for creating and using ``asdl_xx_seq *`` types, where *xx* is +There are macros for creating and using `asdl_xx_seq *` types, where *xx* is a type of the ASDL sequence. Three main types are defined -manually -- ``generic``, ``identifier`` and ``int``. These types are found in -[Python/asdl.c](https://github.com/python/cpython/blob/main/Python/asdl.c) -and its corresponding header file -[Include/internal/pycore_asdl.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_asdl.h). -Functions and macros for creating ``asdl_xx_seq *`` types are as follows: - -``_Py_asdl_generic_seq_new(Py_ssize_t, PyArena *)`` - Allocate memory for an ``asdl_generic_seq`` of the specified length -``_Py_asdl_identifier_seq_new(Py_ssize_t, PyArena *)`` - Allocate memory for an ``asdl_identifier_seq`` of the specified length -``_Py_asdl_int_seq_new(Py_ssize_t, PyArena *)`` - Allocate memory for an ``asdl_int_seq`` of the specified length +manually -- `generic`, `identifier` and `int`. These types are found in +[Python/asdl.c](../Python/asdl.c) and its corresponding header file +[Include/internal/pycore_asdl.h](../Include/internal/pycore_asdl.h). 
+Functions and macros for creating `asdl_xx_seq *` types are as follows: + +`_Py_asdl_generic_seq_new(Py_ssize_t, PyArena *)` + Allocate memory for an `asdl_generic_seq` of the specified length +`_Py_asdl_identifier_seq_new(Py_ssize_t, PyArena *)` + Allocate memory for an `asdl_identifier_seq` of the specified length +`_Py_asdl_int_seq_new(Py_ssize_t, PyArena *)` + Allocate memory for an `asdl_int_seq` of the specified length In addition to the three types mentioned above, some ASDL sequence types are -automatically generated by -[Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py) -and found in -[Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h). +automatically generated by [Parser/asdl_c.py](../Parser/asdl_c.py) and found in +[Include/internal/pycore_ast.h](../Include/internal/pycore_ast.h). Macros for using both manually defined and automatically generated ASDL sequence types are as follows: -``asdl_seq_GET(asdl_xx_seq *, int)`` - Get item held at a specific position in an ``asdl_xx_seq`` -``asdl_seq_SET(asdl_xx_seq *, int, stmt_ty)`` - Set a specific index in an ``asdl_xx_seq`` to the specified value +`asdl_seq_GET(asdl_xx_seq *, int)` + Get item held at a specific position in an `asdl_xx_seq` +`asdl_seq_SET(asdl_xx_seq *, int, stmt_ty)` + Set a specific index in an `asdl_xx_seq` to the specified value Untyped counterparts exist for some of the typed macros. 
These are useful when a function needs to manipulate a generic ASDL sequence: -``asdl_seq_GET_UNTYPED(asdl_seq *, int)`` - Get item held at a specific position in an ``asdl_seq`` -``asdl_seq_SET_UNTYPED(asdl_seq *, int, stmt_ty)`` - Set a specific index in an ``asdl_seq`` to the specified value -``asdl_seq_LEN(asdl_seq *)`` - Return the length of an ``asdl_seq`` or ``asdl_xx_seq`` +`asdl_seq_GET_UNTYPED(asdl_seq *, int)` + Get item held at a specific position in an `asdl_seq` +`asdl_seq_SET_UNTYPED(asdl_seq *, int, stmt_ty)` + Set a specific index in an `asdl_seq` to the specified value +`asdl_seq_LEN(asdl_seq *)` + Return the length of an `asdl_seq` or `asdl_xx_seq` Note that typed macros and functions are recommended over their untyped counterparts. Typed macros carry out checks in debug mode and aid -debugging errors caused by incorrectly casting from ``void *``. +debugging errors caused by incorrectly casting from `void *`. If you are working with statements, you must also worry about keeping track of what line number generated the statement. Currently the line -number is passed as the last parameter to each ``stmt_ty`` function. +number is passed as the last parameter to each `stmt_ty` function. See also [PEP 617: New PEG parser for CPython](https://peps.python.org/pep-0617/). @@ -333,19 +324,19 @@ else: end() ``` -The ``x < 10`` guard is represented by its own basic block that -compares ``x`` with ``10`` and then ends in a conditional jump based on +The `x < 10` guard is represented by its own basic block that +compares `x` with `10` and then ends in a conditional jump based on the result of the comparison. This conditional jump allows the block -to point to both the body of the ``if`` and the body of the ``else``. The -``if`` basic block contains the ``f1()`` and ``f2()`` calls and points to -the ``end()`` basic block. The ``else`` basic block contains the ``g()`` -call and similarly points to the ``end()`` block. 
+to point to both the body of the `if` and the body of the `else`. The +`if` basic block contains the `f1()` and `f2()` calls and points to +the `end()` basic block. The `else` basic block contains the `g()` +call and similarly points to the `end()` block. -Note that more complex code in the guard, the ``if`` body, or the ``else`` +Note that more complex code in the guard, the `if` body, or the `else` body may be represented by multiple basic blocks. For instance, -short-circuiting boolean logic in a guard like ``if x or y:`` -will produce one basic block that tests the truth value of ``x`` -and then points both (1) to the start of the ``if`` body and (2) to +short-circuiting boolean logic in a guard like `if x or y:` +will produce one basic block that tests the truth value of `x` +and then points both (1) to the start of the `if` body and (2) to a different basic block that tests the truth value of y. CFGs are useful as an intermediate representation of the code because @@ -354,27 +345,24 @@ they are a convenient data structure for optimizations. AST to CFG to bytecode ====================== -The conversion of an ``AST`` to bytecode is initiated by a call to the function -``_PyAST_Compile()`` in -[Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c). +The conversion of an `AST` to bytecode is initiated by a call to the function +`_PyAST_Compile()` in [Python/compile.c](../Python/compile.c). The first step is to construct the symbol table. This is implemented by -``_PySymtable_Build()`` in -[Python/symtable.c](https://github.com/python/cpython/blob/main/Python/symtable.c). +`_PySymtable_Build()` in [Python/symtable.c](../Python/symtable.c). This function begins by entering the starting code block for the AST (passed-in) and then calling the proper `symtable_visit_{xx}` function (with *xx* being the AST node type). 
Next, the AST tree is walked with the various code blocks that delineate the reach of a local variable as blocks are entered and exited using -``symtable_enter_block()`` and ``symtable_exit_block()``, respectively. - -Once the symbol table is created, the ``AST`` is transformed by ``compiler_codegen()`` -in [Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c) -into a sequence of pseudo instructions. These are similar to bytecode, but -in some cases they are more abstract, and are resolved later into actual -bytecode. The construction of this instruction sequence is handled by several -functions that break the task down by various AST node types. The functions are -all named `compiler_visit_{xx}` where *xx* is the name of the node type (such -as ``stmt``, ``expr``, etc.). Each function receives a ``struct compiler *`` +`symtable_enter_block()` and `symtable_exit_block()`, respectively. + +Once the symbol table is created, the `AST` is transformed by `compiler_codegen()` +in [Python/compile.c](../Python/compile.c) into a sequence of pseudo instructions. +These are similar to bytecode, but in some cases they are more abstract, and are +resolved later into actual bytecode. The construction of this instruction sequence +is handled by several functions that break the task down by various AST node types. +The functions are all named `compiler_visit_{xx}` where *xx* is the name of the node +type (such as `stmt`, `expr`, etc.). Each function receives a `struct compiler *` and `{xx}_ty` where *xx* is the AST node type. Typically these functions consist of a large 'switch' statement, branching based on the kind of node type passed to it. Simple things are handled inline in the @@ -382,242 +370,224 @@ node type passed to it. Simple things are handled inline in the functions named `compiler_{xx}` with *xx* being a descriptive name of what is being handled. -When transforming an arbitrary AST node, use the ``VISIT()`` macro. 
+When transforming an arbitrary AST node, use the `VISIT()` macro. The appropriate `compiler_visit_{xx}` function is called, based on the value passed in for (so `VISIT({c}, expr, {node})` calls -`compiler_visit_expr({c}, {node})`). The ``VISIT_SEQ()`` macro is very similar, +`compiler_visit_expr({c}, {node})`). The `VISIT_SEQ()` macro is very similar, but is called on AST node sequences (those values that were created as arguments to a node that used the '*' modifier). Emission of bytecode is handled by the following macros: -* ``ADDOP(struct compiler *, location, int)`` +* `ADDOP(struct compiler *, location, int)` add a specified opcode -* ``ADDOP_IN_SCOPE(struct compiler *, location, int)`` - like ``ADDOP``, but also exits current scope; used for adding return value +* `ADDOP_IN_SCOPE(struct compiler *, location, int)` + like `ADDOP`, but also exits current scope; used for adding return value opcodes in lambdas and closures -* ``ADDOP_I(struct compiler *, location, int, Py_ssize_t)`` +* `ADDOP_I(struct compiler *, location, int, Py_ssize_t)` add an opcode that takes an integer argument -* ``ADDOP_O(struct compiler *, location, int, PyObject *, TYPE)`` +* `ADDOP_O(struct compiler *, location, int, PyObject *, TYPE)` add an opcode with the proper argument based on the position of the specified PyObject in PyObject sequence object, but with no handling of mangled names; used for when you need to do named lookups of objects such as globals, consts, or parameters where name mangling is not possible and the scope of the name is known; *TYPE* is the name of PyObject sequence - (``names`` or ``varnames``) -* ``ADDOP_N(struct compiler *, location, int, PyObject *, TYPE)`` - just like ``ADDOP_O``, but steals a reference to PyObject -* ``ADDOP_NAME(struct compiler *, location, int, PyObject *, TYPE)`` - just like ``ADDOP_O``, but name mangling is also handled; used for + (`names` or `varnames`) +* `ADDOP_N(struct compiler *, location, int, PyObject *, TYPE)` + just like 
`ADDOP_O`, but steals a reference to PyObject +* `ADDOP_NAME(struct compiler *, location, int, PyObject *, TYPE)` + just like `ADDOP_O`, but name mangling is also handled; used for attribute loading or importing based on name -* ``ADDOP_LOAD_CONST(struct compiler *, location, PyObject *)`` - add the ``LOAD_CONST`` opcode with the proper argument based on the +* `ADDOP_LOAD_CONST(struct compiler *, location, PyObject *)` + add the `LOAD_CONST` opcode with the proper argument based on the position of the specified PyObject in the consts table. -* ``ADDOP_LOAD_CONST_NEW(struct compiler *, location, PyObject *)`` - just like ``ADDOP_LOAD_CONST_NEW``, but steals a reference to PyObject -* ``ADDOP_JUMP(struct compiler *, location, int, basicblock *)`` +* `ADDOP_LOAD_CONST_NEW(struct compiler *, location, PyObject *)` + just like `ADDOP_LOAD_CONST`, but steals a reference to PyObject +* `ADDOP_JUMP(struct compiler *, location, int, basicblock *)` create a jump to a basic block -The ``location`` argument is a struct with the source location to be +The `location` argument is a struct with the source location to be associated with this instruction. It is typically extracted from an -``AST`` node with the ``LOC`` macro. The ``NO_LOCATION`` can be used +`AST` node with the `LOC` macro. The `NO_LOCATION` can be used for *synthetic* instructions, which we do not associate with a line -number at this stage. For example, the implicit ``return None`` +number at this stage. For example, the implicit `return None` which is added at the end of a function is not associated with any line in the source code. There are several helper functions that will emit pseudo-instructions and are named `compiler_{xx}()` where *xx* is what the function helps -with (``list``, ``boolop``, etc.). A rather useful one is ``compiler_nameop()``. +with (`list`, `boolop`, etc.). A rather useful one is `compiler_nameop()`.
This function looks up the scope of a variable and, based on the expression context, emits the proper opcode to load, store, or delete the variable. Once the instruction sequence is created, it is transformed into a CFG -by ``_PyCfg_FromInstructionSequence()``. Then ``_PyCfg_OptimizeCodeUnit()`` +by `_PyCfg_FromInstructionSequence()`. Then `_PyCfg_OptimizeCodeUnit()` applies various peephole optimizations, and -``_PyCfg_OptimizedCfgToInstructionSequence()`` converts the optimized ``CFG`` +`_PyCfg_OptimizedCfgToInstructionSequence()` converts the optimized `CFG` back into an instruction sequence. These conversions and optimizations are -implemented in -[Python/flowgraph.c](https://github.com/python/cpython/blob/main/Python/flowgraph.c). +implemented in [Python/flowgraph.c](../Python/flowgraph.c). Finally, the sequence of pseudo-instructions is converted into actual bytecode. This includes transforming pseudo instructions into actual instructions, converting jump targets from logical labels to relative offsets, and -construction of the -[exception table](exception_handling.md) and -[locations table](https://github.com/python/cpython/blob/main/InternalDocs/locations.md). -The bytecode and tables are then wrapped into a ``PyCodeObject`` along with additional -metadata, including the ``consts`` and ``names`` arrays, information about function +construction of the [exception table](exception_handling.md) and +[locations table](locations.md). +The bytecode and tables are then wrapped into a `PyCodeObject` along with additional +metadata, including the `consts` and `names` arrays, information about function reference to the source code (filename, etc). All of this is implemented by -``_PyAssemble_MakeCodeObject()`` in -[Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c). +`_PyAssemble_MakeCodeObject()` in [Python/assemble.c](../Python/assemble.c). 
Code objects ============ -The result of ``PyAST_CompileObject()`` is a ``PyCodeObject`` which is defined in -[Include/cpython/code.h](https://github.com/python/cpython/blob/main/Include/cpython/code.h). +The result of `PyAST_CompileObject()` is a `PyCodeObject` which is defined in +[Include/cpython/code.h](../Include/cpython/code.h). And with that you now have executable Python bytecode! -The code objects (byte code) are executed in -[Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c). +The code objects (byte code) are executed in [Python/ceval.c](../Python/ceval.c). This file will also need a new case statement for the new opcode in the big switch -statement in ``_PyEval_EvalFrameDefault()``. +statement in `_PyEval_EvalFrameDefault()`. Important files =============== -* [Parser/](https://github.com/python/cpython/blob/main/Parser/) +* [Parser/](../Parser/) - * [Parser/Python.asdl](https://github.com/python/cpython/blob/main/Parser/Python.asdl): + * [Parser/Python.asdl](../Parser/Python.asdl): ASDL syntax file. - * [Parser/asdl.py](https://github.com/python/cpython/blob/main/Parser/asdl.py): + * [Parser/asdl.py](../Parser/asdl.py): Parser for ASDL definition files. Reads in an ASDL description and parses it into an AST that describes it. - * [Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py): + * [Parser/asdl_c.py](../Parser/asdl_c.py): Generate C code from an ASDL description. Generates - [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c) - and - [Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h). - - * [Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c): - The new PEG parser introduced in Python 3.9. 
- Generated by - [Tools/peg_generator/pegen/c_generator.py](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/c_generator.py) - from the grammar [Grammar/python.gram](https://github.com/python/cpython/blob/main/Grammar/python.gram). + [Python/Python-ast.c](../Python/Python-ast.c) and + [Include/internal/pycore_ast.h](../Include/internal/pycore_ast.h). + + * [Parser/parser.c](../Parser/parser.c): + The new PEG parser introduced in Python 3.9. Generated by + [Tools/peg_generator/pegen/c_generator.py](../Tools/peg_generator/pegen/c_generator.py) + from the grammar [Grammar/python.gram](../Grammar/python.gram). Creates the AST from source code. Rule functions for their corresponding production rules are found here. - * [Parser/peg_api.c](https://github.com/python/cpython/blob/main/Parser/peg_api.c): - Contains high-level functions which are - used by the interpreter to create an AST from source code. + * [Parser/peg_api.c](../Parser/peg_api.c): + Contains high-level functions which are used by the interpreter to create + an AST from source code. - * [Parser/pegen.c](https://github.com/python/cpython/blob/main/Parser/pegen.c): + * [Parser/pegen.c](../Parser/pegen.c): Contains helper functions which are used by functions in - [Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c) - to construct the AST. Also contains helper functions which help raise better error messages - when parsing source code. + [Parser/parser.c](../Parser/parser.c) to construct the AST. Also contains + helper functions which help raise better error messages when parsing source code. - * [Parser/pegen.h](https://github.com/python/cpython/blob/main/Parser/pegen.h): - Header file for the corresponding - [Parser/pegen.c](https://github.com/python/cpython/blob/main/Parser/pegen.c). - Also contains definitions of the ``Parser`` and ``Token`` structs. 
+ * [Parser/pegen.h](../Parser/pegen.h): + Header file for the corresponding [Parser/pegen.c](../Parser/pegen.c). + Also contains definitions of the `Parser` and `Token` structs. -* [Python/](https://github.com/python/cpython/blob/main/Python) +* [Python/](../Python) - * [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c): + * [Python/Python-ast.c](../Python/Python-ast.c): Creates C structs corresponding to the ASDL types. Also contains code for marshalling AST nodes (core ASDL types have marshalling code in - [Python/asdl.c](https://github.com/python/cpython/blob/main/Python/asdl.c)). - File automatically generated by - [Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py). + [Python/asdl.c](../Python/asdl.c)). + File automatically generated by [Parser/asdl_c.py](../Parser/asdl_c.py). This file must be committed separately after every grammar change - is committed since the ``__version__`` value is set to the latest + is committed since the `__version__` value is set to the latest grammar change revision number. - * [Python/asdl.c](https://github.com/python/cpython/blob/main/Python/asdl.c): + * [Python/asdl.c](../Python/asdl.c): Contains code to handle the ASDL sequence type. Also has code to handle marshalling the core ASDL types, such as number - and identifier. Used by - [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c) + and identifier. Used by [Python/Python-ast.c](../Python/Python-ast.c) for marshalling AST nodes. - * [Python/ast.c](https://github.com/python/cpython/blob/main/Python/ast.c): + * [Python/ast.c](../Python/ast.c): Used for validating the AST. - * [Python/ast_opt.c](https://github.com/python/cpython/blob/main/Python/ast_opt.c): + * [Python/ast_opt.c](../Python/ast_opt.c): Optimizes the AST. 
- * [Python/ast_unparse.c](https://github.com/python/cpython/blob/main/Python/ast_unparse.c): + * [Python/ast_unparse.c](../Python/ast_unparse.c): Converts the AST expression node back into a string (for string annotations). - * [Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c): + * [Python/ceval.c](../Python/ceval.c): Executes byte code (aka, eval loop). - * [Python/symtable.c](https://github.com/python/cpython/blob/main/Python/symtable.c): + * [Python/symtable.c](../Python/symtable.c): Generates a symbol table from AST. - * [Python/pyarena.c](https://github.com/python/cpython/blob/main/Python/pyarena.c): + * [Python/pyarena.c](../Python/pyarena.c): Implementation of the arena memory manager. - * [Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c): + * [Python/compile.c](../Python/compile.c): Emits pseudo bytecode based on the AST. - * [Python/flowgraph.c](https://github.com/python/cpython/blob/main/Python/flowgraph.c): + * [Python/flowgraph.c](../Python/flowgraph.c): Implements peephole optimizations. - * [Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c): + * [Python/assemble.c](../Python/assemble.c): Constructs a code object from a sequence of pseudo instructions. - * [Python/instruction_sequence.c](https://github.com/python/cpython/blob/main/Python/instruction_sequence.c): + * [Python/instruction_sequence.c](../Python/instruction_sequence.c): A data structure representing a sequence of bytecode-like pseudo-instructions. -* [Include/](https://github.com/python/cpython/blob/main/Include/) +* [Include/](../Include/) - * [Include/cpython/code.h](https://github.com/python/cpython/blob/main/Include/cpython/code.h) - : Header file for - [Objects/codeobject.c](https://github.com/python/cpython/blob/main/Objects/codeobject.c); - contains definition of ``PyCodeObject``. 
+ * [Include/cpython/code.h](../Include/cpython/code.h) + : Header file for [Objects/codeobject.c](../Objects/codeobject.c); + contains definition of `PyCodeObject`. - * [Include/opcode.h](https://github.com/python/cpython/blob/main/Include/opcode.h) - : One of the files that must be modified if - [Lib/opcode.py](https://github.com/python/cpython/blob/main/Lib/opcode.py) is. + * [Include/opcode.h](../Include/opcode.h) + : One of the files that must be modified whenever + [Lib/opcode.py](../Lib/opcode.py) is. - * [Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h) + * [Include/internal/pycore_ast.h](../Include/internal/pycore_ast.h) : Contains the actual definitions of the C structs as generated by - [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c) - Automatically generated by - [Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py). - - * [Include/internal/pycore_asdl.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_asdl.h) - : Header for the corresponding - [Python/ast.c](https://github.com/python/cpython/blob/main/Python/ast.c). - - * [Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h) - : Declares ``_PyAST_Validate()`` external (from - [Python/ast.c](https://github.com/python/cpython/blob/main/Python/ast.c)). - - * [Include/internal/pycore_symtable.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_symtable.h) - : Header for - [Python/symtable.c](https://github.com/python/cpython/blob/main/Python/symtable.c). - ``struct symtable`` and ``PySTEntryObject`` are defined here. - - * [Include/internal/pycore_parser.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_parser.h) - : Header for the corresponding - [Parser/peg_api.c](https://github.com/python/cpython/blob/main/Parser/peg_api.c). 
- - * [Include/internal/pycore_pyarena.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_pyarena.h) - : Header file for the corresponding - [Python/pyarena.c](https://github.com/python/cpython/blob/main/Python/pyarena.c). - - * [Include/opcode_ids.h](https://github.com/python/cpython/blob/main/Include/opcode_ids.h) - : List of opcodes. Generated from - [Python/bytecodes.c](https://github.com/python/cpython/blob/main/Python/bytecodes.c) + [Python/Python-ast.c](../Python/Python-ast.c). + Automatically generated by [Parser/asdl_c.py](../Parser/asdl_c.py). + + * [Include/internal/pycore_asdl.h](../Include/internal/pycore_asdl.h) + : Header for the corresponding [Python/asdl.c](../Python/asdl.c). + + * [Include/internal/pycore_ast.h](../Include/internal/pycore_ast.h) + : Declares `_PyAST_Validate()` external (from [Python/ast.c](../Python/ast.c)). + + * [Include/internal/pycore_symtable.h](../Include/internal/pycore_symtable.h) + : Header for [Python/symtable.c](../Python/symtable.c). + `struct symtable` and `PySTEntryObject` are defined here. + + * [Include/internal/pycore_parser.h](../Include/internal/pycore_parser.h) + : Header for the corresponding [Parser/peg_api.c](../Parser/peg_api.c). + + * [Include/internal/pycore_pyarena.h](../Include/internal/pycore_pyarena.h) + : Header file for the corresponding [Python/pyarena.c](../Python/pyarena.c). + + * [Include/opcode_ids.h](../Include/opcode_ids.h) + : List of opcodes. Generated from [Python/bytecodes.c](../Python/bytecodes.c) by - [Tools/cases_generator/opcode_id_generator.py](https://github.com/python/cpython/blob/main/Tools/cases_generator/opcode_id_generator.py). + [Tools/cases_generator/opcode_id_generator.py](../Tools/cases_generator/opcode_id_generator.py).
-* [Objects/](https://github.com/python/cpython/blob/main/Objects/) +* [Objects/](../Objects/) - * [Objects/codeobject.c](https://github.com/python/cpython/blob/main/Objects/codeobject.c) + * [Objects/codeobject.c](../Objects/codeobject.c) : Contains PyCodeObject-related code. - * [Objects/frameobject.c](https://github.com/python/cpython/blob/main/Objects/frameobject.c) - : Contains the ``frame_setlineno()`` function which should determine whether it is allowed + * [Objects/frameobject.c](../Objects/frameobject.c) + : Contains the `frame_setlineno()` function which should determine whether it is allowed to make a jump between two points in a bytecode. -* [Lib/](https://github.com/python/cpython/blob/main/Lib/) +* [Lib/](../Lib/) - * [Lib/opcode.py](https://github.com/python/cpython/blob/main/Lib/opcode.py) + * [Lib/opcode.py](../Lib/opcode.py) : opcode utilities exposed to Python. - * [Include/core/pycore_magic_number.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_magic_number.h) - : Home of the magic number (named ``MAGIC_NUMBER``) for bytecode versioning. + * [Include/internal/pycore_magic_number.h](../Include/internal/pycore_magic_number.h) + : Home of the magic number (named `MAGIC_NUMBER`) for bytecode versioning.
Objects @@ -625,7 +595,7 @@ Objects * [Locations](locations.md): Describes the location table * [Frames](frames.md): Describes frames and the frame stack -* [Objects/object_layout.md](https://github.com/python/cpython/blob/main/Objects/object_layout.md): Describes object layout for 3.11 and later +* [Objects/object_layout.md](../Objects/object_layout.md): Describes object layout for 3.11 and later * [Exception Handling](exception_handling.md): Describes the exception table diff --git a/InternalDocs/exception_handling.md b/InternalDocs/exception_handling.md index 64a346b55b8413..14066a5864b4da 100644 --- a/InternalDocs/exception_handling.md +++ b/InternalDocs/exception_handling.md @@ -68,18 +68,16 @@ Handling Exceptions ------------------- At runtime, when an exception occurs, the interpreter calls -``get_exception_handler()`` in -[Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c) +`get_exception_handler()` in [Python/ceval.c](../Python/ceval.c) to look up the offset of the current instruction in the exception table. If it finds a handler, control flow transfers to it. Otherwise, the exception bubbles up to the caller, and the caller's frame is checked for a handler covering the `CALL` instruction. This repeats until a handler is found or the topmost frame is reached. If no handler is found, then the interpreter function -(``_PyEval_EvalFrameDefault()``) returns NULL. During unwinding, +(`_PyEval_EvalFrameDefault()`) returns NULL. During unwinding, the traceback is constructed as each frame is added to it by -``PyTraceBack_Here()``, which is in -[Python/traceback.c](https://github.com/python/cpython/blob/main/Python/traceback.c). +`PyTraceBack_Here()`, which is in [Python/traceback.c](../Python/traceback.c). Along with the location of an exception handler, each entry of the exception table also contains the stack depth of the `try` instruction @@ -174,22 +172,20 @@ which is then encoded as: for a total of five bytes. 
-The code to construct the exception table is in ``assemble_exception_table()`` -in [Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c). +The code to construct the exception table is in `assemble_exception_table()` +in [Python/assemble.c](../Python/assemble.c). The interpreter's function to lookup the table by instruction offset is -``get_exception_handler()`` in -[Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c). -The Python function ``_parse_exception_table()`` in -[Lib/dis.py](https://github.com/python/cpython/blob/main/Lib/dis.py) +`get_exception_handler()` in [Python/ceval.c](../Python/ceval.c). +The Python function `_parse_exception_table()` in [Lib/dis.py](../Lib/dis.py) returns the exception table content as a list of namedtuple instances. Exception Chaining Implementation --------------------------------- [Exception chaining](https://docs.python.org/dev/tutorial/errors.html#exception-chaining) -refers to setting the ``__context__`` and ``__cause__`` fields of an exception as it is -being raised. The ``__context__`` field is set by ``_PyErr_SetObject()`` in -[Python/errors.c](https://github.com/python/cpython/blob/main/Python/errors.c) -(which is ultimately called by all ``PyErr_Set*()`` functions). -The ``__cause__`` field (explicit chaining) is set by the ``RAISE_VARARGS`` bytecode. +refers to setting the `__context__` and `__cause__` fields of an exception as it is +being raised. The `__context__` field is set by `_PyErr_SetObject()` in +[Python/errors.c](../Python/errors.c) (which is ultimately called by all +`PyErr_Set*()` functions). The `__cause__` field (explicit chaining) is set by +the `RAISE_VARARGS` bytecode. 
diff --git a/InternalDocs/frames.md b/InternalDocs/frames.md index 34682adb1b422e..06dc8f0702c3d9 100644 --- a/InternalDocs/frames.md +++ b/InternalDocs/frames.md @@ -10,20 +10,19 @@ of three conceptual sections: globals dict, code object, instruction pointer, stack depth, the previous frame, etc. -The definition of the ``_PyInterpreterFrame`` struct is in -[Include/internal/pycore_frame.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_frame.h). +The definition of the `_PyInterpreterFrame` struct is in +[Include/internal/pycore_frame.h](../Include/internal/pycore_frame.h). # Allocation Python semantics allows frames to outlive the activation, so they need to be allocated outside the C call stack. To reduce overhead and improve locality of reference, most frames are allocated contiguously in a per-thread stack -(see ``_PyThreadState_PushFrame`` in -[Python/pystate.c](https://github.com/python/cpython/blob/main/Python/pystate.c)). +(see `_PyThreadState_PushFrame` in [Python/pystate.c](../Python/pystate.c)). Frames of generators and coroutines are embedded in the generator and coroutine -objects, so are not allocated in the per-thread stack. See ``PyGenObject`` in -[Include/internal/pycore_genobject.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_genobject.h). +objects, so are not allocated in the per-thread stack. See `PyGenObject` in +[Include/internal/pycore_genobject.h](../Include/internal/pycore_genobject.h). ## Layout @@ -82,16 +81,15 @@ frames for each activation, but with low runtime overhead. ### Generators and Coroutines -Generators (objects of type ``PyGen_Type``, ``PyCoro_Type`` or -``PyAsyncGen_Type``) have a `_PyInterpreterFrame` embedded in them, so +Generators (objects of type `PyGen_Type`, `PyCoro_Type` or +`PyAsyncGen_Type`) have a `_PyInterpreterFrame` embedded in them, so that they can be created with a single memory allocation. 
When such an embedded frame is iterated or awaited, it can be linked with frames on the per-thread stack via the linkage fields. If a frame object associated with a generator outlives the generator, then the embedded `_PyInterpreterFrame` is copied into the frame object (see -``take_ownership()`` in -[Python/frame.c](https://github.com/python/cpython/blob/main/Python/frame.c)). +`take_ownership()` in [Python/frame.c](../Python/frame.c)). ### Field names diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index fd0246fa1a60e2..a6ee5c09e19efd 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -12,7 +12,7 @@ a local variable in some C function. When an object’s reference count becomes the object is deallocated. If it contains references to other objects, their reference counts are decremented. Those other objects may be deallocated in turn, if this decrement makes their reference count become zero, and so on. The reference -count field can be examined using the ``sys.getrefcount()`` function (notice that the +count field can be examined using the `sys.getrefcount()` function (notice that the value returned by this function is always 1 more as the function also has a reference to the object when called): @@ -39,7 +39,7 @@ cycles. For instance, consider this code: >>> del container ``` -In this example, ``container`` holds a reference to itself, so even when we remove +In this example, `container` holds a reference to itself, so even when we remove our reference to it (the variable "container") the reference count never falls to 0 because it still has its own internal reference. Therefore it would never be cleaned just by simple reference counting. 
For this reason some additional machinery @@ -127,7 +127,7 @@ GC for the free-threaded build ------------------------------ In the free-threaded build, Python objects contain a 1-byte field -``ob_gc_bits`` that is used to track garbage collection related state. The +`ob_gc_bits` that is used to track garbage collection related state. The field exists in all objects, including ones that do not support cyclic garbage collection. The field is used to identify objects that are tracked by the collector, ensure that finalizers are called only once per object, @@ -146,14 +146,14 @@ and, during garbage collection, differentiate reachable vs. unreachable objects. | ... | ``` -Note that not all fields are to scale. ``pad`` is two bytes, ``ob_mutex`` and -``ob_gc_bits`` are each one byte, and ``ob_ref_local`` is four bytes. The -other fields, ``ob_tid``, ``ob_ref_shared``, and ``ob_type``, are all +Note that not all fields are to scale. `pad` is two bytes, `ob_mutex` and +`ob_gc_bits` are each one byte, and `ob_ref_local` is four bytes. The +other fields, `ob_tid`, `ob_ref_shared`, and `ob_type`, are all pointer-sized (that is, eight bytes on a 64-bit platform). -The garbage collector also temporarily repurposes the ``ob_tid`` (thread ID) -and ``ob_ref_local`` (local reference count) fields for other purposes during +The garbage collector also temporarily repurposes the `ob_tid` (thread ID) +and `ob_ref_local` (local reference count) fields for other purposes during collections. @@ -165,17 +165,17 @@ objects with GC support. These APIs can be found in the [Garbage Collector C API documentation](https://docs.python.org/3/c-api/gcsupport.html). Apart from this object structure, the type object for objects supporting garbage -collection must include the ``Py_TPFLAGS_HAVE_GC`` in its ``tp_flags`` slot and -provide an implementation of the ``tp_traverse`` handler. 
Unless it can be proven +collection must include the `Py_TPFLAGS_HAVE_GC` in its `tp_flags` slot and +provide an implementation of the `tp_traverse` handler. Unless it can be proven that the objects cannot form reference cycles with only objects of its type or unless -the type is immutable, a ``tp_clear`` implementation must also be provided. +the type is immutable, a `tp_clear` implementation must also be provided. Identifying reference cycles ============================ The algorithm that CPython uses to detect those reference cycles is -implemented in the ``gc`` module. The garbage collector **only focuses** +implemented in the `gc` module. The garbage collector **only focuses** on cleaning container objects (that is, objects that can contain a reference to one or more objects). These can be arrays, dictionaries, lists, custom class instances, classes in extension modules, etc. One could think that @@ -195,7 +195,7 @@ the interpreter create cycles everywhere. Some notable examples: To correctly dispose of these objects once they become unreachable, they need to be identified first. To understand how the algorithm works, let’s take the case of a circular linked list which has one link referenced by a -variable ``A``, and one self-referencing object which is completely +variable `A`, and one self-referencing object which is completely unreachable: ```pycon @@ -234,7 +234,7 @@ objects have a refcount larger than the number of incoming references from within the candidate set. Every object that supports garbage collection will have an extra reference -count field initialized to the reference count (``gc_ref`` in the figures) +count field initialized to the reference count (`gc_ref` in the figures) of that object when the algorithm starts. This is because the algorithm needs to modify the reference count to do the computations and in this way the interpreter will not modify the real reference count field. 
@@ -243,43 +243,43 @@ interpreter will not modify the real reference count field. The GC then iterates over all containers in the first list and decrements by one the `gc_ref` field of any other object that container is referencing. Doing -this makes use of the ``tp_traverse`` slot in the container class (implemented +this makes use of the `tp_traverse` slot in the container class (implemented using the C API or inherited by a superclass) to know what objects are referenced by each container. After all the objects have been scanned, only the objects that have -references from outside the “objects to scan” list will have ``gc_ref > 0``. +references from outside the “objects to scan” list will have `gc_ref > 0`. ![gc-image2](images/python-cyclic-gc-2-new-page.png) -Notice that having ``gc_ref == 0`` does not imply that the object is unreachable. -This is because another object that is reachable from the outside (``gc_ref > 0``) -can still have references to it. For instance, the ``link_2`` object in our example -ended having ``gc_ref == 0`` but is referenced still by the ``link_1`` object that +Notice that having `gc_ref == 0` does not imply that the object is unreachable. +This is because another object that is reachable from the outside (`gc_ref > 0`) +can still have references to it. For instance, the `link_2` object in our example +ended having `gc_ref == 0` but is referenced still by the `link_1` object that is reachable from the outside. To obtain the set of objects that are really unreachable, the garbage collector re-scans the container objects using the -``tp_traverse`` slot; this time with a different traverse function that marks objects with -``gc_ref == 0`` as "tentatively unreachable" and then moves them to the +`tp_traverse` slot; this time with a different traverse function that marks objects with +`gc_ref == 0` as "tentatively unreachable" and then moves them to the tentatively unreachable list. 
The following image depicts the state of the lists in a -moment when the GC processed the ``link_3`` and ``link_4`` objects but has not -processed ``link_1`` and ``link_2`` yet. +moment when the GC processed the `link_3` and `link_4` objects but has not +processed `link_1` and `link_2` yet. ![gc-image3](images/python-cyclic-gc-3-new-page.png) -Then the GC scans the next ``link_1`` object. Because it has ``gc_ref == 1``, +Then the GC scans the next `link_1` object. Because it has `gc_ref == 1`, the gc does not do anything special because it knows it has to be reachable (and is already in what will become the reachable list): ![gc-image4](images/python-cyclic-gc-4-new-page.png) -When the GC encounters an object which is reachable (``gc_ref > 0``), it traverses -its references using the ``tp_traverse`` slot to find all the objects that are +When the GC encounters an object which is reachable (`gc_ref > 0`), it traverses +its references using the `tp_traverse` slot to find all the objects that are reachable from it, moving them to the end of the list of reachable objects (where -they started originally) and setting its ``gc_ref`` field to 1. This is what happens -to ``link_2`` and ``link_3`` below as they are reachable from ``link_1``. From the -state in the previous image and after examining the objects referred to by ``link_1`` -the GC knows that ``link_3`` is reachable after all, so it is moved back to the -original list and its ``gc_ref`` field is set to 1 so that if the GC visits it again, +they started originally) and setting its `gc_ref` field to 1. This is what happens +to `link_2` and `link_3` below as they are reachable from `link_1`. From the +state in the previous image and after examining the objects referred to by `link_1` +the GC knows that `link_3` is reachable after all, so it is moved back to the +original list and its `gc_ref` field is set to 1 so that if the GC visits it again, it will know that it's reachable. 
To avoid visiting an object twice, the GC marks all -objects that have already been visited once (by unsetting the ``PREV_MASK_COLLECTING`` +objects that have already been visited once (by unsetting the `PREV_MASK_COLLECTING` flag) so that if an object that has already been processed is referenced by some other object, the GC does not process it twice. @@ -295,7 +295,7 @@ list are really unreachable and can thus be garbage collected. Pragmatically, it's important to note that no recursion is required by any of this, and neither does it in any other way require additional memory proportional to the number of objects, number of pointers, or the lengths of pointer chains. Apart from -``O(1)`` storage for internal C needs, the objects themselves contain all the storage +`O(1)` storage for internal C needs, the objects themselves contain all the storage the GC algorithms require. Why moving unreachable objects is better @@ -331,7 +331,7 @@ with the objective of completely destroying these objects. Roughly, the process follows these steps in order: 1. Handle and clear weak references (if any). Weak references to unreachable objects - are set to ``None``. If the weak reference has an associated callback, the callback + are set to `None`. If the weak reference has an associated callback, the callback is enqueued to be called once the clearing of weak references is finished. We only invoke callbacks for weak references that are themselves reachable. If both the weak reference and the pointed-to object are unreachable we do not execute the callback. @@ -339,15 +339,15 @@ follows these steps in order: object and support for weak references predates support for object resurrection. Ignoring the weak reference's callback is fine because both the object and the weakref are going away, so it's legitimate to say the weak reference is going away first. -2. If an object has legacy finalizers (``tp_del`` slot) move it to the - ``gc.garbage`` list. -3. 
Call the finalizers (``tp_finalize`` slot) and mark the objects as already +2. If an object has legacy finalizers (`tp_del` slot) move it to the + `gc.garbage` list. +3. Call the finalizers (`tp_finalize` slot) and mark the objects as already finalized to avoid calling finalizers twice if the objects are resurrected or if other finalizers have removed the object first. 4. Deal with resurrected objects. If some objects have been resurrected, the GC finds the new subset of objects that are still unreachable by running the cycle detection algorithm again and continues with them. -5. Call the ``tp_clear`` slot of every object so all internal links are broken and +5. Call the `tp_clear` slot of every object so all internal links are broken and the reference counts fall to 0, triggering the destruction of all unreachable objects. @@ -376,9 +376,9 @@ generations. Every collection operates on the entire heap. In order to decide when to run, the collector keeps track of the number of object allocations and deallocations since the last collection. When the number of -allocations minus the number of deallocations exceeds ``threshold_0``, +allocations minus the number of deallocations exceeds `threshold_0`, collection starts. Initially only generation 0 is examined. If generation 0 has -been examined more than ``threshold_1`` times since generation 1 has been +been examined more than `threshold_1` times since generation 1 has been examined, then generation 1 is examined as well. With generation 2, things are a bit more complicated; see [Collecting the oldest generation](#Collecting-the-oldest-generation) for @@ -393,8 +393,8 @@ function: ``` The content of these generations can be examined using the -``gc.get_objects(generation=NUM)`` function and collections can be triggered -specifically in a generation by calling ``gc.collect(generation=NUM)``. 
+`gc.get_objects(generation=NUM)` function and collections can be triggered +specifically in a generation by calling `gc.collect(generation=NUM)`. ```pycon >>> import gc @@ -433,7 +433,7 @@ Collecting the oldest generation -------------------------------- In addition to the various configurable thresholds, the GC only triggers a full -collection of the oldest generation if the ratio ``long_lived_pending / long_lived_total`` +collection of the oldest generation if the ratio `long_lived_pending / long_lived_total` is above a given value (hardwired to 25%). The reason is that, while "non-full" collections (that is, collections of the young and middle generations) will always examine roughly the same number of objects (determined by the aforementioned @@ -463,12 +463,12 @@ used for tags or to keep other information – most often as a bit field (each bit a separate tag) – as long as code that uses the pointer masks out these bits before accessing memory. For example, on a 32-bit architecture (for both addresses and word size), a word is 32 bits = 4 bytes, so word-aligned -addresses are always a multiple of 4, hence end in ``00``, leaving the last 2 bits +addresses are always a multiple of 4, hence end in `00`, leaving the last 2 bits available; while on a 64-bit architecture, a word is 64 bits = 8 bytes, so -word-aligned addresses end in ``000``, leaving the last 3 bits available. +word-aligned addresses end in `000`, leaving the last 3 bits available. The CPython GC makes use of two fat pointers that correspond to the extra fields -of ``PyGC_Head`` discussed in the `Memory layout and object structure`_ section: +of `PyGC_Head` discussed in the [Memory layout and object structure](#memory-layout-and-object-structure) section: > [!WARNING] > Because the presence of extra information, "tagged" or "fat" pointers cannot be @@ -478,23 +478,23 @@ of ``PyGC_Head`` discussed in the `Memory layout and object structure`_ section: > normally assume the pointers inside the lists are in a consistent state.
-- The ``_gc_prev`` field is normally used as the "previous" pointer to maintain the +- The `_gc_prev` field is normally used as the "previous" pointer to maintain the doubly linked list but its lowest two bits are used to keep the flags - ``PREV_MASK_COLLECTING`` and ``_PyGC_PREV_MASK_FINALIZED``. Between collections, - the only flag that can be present is ``_PyGC_PREV_MASK_FINALIZED`` that indicates - if an object has been already finalized. During collections ``_gc_prev`` is - temporarily used for storing a copy of the reference count (``gc_ref``), in + `PREV_MASK_COLLECTING` and `_PyGC_PREV_MASK_FINALIZED`. Between collections, + the only flag that can be present is `_PyGC_PREV_MASK_FINALIZED` that indicates + if an object has been already finalized. During collections `_gc_prev` is + temporarily used for storing a copy of the reference count (`gc_ref`), in addition to two flags, and the GC linked list becomes a singly linked list until - ``_gc_prev`` is restored. + `_gc_prev` is restored. -- The ``_gc_next`` field is used as the "next" pointer to maintain the doubly linked +- The `_gc_next` field is used as the "next" pointer to maintain the doubly linked list but during collection its lowest bit is used to keep the - ``NEXT_MASK_UNREACHABLE`` flag that indicates if an object is tentatively + `NEXT_MASK_UNREACHABLE` flag that indicates if an object is tentatively unreachable during the cycle detection algorithm. This is a drawback to using only doubly linked lists to implement partitions: while most needed operations are constant-time, there is no efficient way to determine which partition an object is currently in. Instead, when that's needed, ad hoc tricks (like the - ``NEXT_MASK_UNREACHABLE`` flag) are employed. + `NEXT_MASK_UNREACHABLE` flag) are employed. 
Optimization: delay tracking containers ======================================= @@ -531,7 +531,7 @@ benefit from delayed tracking: full garbage collection (all generations), the collector will untrack any dictionaries whose contents are not tracked. -The garbage collector module provides the Python function ``is_tracked(obj)``, which returns +The garbage collector module provides the Python function `is_tracked(obj)`, which returns the current tracking status of the object. Subsequent garbage collections may change the tracking status of the object. @@ -556,20 +556,20 @@ Differences between GC implementations This section summarizes the differences between the GC implementation in the default build and the implementation in the free-threaded build. -The default build implementation makes extensive use of the ``PyGC_Head`` data +The default build implementation makes extensive use of the `PyGC_Head` data structure, while the free-threaded build implementation does not use that data structure. - The default build implementation stores all tracked objects in a doubly - linked list using ``PyGC_Head``. The free-threaded build implementation + linked list using `PyGC_Head`. The free-threaded build implementation instead relies on the embedded mimalloc memory allocator to scan the heap for tracked objects. -- The default build implementation uses ``PyGC_Head`` for the unreachable +- The default build implementation uses `PyGC_Head` for the unreachable object list. The free-threaded build implementation repurposes the - ``ob_tid`` field to store a unreachable objects linked list. -- The default build implementation stores flags in the ``_gc_prev`` field of - ``PyGC_Head``. The free-threaded build implementation stores these flags - in ``ob_gc_bits``. + `ob_tid` field to store a linked list of unreachable objects. +- The default build implementation stores flags in the `_gc_prev` field of + `PyGC_Head`.
The free-threaded build implementation stores these flags + in `ob_gc_bits`. The default build implementation relies on the diff --git a/InternalDocs/parser.md b/InternalDocs/parser.md index 11aaf11253646d..6398ba6cd2838f 100644 --- a/InternalDocs/parser.md +++ b/InternalDocs/parser.md @@ -9,12 +9,12 @@ Python's Parser is currently a [`PEG` (Parser Expression Grammar)](https://en.wikipedia.org/wiki/Parsing_expression_grammar) parser. It was introduced in [PEP 617: New PEG parser for CPython](https://peps.python.org/pep-0617/) to replace -the original [``LL(1)``](https://en.wikipedia.org/wiki/LL_parser) parser. +the original [`LL(1)`](https://en.wikipedia.org/wiki/LL_parser) parser. The code implementing the parser is generated from a grammar definition by a [parser generator](https://en.wikipedia.org/wiki/Compiler-compiler). Therefore, changes to the Python language are made by modifying the -[grammar file](https://github.com/python/cpython/blob/main/Grammar/python.gram). +[grammar file](../Grammar/python.gram). Developers rarely need to modify the generator itself. See the devguide's [Changing CPython's grammar](https://devguide.python.org/developer-workflow/grammar/#grammar) @@ -33,9 +33,9 @@ is ordered. This means that when writing: rule: A | B | C ``` -a parser that implements a context-free-grammar (such as an ``LL(1)`` parser) will +a parser that implements a context-free-grammar (such as an `LL(1)` parser) will generate constructions that, given an input string, *deduce* which alternative -(``A``, ``B`` or ``C``) must be expanded. On the other hand, a PEG parser will +(`A`, `B` or `C`) must be expanded. On the other hand, a PEG parser will check each alternative, in the order in which they are specified, and select that first one that succeeds. @@ -67,21 +67,21 @@ time complexity with a technique called which not only loads the entire program in memory before parsing it but also allows the parser to backtrack arbitrarily. 
This is made efficient by memoizing the rules already matched for each position. The cost of the memoization cache -is that the parser will naturally use more memory than a simple ``LL(1)`` parser, +is that the parser will naturally use more memory than a simple `LL(1)` parser, which normally are table-based. Key ideas --------- -- Alternatives are ordered ( ``A | B`` is not the same as ``B | A`` ). +- Alternatives are ordered ( `A | B` is not the same as `B | A` ). - If a rule returns a failure, it doesn't mean that the parsing has failed, it just means "try something else". - By default PEG parsers run in exponential time, which can be optimized to linear by using memoization. - If parsing fails completely (no rule succeeds in parsing all the input text), the PEG parser doesn't have a concept of "where the - [``SyntaxError``](https://docs.python.org/3/library/exceptions.html#SyntaxError) is". + [`SyntaxError`](https://docs.python.org/3/library/exceptions.html#SyntaxError) is". > [!IMPORTANT] @@ -111,16 +111,16 @@ the following two rules (in these examples, a token is an individual character): second_rule: ('aa' | 'a' ) 'a' ``` -In a regular EBNF grammar, both rules specify the language ``{aa, aaa}`` but -in PEG, one of these two rules accepts the string ``aaa`` but not the string -``aa``. The other does the opposite -- it accepts the string ``aa`` -but not the string ``aaa``. The rule ``('a'|'aa')'a'`` does -not accept ``aaa`` because ``'a'|'aa'`` consumes the first ``a``, letting the -final ``a`` in the rule consume the second, and leaving out the third ``a``. +In a regular EBNF grammar, both rules specify the language `{aa, aaa}` but +in PEG, one of these two rules accepts the string `aaa` but not the string +`aa`. The other does the opposite -- it accepts the string `aa` +but not the string `aaa`. 
The rule `('a'|'aa')'a'` does +not accept `aaa` because `'a'|'aa'` consumes the first `a`, letting the +final `a` in the rule consume the second, and leaving out the third `a`. As the rule has succeeded, no attempt is ever made to go back and let -``'a'|'aa'`` try the second alternative. The expression ``('aa'|'a')'a'`` does -not accept ``aa`` because ``'aa'|'a'`` accepts all of ``aa``, leaving nothing -for the final ``a``. Again, the second alternative of ``'aa'|'a'`` is not +`'a'|'aa'` try the second alternative. The expression `('aa'|'a')'a'` does +not accept `aa` because `'aa'|'a'` accepts all of `aa`, leaving nothing +for the final `a`. Again, the second alternative of `'aa'|'a'` is not tried. > [!CAUTION] @@ -137,7 +137,7 @@ one is in almost all cases a mistake, for example: ``` In this example, the second alternative will never be tried because the first one will -succeed first (even if the input string has an ``'else' block`` that follows). To correctly +succeed first (even if the input string has an `'else' block` that follows). To correctly write this rule you can simply alter the order: ``` @@ -146,7 +146,7 @@ write this rule you can simply alter the order: | 'if' expression 'then' block ``` -In this case, if the input string doesn't have an ``'else' block``, the first alternative +In this case, if the input string doesn't have an `'else' block`, the first alternative will fail and the second will be attempted. Grammar Syntax @@ -166,8 +166,8 @@ the rule: rule_name[return_type]: expression ``` -If the return type is omitted, then a ``void *`` is returned in C and an -``Any`` in Python. +If the return type is omitted, then a `void *` is returned in C and an +`Any` in Python. Grammar expressions ------------------- @@ -214,7 +214,7 @@ Variables in the grammar ------------------------ A sub-expression can be named by preceding it with an identifier and an -``=`` sign. The name can then be used in the action (see below), like this: +`=` sign. 
The name can then be used in the action (see below), like this: ``` rule_name[return_type]: '(' a=some_other_rule ')' { a } @@ -387,9 +387,9 @@ returns a valid C-based Python AST: | NUMBER ``` -Here ``EXTRA`` is a macro that expands to ``start_lineno, start_col_offset, -end_lineno, end_col_offset, p->arena``, those being variables automatically -injected by the parser; ``p`` points to an object that holds on to all state +Here `EXTRA` is a macro that expands to `start_lineno, start_col_offset, +end_lineno, end_col_offset, p->arena`, those being variables automatically +injected by the parser; `p` points to an object that holds on to all state for the parser. A similar grammar written to target Python AST objects: @@ -422,50 +422,47 @@ Pegen Pegen is the parser generator used in CPython to produce the final PEG parser used by the interpreter. It is the program that can be used to read the python -grammar located in -[`Grammar/python.gram`](https://github.com/python/cpython/blob/main/Grammar/python.gram) -and produce the final C parser. It contains the following pieces: +grammar located in [`Grammar/python.gram`](../Grammar/python.gram) and produce +the final C parser. It contains the following pieces: - A parser generator that can read a grammar file and produce a PEG parser written in Python or C that can parse said grammar. The generator is located at - [`Tools/peg_generator/pegen`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen). + [`Tools/peg_generator/pegen`](../Tools/peg_generator/pegen). - A PEG meta-grammar that automatically generates a Python parser which is used for the parser generator itself (this means that there are no manually-written parsers). The meta-grammar is located at - [`Tools/peg_generator/pegen/metagrammar.gram`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/metagrammar.gram). + [`Tools/peg_generator/pegen/metagrammar.gram`](../Tools/peg_generator/pegen/metagrammar.gram). 
- A generated parser (using the parser generator) that can directly produce C and Python AST objects. -The source code for Pegen lives at -[`Tools/peg_generator/pegen`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen) +The source code for Pegen lives at [`Tools/peg_generator/pegen`](../Tools/peg_generator/pegen) but normally all typical commands to interact with the parser generator are executed from the main makefile. How to regenerate the parser ---------------------------- -Once you have made the changes to the grammar files, to regenerate the ``C`` +Once you have made the changes to the grammar files, to regenerate the `C` parser (the one used by the interpreter) just execute: ``` make regen-pegen ``` -using the ``Makefile`` in the main directory. If you are on Windows you can +using the `Makefile` in the main directory. If you are on Windows you can use the Visual Studio project files to regenerate the parser or to execute: ``` ./PCbuild/build.bat --regen ``` -The generated parser file is located at -[`Parser/parser.c`](https://github.com/python/cpython/blob/main/Parser/parser.c). +The generated parser file is located at [`Parser/parser.c`](../Parser/parser.c). How to regenerate the meta-parser --------------------------------- The meta-grammar (the grammar that describes the grammar for the grammar files themselves) is located at -[`Tools/peg_generator/pegen/metagrammar.gram`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/metagrammar.gram). +[`Tools/peg_generator/pegen/metagrammar.gram`](../Tools/peg_generator/pegen/metagrammar.gram). Although it is very unlikely that you will ever need to modify it, if you make any modifications to this file (in order to implement new Pegen features) you will need to regenerate the meta-parser (the parser that parses the grammar files). 
@@ -488,11 +485,11 @@ Grammatical elements and rules Pegen has some special grammatical elements and rules: -- Strings with single quotes (') (for example, ``'class'``) denote KEYWORDS. -- Strings with double quotes (") (for example, ``"match"``) denote SOFT KEYWORDS. -- Uppercase names (for example, ``NAME``) denote tokens in the - [`Grammar/Tokens`](https://github.com/python/cpython/blob/main/Grammar/Tokens) file. -- Rule names starting with ``invalid_`` are used for specialized syntax errors. +- Strings with single quotes (') (for example, `'class'`) denote KEYWORDS. +- Strings with double quotes (") (for example, `"match"`) denote SOFT KEYWORDS. +- Uppercase names (for example, `NAME`) denote tokens in the + [`Grammar/Tokens`](../Grammar/Tokens) file. +- Rule names starting with `invalid_` are used for specialized syntax errors. - These rules are NOT used in the first pass of the parser. - Only if the first pass fails to parse, a second pass including the invalid @@ -509,14 +506,13 @@ Tokenization It is common among PEG parser frameworks that the parser does both the parsing and the tokenization, but this does not happen in Pegen. The reason is that the Python language needs a custom tokenizer to handle things like indentation -boundaries, some special keywords like ``ASYNC`` and ``AWAIT`` (for +boundaries, some special keywords like `ASYNC` and `AWAIT` (for compatibility purposes), backtracking errors (such as unclosed parenthesis), dealing with encoding, interactive mode and much more. Some of these reasons are also there for historical purposes, and some others are useful even today. The list of tokens (all uppercase names in the grammar) that you can use can -be found in thei -[`Grammar/Tokens`](https://github.com/python/cpython/blob/main/Grammar/Tokens) +be found in the [`Grammar/Tokens`](../Grammar/Tokens) file. 
If you change this file to add new tokens, make sure to regenerate the files by executing: @@ -532,9 +528,7 @@ the tokens or to execute: ``` How tokens are generated and the rules governing this are completely up to the tokenizer -([`Parser/lexer`](https://github.com/python/cpython/blob/main/Parser/lexer) -and -[`Parser/tokenizer`](https://github.com/python/cpython/blob/main/Parser/tokenizer)); +([`Parser/lexer`](../Parser/lexer) and [`Parser/tokenizer`](../Parser/tokenizer)); the parser just receives tokens from it. Memoization @@ -548,7 +542,7 @@ both in memory and time. Although the memory cost is obvious (the parser needs memory for storing previous results in the cache) the execution time cost comes for continuously checking if the given rule has a cache hit or not. In many situations, just parsing it again can be faster. Pegen **disables memoization -by default** except for rules with the special marker ``memo`` after the rule +by default** except for rules with the special marker `memo` after the rule name (and type, if present): ``` @@ -567,8 +561,7 @@ To determine whether a new rule needs memoization or not, benchmarking is requir (comparing execution times and memory usage of some considerably large files with and without memoization). There is a very simple instrumentation API available in the generated C parse code that allows to measure how much each rule uses -memoization (check the -[`Parser/pegen.c`](https://github.com/python/cpython/blob/main/Parser/pegen.c) +memoization (check the [`Parser/pegen.c`](../Parser/pegen.c) file for more information) but it needs to be manually activated. Automatic variables @@ -578,9 +571,9 @@ To make writing actions easier, Pegen injects some automatic variables in the namespace available when writing actions. In the C parser, some of these automatic variable names are: -- ``p``: The parser structure. 
-- ``EXTRA``: This is a macro that expands to - ``(_start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena)``, +- `p`: The parser structure. +- `EXTRA`: This is a macro that expands to + `(_start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena)`, which is normally used to create AST nodes as almost all constructors need these attributes to be provided. All of the location variables are taken from the location information of the current token. @@ -590,13 +583,13 @@ Hard and soft keywords > [!NOTE] > In the grammar files, keywords are defined using **single quotes** (for example, -> ``'class'``) while soft keywords are defined using **double quotes** (for example, -> ``"match"``). +> `'class'`) while soft keywords are defined using **double quotes** (for example, +> `"match"`). There are two kinds of keywords allowed in pegen grammars: *hard* and *soft* keywords. The difference between hard and soft keywords is that hard keywords are always reserved words, even in positions where they make no sense -(for example, ``x = class + 1``), while soft keywords only get a special +(for example, `x = class + 1`), while soft keywords only get a special meaning in context. Trying to use a hard keyword as a variable will always fail: @@ -621,7 +614,7 @@ one where they are defined as keywords: >>> foo(match="Yeah!") ``` -The ``match`` and ``case`` keywords are soft keywords, so that they are +The `match` and `case` keywords are soft keywords, so that they are recognized as keywords at the beginning of a match statement or case block respectively, but are allowed to be used in other places as variable or argument names. @@ -662,7 +655,7 @@ is, and it will unwind the stack and report the exception. This means that if a [rule action](#grammar-actions) raises an exception, all parsing will stop at that exact point. This is done to allow to correctly propagate any exception set by calling Python's C API functions. 
This also includes -[``SyntaxError``](https://docs.python.org/3/library/exceptions.html#SyntaxError) +[`SyntaxError`](https://docs.python.org/3/library/exceptions.html#SyntaxError) exceptions and it is the main mechanism the parser uses to report custom syntax error messages. @@ -684,10 +677,10 @@ grammar. To report generic syntax errors, pegen uses a common heuristic in PEG parsers: the location of *generic* syntax errors is reported to be the furthest token that was attempted to be matched but failed. This is only done if parsing has failed -(the parser returns ``NULL`` in C or ``None`` in Python) but no exception has +(the parser returns `NULL` in C or `None` in Python) but no exception has been raised. -As the Python grammar was primordially written as an ``LL(1)`` grammar, this heuristic +As the Python grammar was primordially written as an `LL(1)` grammar, this heuristic has an extremely high success rate, but some PEG features, such as lookaheads, can impact this. @@ -699,19 +692,19 @@ can impact this. To generate more precise syntax errors, custom rules are used. This is a common practice also in context free grammars: the parser will try to accept some construct that is known to be incorrect just to report a specific syntax error -for that construct. In pegen grammars, these rules start with the ``invalid_`` +for that construct. In pegen grammars, these rules start with the `invalid_` prefix. This is because trying to match these rules normally has a performance impact on parsing (and can also affect the 'correct' grammar itself in some tricky cases, depending on the ordering of the rules) so the generated parser acts in two phases: 1. The first phase will try to parse the input stream without taking into - account rules that start with the ``invalid_`` prefix. If the parsing + account rules that start with the `invalid_` prefix. If the parsing succeeds it will return the generated AST and the second phase will be skipped. 2. 
If the first phase failed, a second parsing attempt is done including the - rules that start with an ``invalid_`` prefix. By design this attempt + rules that start with an `invalid_` prefix. By design this attempt **cannot succeed** and is only executed to give to the invalid rules a chance to detect specific situations where custom, more precise, syntax errors can be raised. This also allows to trade a bit of performance for @@ -723,15 +716,15 @@ acts in two phases: > When defining invalid rules: > > - Make sure all custom invalid rules raise -> [``SyntaxError``](https://docs.python.org/3/library/exceptions.html#SyntaxError) +> [`SyntaxError`](https://docs.python.org/3/library/exceptions.html#SyntaxError) > exceptions (or a subclass of it). -> - Make sure **all** invalid rules start with the ``invalid_`` prefix to not +> - Make sure **all** invalid rules start with the `invalid_` prefix to not > impact performance of parsing correct Python code. > - Make sure the parser doesn't behave differently for regular rules when you introduce invalid rules > (see the [how PEG parsers work](#how-peg-parsers-work) section for more information). You can find a collection of macros to raise specialized syntax errors in the -[`Parser/pegen.h`](https://github.com/python/cpython/blob/main/Parser/pegen.h) +[`Parser/pegen.h`](../Parser/pegen.h) header file. These macros allow also to report ranges for the custom errors, which will be highlighted in the tracebacks that will be displayed when the error is reported. @@ -746,35 +739,33 @@ displayed when the error is reported. $ 42 ``` -should trigger the syntax error in the ``$`` character. If your rule is not correctly defined this +should trigger the syntax error in the `$` character. If your rule is not correctly defined this won't happen. 
As another example, suppose that you try to define a rule to match Python 2 style -``print`` statements in order to create a better error message and you define it as: +`print` statements in order to create a better error message and you define it as: ``` invalid_print: "print" expression ``` -This will **seem** to work because the parser will correctly parse ``print(something)`` because it is valid -code and the second phase will never execute but if you try to parse ``print(something) $ 3`` the first pass -of the parser will fail (because of the ``$``) and in the second phase, the rule will match the -``print(something)`` as ``print`` followed by the variable ``something`` between parentheses and the error -will be reported there instead of the ``$`` character. +This will **seem** to work because the parser will correctly parse `print(something)` because it is valid +code and the second phase will never execute but if you try to parse `print(something) $ 3` the first pass +of the parser will fail (because of the `$`) and in the second phase, the rule will match the +`print(something)` as `print` followed by the variable `something` between parentheses and the error +will be reported there instead of the `$` character. Generating AST objects ---------------------- The output of the C parser used by CPython, which is generated from the -[grammar file](https://github.com/python/cpython/blob/main/Grammar/python.gram), -is a Python AST object (using C structures). This means that the actions in the -grammar file generate AST objects when they succeed. Constructing these objects -can be quite cumbersome (see the [AST compiler section](compiler.md#abstract-syntax-trees-ast) +[grammar file](../Grammar/python.gram), is a Python AST object (using C +structures). This means that the actions in the grammar file generate AST +objects when they succeed. 
Constructing these objects can be quite cumbersome +(see the [AST compiler section](compiler.md#abstract-syntax-trees-ast) for more information on how these objects are constructed and how they are used by the compiler), so special helper functions are used. These functions are -declared in the -[`Parser/pegen.h`](https://github.com/python/cpython/blob/main/Parser/pegen.h) -header file and defined in the -[`Parser/action_helpers.c`](https://github.com/python/cpython/blob/main/Parser/action_helpers.c) -file. The helpers include functions that join AST sequences, get specific elements +declared in the [`Parser/pegen.h`](../Parser/pegen.h) header file and defined +in the [`Parser/action_helpers.c`](../Parser/action_helpers.c) file. The +helpers include functions that join AST sequences, get specific elements from them or to perform extra processing on the generated tree. @@ -788,11 +779,9 @@ from them or to perform extra processing on the generated tree. As a general rule, if an action spawns multiple lines or requires something more complicated than a single expression of C code, is normally better to create a -custom helper in -[`Parser/action_helpers.c`](https://github.com/python/cpython/blob/main/Parser/action_helpers.c) -and expose it in the -[`Parser/pegen.h`](https://github.com/python/cpython/blob/main/Parser/pegen.h) -header file so that it can be used from the grammar. +custom helper in [`Parser/action_helpers.c`](../Parser/action_helpers.c) +and expose it in the [`Parser/pegen.h`](../Parser/pegen.h) header file so that +it can be used from the grammar. When parsing succeeds, the parser **must** return a **valid** AST object. 
@@ -801,16 +790,15 @@ Testing There are three files that contain tests for the grammar and the parser: -- [test_grammar.py](https://github.com/python/cpython/blob/main/Lib/test/test_grammar.py) -- [test_syntax.py](https://github.com/python/cpython/blob/main/Lib/test/test_syntax.py) -- [test_exceptions.py](https://github.com/python/cpython/blob/main/Lib/test/test_exceptions.py) +- [test_grammar.py](../Lib/test/test_grammar.py) +- [test_syntax.py](../Lib/test/test_syntax.py) +- [test_exceptions.py](../Lib/test/test_exceptions.py) -Check the contents of these files to know which is the best place for new tests, depending -on the nature of the new feature you are adding. +Check the contents of these files to know which is the best place for new +tests, depending on the nature of the new feature you are adding. Tests for the parser generator itself can be found in the -[test_peg_generator](https://github.com/python/cpython/blob/main/Lib/test_peg_generator) -directory. +[test_peg_generator](../Lib/test_peg_generator) directory. Debugging generated parsers @@ -825,33 +813,32 @@ correctly compile and execute Python anymore. This makes it a bit challenging to debug when something goes wrong, especially when experimenting. For this reason it is a good idea to experiment first by generating a Python -parser. To do this, you can go to the -[Tools/peg_generator](https://github.com/python/cpython/blob/main/Tools/peg_generator) +parser. 
To do this, you can go to the [Tools/peg_generator](../Tools/peg_generator) directory on the CPython repository and manually call the parser generator by executing: ``` $ python -m pegen python ``` -This will generate a file called ``parse.py`` in the same directory that you +This will generate a file called `parse.py` in the same directory that you can use to parse some input: ``` $ python parse.py file_with_source_code_to_test.py ``` -As the generated ``parse.py`` file is just Python code, you can modify it +As the generated `parse.py` file is just Python code, you can modify it and add breakpoints to debug or better understand some complex situations. Verbose mode ------------ -When Python is compiled in debug mode (by adding ``--with-pydebug`` when -running the configure step in Linux or by adding ``-d`` when calling the -[PCbuild/build.bat](https://github.com/python/cpython/blob/main/PCbuild/build.bat)), -it is possible to activate a **very** verbose mode in the generated parser. This -is very useful to debug the generated parser and to understand how it works, but it +When Python is compiled in debug mode (by adding `--with-pydebug` when +running the configure step in Linux or by adding `-d` when calling the +[PCbuild/build.bat](../PCbuild/build.bat)), it is possible to activate a +**very** verbose mode in the generated parser. This is very useful to +debug the generated parser and to understand how it works, but it can be a bit hard to understand at first. > [!NOTE] @@ -859,13 +846,13 @@ can be a bit hard to understand at first. > interactive mode as it can be much harder to understand, because interactive > mode involves some special steps compared to regular parsing. 
-To activate verbose mode you can add the ``-d`` flag when executing Python: +To activate verbose mode you can add the `-d` flag when executing Python: ``` $ python -d file_to_test.py ``` -This will print **a lot** of output to ``stderr`` so it is probably better to dump +This will print **a lot** of output to `stderr` so it is probably better to dump it to a file for further analysis. The output consists of trace lines with the following structure:: @@ -873,17 +860,17 @@ following structure:: <indentation> ('>'|'-'|'+'|'!') <rule_name>[<token_location>]: <alternative> ... ``` -Every line is indented by a different amount (``<indentation>``) depending on how +Every line is indented by a different amount (`<indentation>`) depending on how deep the call stack is. The next character marks the type of the trace: -- ``>`` indicates that a rule is going to be attempted to be parsed. -- ``-`` indicates that a rule has failed to be parsed. -- ``+`` indicates that a rule has been parsed correctly. -- ``!`` indicates that an exception or an error has been detected and the parser is unwinding. +- `>` indicates that a rule is going to be attempted to be parsed. +- `-` indicates that a rule has failed to be parsed. +- `+` indicates that a rule has been parsed correctly. +- `!` indicates that an exception or an error has been detected and the parser is unwinding. -The ``<token_location>`` part indicates the current index in the token array, -the ``<rule_name>`` part indicates what rule is being parsed and -the ``<alternative>`` part indicates what alternative within that rule +The `<token_location>` part indicates the current index in the token array, +the `<rule_name>` part indicates what rule is being parsed and +the `<alternative>` part indicates what alternative within that rule is being attempted. @@ -891,4 +878,5 @@ is being attempted.
> **Document history** > > Pablo Galindo Salgado - Original author +> > Irit Katriel and Jacob Coffee - Convert to Markdown From 03f9264ecef4b1df5e71586327a04ec3b9331cbe Mon Sep 17 00:00:00 2001 From: Arjun Singh <98927961+xylocone@users.noreply.github.com> Date: Tue, 22 Oct 2024 08:18:16 +0530 Subject: [PATCH 070/106] fix grammar in comment in dictobject.c (#125822) --- Objects/dictobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c4e11a3e9c0bc7..3134f6141dc9be 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -34,7 +34,7 @@ As of Python 3.6, this is compact and ordered. Basic idea is described here: dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1) or DKIX_DUMMY(-2). -Size of indices is dk_size. Type of each index in indices is vary on dk_size: +Size of indices is dk_size. Type of each index in indices varies with dk_size: * int8 for dk_size <= 128 * int16 for 256 <= dk_size <= 2**15 From 4efe64aa56e7a9a96b94c0ae0201db8d402a5f53 Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Tue, 22 Oct 2024 11:41:30 +0300 Subject: [PATCH 071/106] gh-125811: Remove DeprecationWarnings in test_peg_generator (#125812) --- Lib/test/test_peg_generator/test_pegen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py index 86db767b99a228..54c9dce2d0c90d 100644 --- a/Lib/test/test_peg_generator/test_pegen.py +++ b/Lib/test/test_peg_generator/test_pegen.py @@ -484,7 +484,7 @@ def test_left_recursive(self) -> None: def test_python_expr(self) -> None: grammar = """ - start: expr NEWLINE? $ { ast.Expression(expr, lineno=1, col_offset=0) } + start: expr NEWLINE? 
$ { ast.Expression(expr) } expr: ( expr '+' term { ast.BinOp(expr, ast.Add(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) } | expr '-' term { ast.BinOp(expr, ast.Sub(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) } | term { term } @@ -893,7 +893,7 @@ def test_unreachable_implicit3(self) -> None: def test_locations_in_alt_action_and_group(self) -> None: grammar = """ - start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) } + start: t=term NEWLINE? $ { ast.Expression(t) } term: | l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) } | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) } From c1bdbe84c8ab29b68bb109328e02af9464f104b3 Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Tue, 22 Oct 2024 11:42:56 +0300 Subject: [PATCH 072/106] gh-124889: Rework Python generator cache (#125816) --- Tools/peg_generator/pegen/python_generator.py | 75 ++++++++++++------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py index 588d3d3f6ef8f8..7057135a9061f6 100644 --- a/Tools/peg_generator/pegen/python_generator.py +++ b/Tools/peg_generator/pegen/python_generator.py @@ -1,6 +1,6 @@ import os.path import token -from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple +from typing import IO, Any, Callable, Dict, Optional, Sequence, Set, Text, Tuple from pegen import grammar from pegen.grammar import ( @@ -93,7 +93,7 @@ def visit_Forced(self, node: Forced) -> bool: class PythonCallMakerVisitor(GrammarVisitor): def __init__(self, parser_generator: ParserGenerator): self.gen = parser_generator - self.cache: Dict[Any, Any] = {} + self.cache: Dict[str, Tuple[str, str]] = {} def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: name = node.value @@ -110,16 +110,6 @@ def 
visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: return "literal", f"self.expect({node.value})" - def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: - if node in self.cache: - return self.cache[node] - if len(node.alts) == 1 and len(node.alts[0].items) == 1: - self.cache[node] = self.visit(node.alts[0].items[0]) - else: - name = self.gen.artificial_rule_from_rhs(node) - self.cache[node] = name, f"self.{name}()" - return self.cache[node] - def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]: name, call = self.visit(node.item) if node.name: @@ -151,26 +141,57 @@ def visit_Opt(self, node: Opt) -> Tuple[str, str]: else: return "opt", f"{call}," + def _generate_artificial_rule_call( + self, + node: Any, + prefix: str, + call_by_name_func: Callable[[str], str], + rule_generation_func: Callable[[], str], + ) -> Tuple[str, str]: + node_str = f"{node}" + key = f"{prefix}_{node_str}" + if key in self.cache: + return self.cache[key] + + name = rule_generation_func() + call = call_by_name_func(name) + self.cache[key] = name, call + return self.cache[key] + + def visit_Rhs(self, node: Rhs) -> Tuple[str, str]: + if len(node.alts) == 1 and len(node.alts[0].items) == 1: + return self.visit(node.alts[0].items[0]) + + return self._generate_artificial_rule_call( + node, + "rhs", + lambda name: f"self.{name}()", + lambda: self.gen.artificial_rule_from_rhs(node), + ) + def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: - if node in self.cache: - return self.cache[node] - name = self.gen.artificial_rule_from_repeat(node.node, False) - self.cache[node] = name, f"self.{name}()," # Also a trailing comma! - return self.cache[node] + return self._generate_artificial_rule_call( + node, + "repeat0", + lambda name: f"self.{name}(),", # Also a trailing comma! 
+ lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False), + ) def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: - if node in self.cache: - return self.cache[node] - name = self.gen.artificial_rule_from_repeat(node.node, True) - self.cache[node] = name, f"self.{name}()" # But no trailing comma here! - return self.cache[node] + return self._generate_artificial_rule_call( + node, + "repeat1", + lambda name: f"self.{name}()", # But no trailing comma here! + lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True), + ) def visit_Gather(self, node: Gather) -> Tuple[str, str]: - if node in self.cache: - return self.cache[node] - name = self.gen.artificial_rule_from_gather(node) - self.cache[node] = name, f"self.{name}()" # No trailing comma here either! - return self.cache[node] + return self._generate_artificial_rule_call( + node, + "gather", + lambda name: f"self.{name}()", # No trailing comma here either! + lambda: self.gen.artificial_rule_from_gather(node), + ) def visit_Group(self, node: Group) -> Tuple[Optional[str], str]: return self.visit(node.rhs) From 57e3c59bb64fc2f8b2845a7e03ab0abb029ccd02 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 22 Oct 2024 10:11:29 +0100 Subject: [PATCH 073/106] GH-125521: Remove `if (true)` from generated output to reduce C compiler warnings (GH-125700) --- Lib/test/test_generated_cases.py | 27 +++++++++ Python/generated_cases.c.h | 68 +++++++++++----------- Tools/cases_generator/analyzer.py | 2 +- Tools/cases_generator/generators_common.py | 18 ++++-- 4 files changed, 75 insertions(+), 40 deletions(-) diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index cd3718b80612bd..95813e1e32c7af 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1270,6 +1270,33 @@ def test_push_then_error(self): """ self.run_cases_test(input, output) + def test_error_if_true(self): + + input = """ + inst(OP1, ( --)) { + ERROR_IF(true, 
here); + } + inst(OP2, ( --)) { + ERROR_IF(1, there); + } + """ + output = """ + TARGET(OP1) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP1); + goto here; + } + + TARGET(OP2) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP2); + goto there; + } + """ + self.run_cases_test(input, output) + def test_scalar_array_inconsistency(self): input = """ diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 388031af87a79f..efbf2fba8c3106 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -689,7 +689,7 @@ for (int _i = oparg*2; --_i >= 0;) { PyStackRef_CLOSE(values[_i]); } - if (true) { + { stack_pointer += -oparg*2; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -731,7 +731,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(values[_i]); } - if (true) { + { stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -748,7 +748,7 @@ } if (err != 0) { Py_DECREF(set_o); - if (true) { + { stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -803,7 +803,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(pieces[_i]); } - if (true) { + { stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -945,7 +945,7 @@ for (int i = 0; i < total_args; i++) { PyStackRef_CLOSE(args[i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1343,7 +1343,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1422,7 +1422,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1509,7 +1509,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1971,7 +1971,7 @@ 
PyStackRef_CLOSE(args[_i]); } PyStackRef_CLOSE(kwnames); - if (true) { + { stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2174,7 +2174,7 @@ PyStackRef_CLOSE(args[_i]); } PyStackRef_CLOSE(kwnames); - if (true) { + { stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2431,7 +2431,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2516,7 +2516,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2749,7 +2749,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -3103,7 +3103,7 @@ if (err < 0) { PyStackRef_CLOSE(exc_value_st); PyStackRef_CLOSE(match_type_st); - if (true) goto pop_2_error; + goto pop_2_error; } PyObject *match_o = NULL; PyObject *rest_o = NULL; @@ -3149,7 +3149,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); if (err < 0) { PyStackRef_CLOSE(right); - if (true) goto pop_1_error; + goto pop_1_error; } _PyFrame_SetStackPointer(frame, stack_pointer); int res = PyErr_GivenExceptionMatches(left_o, right_o); @@ -3583,7 +3583,7 @@ PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg) ); stack_pointer = _PyFrame_GetStackPointer(frame); - if (1) goto error; + goto error; } SETLOCAL(oparg, PyStackRef_NULL); DISPATCH(); @@ -3682,7 +3682,7 @@ _PyEval_FormatKwargsError(tstate, callable_o, update_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(update); - if (true) goto pop_1_error; + goto pop_1_error; } PyStackRef_CLOSE(update); stack_pointer += -1; @@ -3715,7 +3715,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } PyStackRef_CLOSE(update); - if (true) goto pop_1_error; + goto pop_1_error; } PyStackRef_CLOSE(update); stack_pointer += -1; @@ 
-4173,7 +4173,7 @@ type->tp_name); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(obj); - if (true) goto pop_1_error; + goto pop_1_error; } _PyFrame_SetStackPointer(frame, stack_pointer); iter_o = (*getter)(obj_o); @@ -4191,7 +4191,7 @@ Py_TYPE(iter_o)->tp_name); stack_pointer = _PyFrame_GetStackPointer(frame); Py_DECREF(iter_o); - if (true) goto error; + goto error; } iter = PyStackRef_FromPyObjectSteal(iter_o); stack_pointer[-1] = iter; @@ -4458,7 +4458,7 @@ for (int i = 0; i < total_args; i++) { PyStackRef_CLOSE(args[i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -5210,7 +5210,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } PyStackRef_CLOSE(iterable_st); - if (true) goto pop_1_error; + goto pop_1_error; } assert(Py_IsNone(none_val)); PyStackRef_CLOSE(iterable_st); @@ -5866,7 +5866,7 @@ _PyErr_SetString(tstate, PyExc_NameError, "__build_class__ not found"); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } bc = PyStackRef_FromPyObjectSteal(bc_o); stack_pointer[0] = bc; @@ -5920,7 +5920,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); _PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } value = PyStackRef_FromPyObjectSteal(value_o); stack_pointer[0] = value; @@ -5969,7 +5969,7 @@ PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg) ); stack_pointer = _PyFrame_GetStackPointer(frame); - if (1) goto error; + goto error; } value = PyStackRef_DUP(value_s); stack_pointer[0] = value; @@ -6088,7 +6088,7 @@ tstate, PyExc_NameError, NAME_ERROR_MSG, name); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } } } @@ -6237,7 +6237,7 @@ _PyErr_SetString(tstate, PyExc_SystemError, "no locals found"); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } locals = 
PyStackRef_FromPyObjectNew(l); stack_pointer[0] = locals; @@ -6288,7 +6288,7 @@ Py_TYPE(owner_o)->tp_name); stack_pointer = _PyFrame_GetStackPointer(frame); } - if (true) goto error; + goto error; } attr = PyStackRef_FromPyObjectSteal(attr_o); self_or_null = self_or_null_o == NULL ? @@ -6348,7 +6348,7 @@ PyStackRef_CLOSE(global_super_st); PyStackRef_CLOSE(class_st); PyStackRef_CLOSE(self_st); - if (true) goto pop_3_error; + goto pop_3_error; } } // we make no attempt to optimize here; specializations should @@ -6466,7 +6466,7 @@ PyStackRef_CLOSE(class_st); if (attr_o == NULL) { PyStackRef_CLOSE(self_st); - if (true) goto pop_3_error; + goto pop_3_error; } if (method_found) { self_or_null = self_st; // transfer ownership @@ -6838,7 +6838,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); goto exception_unwind; } - if (true) goto error; + goto error; } TARGET(RERAISE) { @@ -7130,7 +7130,7 @@ } else { PyStackRef_CLOSE(v); - if (true) goto pop_1_error; + goto pop_1_error; } } PyStackRef_CLOSE(v); @@ -7202,7 +7202,7 @@ _PyErr_Format(tstate, PyExc_SystemError, "no locals found when setting up annotations"); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } /* check if __annotations__ in locals()... 
*/ _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7559,7 +7559,7 @@ "no locals found when storing %R", name); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(v); - if (true) goto pop_1_error; + goto pop_1_error; } if (PyDict_CheckExact(ns)) { _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 381ad3a4e2082c..f41a8d161099df 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -744,7 +744,7 @@ def always_exits(op: parser.InstDef) -> bool: if tkn.text == "DEOPT_IF" or tkn.text == "ERROR_IF": next(tkn_iter) # '(' t = next(tkn_iter) - if t.text == "true": + if t.text in ("true", "1"): return True return False diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 7e032c21d2485c..3b158f5ac4eb48 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -165,16 +165,24 @@ def error_if( storage: Storage, inst: Instruction | None, ) -> bool: - self.out.emit_at("if ", tkn) lparen = next(tkn_iter) - self.emit(lparen) assert lparen.kind == "LPAREN" first_tkn = tkn_iter.peek() - emit_to(self.out, tkn_iter, "COMMA") + unconditional = always_true(first_tkn) + if unconditional: + next(tkn_iter) + comma = next(tkn_iter) + if comma.kind != "COMMA": + raise analysis_error(f"Expected comma, got '{comma.text}'", comma) + self.out.start_line() + else: + self.out.emit_at("if ", tkn) + self.emit(lparen) + emit_to(self.out, tkn_iter, "COMMA") + self.out.emit(") ") label = next(tkn_iter).text next(tkn_iter) # RPAREN next(tkn_iter) # Semi colon - self.out.emit(") ") storage.clear_inputs("at ERROR_IF") c_offset = storage.stack.peek_offset() try: @@ -196,7 +204,7 @@ def error_if( self.out.emit(label) self.out.emit(";\n") self.out.emit("}\n") - return not always_true(first_tkn) + return not unconditional def error_no_pop( self, From 
759a54d28ffe7eac8c23917f5d3dfad8309856be Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 22 Oct 2024 13:57:25 +0300 Subject: [PATCH 074/106] gh-125355: Rewrite parse_intermixed_args() in argparse (GH-125356) * The parser no longer changes temporarily during parsing. * Default values are not processed twice. * Required mutually exclusive groups containing positional arguments are now supported. * The missing arguments report now includes the names of all required optional and positional arguments. * Unknown options can be intermixed with positional arguments in parse_known_intermixed_args(). --- Lib/argparse.py | 99 +++++++------------ Lib/test/test_argparse.py | 56 +++++++---- ...-10-22-13-28-00.gh-issue-125355.zssHm_.rst | 7 ++ 3 files changed, 80 insertions(+), 82 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 49271a146c7282..024622bec17c3b 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1924,6 +1924,9 @@ def parse_args(self, args=None, namespace=None): return args def parse_known_args(self, args=None, namespace=None): + return self._parse_known_args2(args, namespace, intermixed=False) + + def _parse_known_args2(self, args, namespace, intermixed): if args is None: # args default to the system args args = _sys.argv[1:] @@ -1950,18 +1953,18 @@ def parse_known_args(self, args=None, namespace=None): # parse the arguments and exit if there are any errors if self.exit_on_error: try: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = self._parse_known_args(args, namespace, intermixed) except ArgumentError as err: self.error(str(err)) else: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = self._parse_known_args(args, namespace, intermixed) if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) 
return namespace, args - def _parse_known_args(self, arg_strings, namespace): + def _parse_known_args(self, arg_strings, namespace, intermixed): # replace arg strings that are file references if self.fromfile_prefix_chars is not None: arg_strings = self._read_args_from_files(arg_strings) @@ -2052,6 +2055,7 @@ def consume_optional(start_index): # if we found no optional action, skip it if action is None: extras.append(arg_strings[start_index]) + extras_pattern.append('O') return start_index + 1 # if there is an explicit argument, try to match the @@ -2087,6 +2091,7 @@ def consume_optional(start_index): sep = '' else: extras.append(char + explicit_arg) + extras_pattern.append('O') stop = start_index + 1 break # if the action expect exactly one argument, we've @@ -2165,6 +2170,7 @@ def consume_positionals(start_index): # consume Positionals and Optionals alternately, until we have # passed the last option string extras = [] + extras_pattern = [] start_index = 0 if option_string_indices: max_option_string_index = max(option_string_indices) @@ -2178,7 +2184,7 @@ def consume_positionals(start_index): if next_option_string_index in option_string_indices: break next_option_string_index += 1 - if start_index != next_option_string_index: + if not intermixed and start_index != next_option_string_index: positionals_end_index = consume_positionals(start_index) # only try to parse the next optional if we didn't consume @@ -2194,16 +2200,35 @@ def consume_positionals(start_index): if start_index not in option_string_indices: strings = arg_strings[start_index:next_option_string_index] extras.extend(strings) + extras_pattern.extend(arg_strings_pattern[start_index:next_option_string_index]) start_index = next_option_string_index # consume the next optional and any arguments for it start_index = consume_optional(start_index) - # consume any positionals following the last Optional - stop_index = consume_positionals(start_index) + if not intermixed: + # consume any positionals 
following the last Optional + stop_index = consume_positionals(start_index) - # if we didn't consume all the argument strings, there were extras - extras.extend(arg_strings[stop_index:]) + # if we didn't consume all the argument strings, there were extras + extras.extend(arg_strings[stop_index:]) + else: + extras.extend(arg_strings[start_index:]) + extras_pattern.extend(arg_strings_pattern[start_index:]) + extras_pattern = ''.join(extras_pattern) + assert len(extras_pattern) == len(extras) + # consume all positionals + arg_strings = [s for s, c in zip(extras, extras_pattern) if c != 'O'] + arg_strings_pattern = extras_pattern.replace('O', '') + stop_index = consume_positionals(0) + # leave unknown optionals and non-consumed positionals in extras + for i, c in enumerate(extras_pattern): + if not stop_index: + break + if c != 'O': + stop_index -= 1 + extras[i] = None + extras = [s for s in extras if s is not None] # make sure all required actions were present and also convert # action defaults which were not given as arguments @@ -2469,10 +2494,6 @@ def parse_known_intermixed_args(self, args=None, namespace=None): # are then parsed. If the parser definition is incompatible with the # intermixed assumptions (e.g. use of REMAINDER, subparsers) a # TypeError is raised. - # - # positionals are 'deactivated' by setting nargs and default to - # SUPPRESS. 
This blocks the addition of that positional to the - # namespace positionals = self._get_positional_actions() a = [action for action in positionals @@ -2481,59 +2502,7 @@ def parse_known_intermixed_args(self, args=None, namespace=None): raise TypeError('parse_intermixed_args: positional arg' ' with nargs=%s'%a[0].nargs) - if [action.dest for group in self._mutually_exclusive_groups - for action in group._group_actions if action in positionals]: - raise TypeError('parse_intermixed_args: positional in' - ' mutuallyExclusiveGroup') - - try: - save_usage = self.usage - try: - if self.usage is None: - # capture the full usage for use in error messages - self.usage = self.format_usage()[7:] - for action in positionals: - # deactivate positionals - action.save_nargs = action.nargs - # action.nargs = 0 - action.nargs = SUPPRESS - action.save_default = action.default - action.default = SUPPRESS - namespace, remaining_args = self.parse_known_args(args, - namespace) - for action in positionals: - # remove the empty positional values from namespace - if (hasattr(namespace, action.dest) - and getattr(namespace, action.dest)==[]): - from warnings import warn - warn('Do not expect %s in %s' % (action.dest, namespace)) - delattr(namespace, action.dest) - finally: - # restore nargs and usage before exiting - for action in positionals: - action.nargs = action.save_nargs - action.default = action.save_default - optionals = self._get_optional_actions() - try: - # parse positionals. optionals aren't normally required, but - # they could be, so make sure they aren't. 
- for action in optionals: - action.save_required = action.required - action.required = False - for group in self._mutually_exclusive_groups: - group.save_required = group.required - group.required = False - namespace, extras = self.parse_known_args(remaining_args, - namespace) - finally: - # restore parser values before exiting - for action in optionals: - action.required = action.save_required - for group in self._mutually_exclusive_groups: - group.required = group.save_required - finally: - self.usage = save_usage - return namespace, extras + return self._parse_known_args2(args, namespace, intermixed=True) # ======================== # Value conversion methods diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 4fa669718abc50..4bd7a935b9b757 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -6412,12 +6412,23 @@ def test_basic(self): # cannot parse the '1,2,3' self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1]), args) self.assertEqual(["2", "3"], extras) + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual([], extras) + # unknown optionals go into extras + argv = 'cmd --foo x --error 1 2 --bar y 3'.split() + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual(['--error'], extras) argv = 'cmd --foo x 1 --error 2 --bar y 3'.split() args, extras = parser.parse_known_intermixed_args(argv) - # unknown optionals go into extras - self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1]), args) - self.assertEqual(['--error', '2', '3'], extras) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual(['--error'], extras) + argv = 'cmd --foo x 1 2 --error --bar y 3'.split() + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 
3]), args) + self.assertEqual(['--error'], extras) # restores attributes that were temporarily changed self.assertIsNone(parser.usage) @@ -6436,37 +6447,48 @@ def test_remainder(self): parser.parse_intermixed_args(argv) self.assertRegex(str(cm.exception), r'\.\.\.') - def test_exclusive(self): - # mutually exclusive group; intermixed works fine - parser = ErrorRaisingArgumentParser(prog='PROG') + def test_required_exclusive(self): + # required mutually exclusive group; intermixed works fine + parser = argparse.ArgumentParser(prog='PROG', exit_on_error=False) group = parser.add_mutually_exclusive_group(required=True) group.add_argument('--foo', action='store_true', help='FOO') group.add_argument('--spam', help='SPAM') parser.add_argument('badger', nargs='*', default='X', help='BADGER') + args = parser.parse_intermixed_args('--foo 1 2'.split()) + self.assertEqual(NS(badger=['1', '2'], foo=True, spam=None), args) args = parser.parse_intermixed_args('1 --foo 2'.split()) self.assertEqual(NS(badger=['1', '2'], foo=True, spam=None), args) - self.assertRaises(ArgumentParserError, parser.parse_intermixed_args, '1 2'.split()) + self.assertRaisesRegex(argparse.ArgumentError, + 'one of the arguments --foo --spam is required', + parser.parse_intermixed_args, '1 2'.split()) self.assertEqual(group.required, True) - def test_exclusive_incompatible(self): - # mutually exclusive group including positional - fail - parser = ErrorRaisingArgumentParser(prog='PROG') + def test_required_exclusive_with_positional(self): + # required mutually exclusive group with positional argument + parser = argparse.ArgumentParser(prog='PROG', exit_on_error=False) group = parser.add_mutually_exclusive_group(required=True) group.add_argument('--foo', action='store_true', help='FOO') group.add_argument('--spam', help='SPAM') group.add_argument('badger', nargs='*', default='X', help='BADGER') - self.assertRaises(TypeError, parser.parse_intermixed_args, []) + args = parser.parse_intermixed_args(['--foo']) + 
self.assertEqual(NS(foo=True, spam=None, badger='X'), args) + args = parser.parse_intermixed_args(['a', 'b']) + self.assertEqual(NS(foo=False, spam=None, badger=['a', 'b']), args) + self.assertRaisesRegex(argparse.ArgumentError, + 'one of the arguments --foo --spam badger is required', + parser.parse_intermixed_args, []) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument badger: not allowed with argument --foo', + parser.parse_intermixed_args, ['--foo', 'a', 'b']) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument badger: not allowed with argument --foo', + parser.parse_intermixed_args, ['a', '--foo', 'b']) self.assertEqual(group.required, True) def test_invalid_args(self): parser = ErrorRaisingArgumentParser(prog='PROG') self.assertRaises(ArgumentParserError, parser.parse_intermixed_args, ['a']) - parser = ErrorRaisingArgumentParser(prog='PROG') - parser.add_argument('--foo', nargs="*") - parser.add_argument('foo') - with self.assertWarns(UserWarning): - parser.parse_intermixed_args(['hello', '--foo']) class TestIntermixedMessageContentError(TestCase): # case where Intermixed gives different error message @@ -6485,7 +6507,7 @@ def test_missing_argument_name_in_message(self): with self.assertRaises(ArgumentParserError) as cm: parser.parse_intermixed_args([]) msg = str(cm.exception) - self.assertNotRegex(msg, 'req_pos') + self.assertRegex(msg, 'req_pos') self.assertRegex(msg, 'req_opt') # ========================== diff --git a/Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst b/Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst new file mode 100644 index 00000000000000..fd67f697641d92 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst @@ -0,0 +1,7 @@ +Fix several bugs in :meth:`argparse.ArgumentParser.parse_intermixed_args`. + +* The parser no longer changes temporarily during parsing. +* Default values are not processed twice. 
+* Required mutually exclusive groups containing positional arguments are now supported. +* The missing arguments report now includes the names of all required optional and positional arguments. +* Unknown options can be intermixed with positional arguments in parse_known_intermixed_args(). From 91ddde4af0c3031c84a967bcf59f6fb4f8a48c0d Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Tue, 22 Oct 2024 14:07:09 +0100 Subject: [PATCH 075/106] Doc: Show object descriptions in the table of contents (#125757) --- Doc/conf.py | 3 ++- Doc/tools/extensions/pyspecific.py | 1 + Doc/tools/static/sidebar-wrap.css | 6 ++++++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 Doc/tools/static/sidebar-wrap.css diff --git a/Doc/conf.py b/Doc/conf.py index db8fb9a9a68c6b..7ee3c91581345d 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -94,7 +94,8 @@ # Create table of contents entries for domain objects (e.g. functions, classes, # attributes, etc.). Default is True. -toc_object_entries = False +toc_object_entries = True +toc_object_entries_show_parents = 'hide' # Ignore any .rst files in the includes/ directory; # they're embedded in pages but not rendered individually. 
diff --git a/Doc/tools/extensions/pyspecific.py b/Doc/tools/extensions/pyspecific.py index bcb8a421e32d09..f4df7ec0839339 100644 --- a/Doc/tools/extensions/pyspecific.py +++ b/Doc/tools/extensions/pyspecific.py @@ -434,5 +434,6 @@ def setup(app): app.add_directive_to_domain('py', 'awaitablemethod', PyAwaitableMethod) app.add_directive_to_domain('py', 'abstractmethod', PyAbstractMethod) app.add_directive('miscnews', MiscNews) + app.add_css_file('sidebar-wrap.css') app.connect('env-check-consistency', patch_pairindextypes) return {'version': '1.0', 'parallel_read_safe': True} diff --git a/Doc/tools/static/sidebar-wrap.css b/Doc/tools/static/sidebar-wrap.css new file mode 100644 index 00000000000000..0a80f516f28349 --- /dev/null +++ b/Doc/tools/static/sidebar-wrap.css @@ -0,0 +1,6 @@ +div.sphinxsidebarwrapper { + overflow-x: scroll; +} +div.sphinxsidebarwrapper li code { + overflow-wrap: normal; +} From 079875e39589eb0628b5883f7ffa387e7476ec06 Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Tue, 22 Oct 2024 19:00:25 +0300 Subject: [PATCH 076/106] gh-125038: Fix crash after genexpr.gi_frame.f_locals manipulations (#125178) --- Lib/test/test_dis.py | 1 + Lib/test/test_generators.py | 73 +++++++++++++++++++ ...-10-09-13-53-50.gh-issue-125038.ffSLCz.rst | 2 + Python/codegen.c | 1 + 4 files changed, 77 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 1ee0fbe98914be..1f9c04cdbc926c 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -810,6 +810,7 @@ def foo(x): POP_TOP L1: RESUME 0 LOAD_FAST 0 (.0) + GET_ITER L2: FOR_ITER 10 (to L3) STORE_FAST 1 (z) LOAD_DEREF 2 (x) diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py index 03a31ec6a05726..bf2cb1160723b0 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -268,6 +268,79 @@ def loop(): #This should not raise loop() + +class 
ModifyUnderlyingIterableTest(unittest.TestCase): + iterables = [ + range(0), + range(20), + [1, 2, 3], + (2,), + {13, 48, 211}, + frozenset((15, 8, 6)), + {1: 2, 3: 4}, + ] + + non_iterables = [ + None, + 42, + 3.0, + 2j, + ] + + def genexpr(self): + return (x for x in range(10)) + + def genfunc(self): + def gen(it): + for x in it: + yield x + return gen(range(10)) + + def process_tests(self, get_generator): + for obj in self.iterables: + g_obj = get_generator(obj) + with self.subTest(g_obj=g_obj, obj=obj): + self.assertListEqual(list(g_obj), list(obj)) + + g_iter = get_generator(iter(obj)) + with self.subTest(g_iter=g_iter, obj=obj): + self.assertListEqual(list(g_iter), list(obj)) + + err_regex = "'.*' object is not iterable" + for obj in self.non_iterables: + g_obj = get_generator(obj) + with self.subTest(g_obj=g_obj): + self.assertRaisesRegex(TypeError, err_regex, list, g_obj) + + def test_modify_f_locals(self): + def modify_f_locals(g, local, obj): + g.gi_frame.f_locals[local] = obj + return g + + def get_generator_genexpr(obj): + return modify_f_locals(self.genexpr(), '.0', obj) + + def get_generator_genfunc(obj): + return modify_f_locals(self.genfunc(), 'it', obj) + + self.process_tests(get_generator_genexpr) + self.process_tests(get_generator_genfunc) + + def test_new_gen_from_gi_code(self): + def new_gen_from_gi_code(g, obj): + generator_func = types.FunctionType(g.gi_code, {}) + return generator_func(obj) + + def get_generator_genexpr(obj): + return new_gen_from_gi_code(self.genexpr(), obj) + + def get_generator_genfunc(obj): + return new_gen_from_gi_code(self.genfunc(), obj) + + self.process_tests(get_generator_genexpr) + self.process_tests(get_generator_genfunc) + + class ExceptionTest(unittest.TestCase): # Tests for the issue #23353: check that the currently handled exception # is correctly saved/restored in PyEval_EvalFrameEx(). 
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst new file mode 100644 index 00000000000000..15de48ec0e4450 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst @@ -0,0 +1,2 @@ +Fix crash when iterating over a generator expression after direct changes on ``gi_frame.f_locals``. +Patch by Mikhail Efimov. diff --git a/Python/codegen.c b/Python/codegen.c index 689d2b5124e9d3..bfacc6f0c55593 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -4164,6 +4164,7 @@ codegen_sync_comprehension_generator(compiler *c, location loc, if (IS_JUMP_TARGET_LABEL(start)) { depth++; + ADDOP(c, LOC(gen->iter), GET_ITER); USE_LABEL(c, start); ADDOP_JUMP(c, LOC(gen->iter), FOR_ITER, anchor); } From aaed91cabcedc16c089c4b1c9abb1114659a83d3 Mon Sep 17 00:00:00 2001 From: Ethan Furman Date: Tue, 22 Oct 2024 11:04:00 -0700 Subject: [PATCH 077/106] gh-125710: [Enum] fix hashable<->nonhashable comparisons for member values (GH-125735) --- Lib/enum.py | 26 ++++++++++++++----- Lib/test/test_enum.py | 7 +++++ ...-10-19-13-37-37.gh-issue-125710.FyFAAr.rst | 1 + 3 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst diff --git a/Lib/enum.py b/Lib/enum.py index 17d72738792982..4f9912229603a6 100644 --- a/Lib/enum.py +++ b/Lib/enum.py @@ -327,6 +327,8 @@ def __set_name__(self, enum_class, member_name): # to the map, and by-value lookups for this value will be # linear. 
enum_class._value2member_map_.setdefault(value, enum_member) + if value not in enum_class._hashable_values_: + enum_class._hashable_values_.append(value) except TypeError: # keep track of the value in a list so containment checks are quick enum_class._unhashable_values_.append(value) @@ -538,7 +540,8 @@ def __new__(metacls, cls, bases, classdict, *, boundary=None, _simple=False, **k classdict['_member_names_'] = [] classdict['_member_map_'] = {} classdict['_value2member_map_'] = {} - classdict['_unhashable_values_'] = [] + classdict['_hashable_values_'] = [] # for comparing with non-hashable types + classdict['_unhashable_values_'] = [] # e.g. frozenset() with set() classdict['_unhashable_values_map_'] = {} classdict['_member_type_'] = member_type # now set the __repr__ for the value @@ -748,7 +751,10 @@ def __contains__(cls, value): try: return value in cls._value2member_map_ except TypeError: - return value in cls._unhashable_values_ + return ( + value in cls._unhashable_values_ # both structures are lists + or value in cls._hashable_values_ + ) def __delattr__(cls, attr): # nicer error message when someone tries to delete an attribute @@ -1166,8 +1172,11 @@ def __new__(cls, value): pass except TypeError: # not there, now do long search -- O(n) behavior - for name, values in cls._unhashable_values_map_.items(): - if value in values: + for name, unhashable_values in cls._unhashable_values_map_.items(): + if value in unhashable_values: + return cls[name] + for name, member in cls._member_map_.items(): + if value == member._value_: return cls[name] # still not found -- verify that members exist, in-case somebody got here mistakenly # (such as via super when trying to override __new__) @@ -1233,6 +1242,7 @@ def _add_value_alias_(self, value): # to the map, and by-value lookups for this value will be # linear. 
cls._value2member_map_.setdefault(value, self) + cls._hashable_values_.append(value) except TypeError: # keep track of the value in a list so containment checks are quick cls._unhashable_values_.append(value) @@ -1763,6 +1773,7 @@ def convert_class(cls): body['_member_names_'] = member_names = [] body['_member_map_'] = member_map = {} body['_value2member_map_'] = value2member_map = {} + body['_hashable_values_'] = hashable_values = [] body['_unhashable_values_'] = unhashable_values = [] body['_unhashable_values_map_'] = {} body['_member_type_'] = member_type = etype._member_type_ @@ -1826,7 +1837,7 @@ def convert_class(cls): contained = value2member_map.get(member._value_) except TypeError: contained = None - if member._value_ in unhashable_values: + if member._value_ in unhashable_values or member.value in hashable_values: for m in enum_class: if m._value_ == member._value_: contained = m @@ -1846,6 +1857,7 @@ def convert_class(cls): else: enum_class._add_member_(name, member) value2member_map[value] = member + hashable_values.append(value) if _is_single_bit(value): # not a multi-bit alias, record in _member_names_ and _flag_mask_ member_names.append(name) @@ -1882,7 +1894,7 @@ def convert_class(cls): contained = value2member_map.get(member._value_) except TypeError: contained = None - if member._value_ in unhashable_values: + if member._value_ in unhashable_values or member._value_ in hashable_values: for m in enum_class: if m._value_ == member._value_: contained = m @@ -1908,6 +1920,8 @@ def convert_class(cls): # to the map, and by-value lookups for this value will be # linear. 
enum_class._value2member_map_.setdefault(value, member) + if value not in hashable_values: + hashable_values.append(value) except TypeError: # keep track of the value in a list so containment checks are quick enum_class._unhashable_values_.append(value) diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 5b4a8070526fcf..7184769bfd6fc3 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -3460,6 +3460,13 @@ def test_empty_names(self): self.assertRaisesRegex(TypeError, '.int. object is not iterable', Enum, 'bad_enum', names=0) self.assertRaisesRegex(TypeError, '.int. object is not iterable', Enum, 'bad_enum', 0, type=int) + def test_nonhashable_matches_hashable(self): # issue 125710 + class Directions(Enum): + DOWN_ONLY = frozenset({"sc"}) + UP_ONLY = frozenset({"cs"}) + UNRESTRICTED = frozenset({"sc", "cs"}) + self.assertIs(Directions({"sc"}), Directions.DOWN_ONLY) + class TestOrder(unittest.TestCase): "test usage of the `_order_` attribute" diff --git a/Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst b/Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst new file mode 100644 index 00000000000000..8d5220e9889c3a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst @@ -0,0 +1 @@ +[Enum] fix hashable<->nonhashable comparisons for member values From 34653bba644aa5481613f398153757d7357e39ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20=C5=A0a=C5=A1ko?= Date: Tue, 22 Oct 2024 22:42:22 +0200 Subject: [PATCH 078/106] gh-125259: Fix error notes removal in enum initialization (GH-125647) --- Lib/enum.py | 16 +++++----------- Lib/test/test_enum.py | 19 +++++++++++++++++++ ...-10-17-16-10-29.gh-issue-125259.oMew0c.rst | 1 + 3 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst diff --git a/Lib/enum.py b/Lib/enum.py index 4f9912229603a6..27be3fb83b2afb 100644 --- a/Lib/enum.py +++ 
b/Lib/enum.py @@ -557,22 +557,16 @@ def __new__(metacls, cls, bases, classdict, *, boundary=None, _simple=False, **k classdict['_all_bits_'] = 0 classdict['_inverted_'] = None try: - exc = None classdict['_%s__in_progress' % cls] = True enum_class = super().__new__(metacls, cls, bases, classdict, **kwds) classdict['_%s__in_progress' % cls] = False delattr(enum_class, '_%s__in_progress' % cls) except Exception as e: - # since 3.12 the line "Error calling __set_name__ on '_proto_member' instance ..." - # is tacked on to the error instead of raising a RuntimeError - # recreate the exception to discard - exc = type(e)(str(e)) - exc.__cause__ = e.__cause__ - exc.__context__ = e.__context__ - tb = e.__traceback__ - if exc is not None: - raise exc.with_traceback(tb) - # + # since 3.12 the note "Error calling __set_name__ on '_proto_member' instance ..." + # is tacked on to the error instead of raising a RuntimeError, so discard it + if hasattr(e, '__notes__'): + del e.__notes__ + raise # update classdict with any changes made by __init_subclass__ classdict.update(enum_class.__dict__) # diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 7184769bfd6fc3..b9e13fb8c3585e 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -1888,6 +1888,25 @@ def test_wrong_inheritance_order(self): class Wrong(Enum, str): NotHere = 'error before this point' + def test_raise_custom_error_on_creation(self): + class InvalidRgbColorError(ValueError): + def __init__(self, r, g, b): + self.r = r + self.g = g + self.b = b + super().__init__(f'({r}, {g}, {b}) is not a valid RGB color') + + with self.assertRaises(InvalidRgbColorError): + class RgbColor(Enum): + RED = (255, 0, 0) + GREEN = (0, 255, 0) + BLUE = (0, 0, 255) + INVALID = (256, 0, 0) + + def __init__(self, r, g, b): + if not all(0 <= val <= 255 for val in (r, g, b)): + raise InvalidRgbColorError(r, g, b) + def test_intenum_transitivity(self): class number(IntEnum): one = 1 diff --git 
a/Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst b/Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst new file mode 100644 index 00000000000000..4fa6330abea512 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst @@ -0,0 +1 @@ +Fix the notes removal logic for errors thrown in enum initialization. From c75ff2ef8eb71d91b1f92db9c2bc7ff18c582ab1 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 23 Oct 2024 00:41:33 -0400 Subject: [PATCH 079/106] gh-80958: unittest: discovery support for namespace packages as start directory (#123820) --- Doc/library/unittest.rst | 35 +++++------ Doc/whatsnew/3.14.rst | 9 +++ .../namespace_test_pkg/bar/__init__.py | 0 .../namespace_test_pkg/bar/test_bar.py | 5 ++ .../namespace_test_pkg/noop/no2/__init__.py | 0 .../namespace_test_pkg/noop/no2/test_no2.py | 5 ++ .../namespace_test_pkg/noop/test_noop.py | 5 ++ .../namespace_test_pkg/test_foo.py | 5 ++ Lib/test/test_unittest/test_discovery.py | 54 ++++++++++++++++- Lib/unittest/loader.py | 59 ++++++++++++++----- Makefile.pre.in | 4 ++ ...4-09-07-13-57-49.gh-issue-80958.fVYnqV.rst | 1 + 12 files changed, 145 insertions(+), 37 deletions(-) create mode 100644 Lib/test/test_unittest/namespace_test_pkg/bar/__init__.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/noop/no2/__init__.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/test_foo.py create mode 100644 Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index c49aba69b12126..38bad9405597dd 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -340,28 +340,21 @@ Test modules and packages 
can customize test loading and discovery by through the `load_tests protocol`_. .. versionchanged:: 3.4 - Test discovery supports :term:`namespace packages ` - for the start directory. Note that you need to specify the top level - directory too (e.g. - ``python -m unittest discover -s root/namespace -t root``). + Test discovery supports :term:`namespace packages `. .. versionchanged:: 3.11 - :mod:`unittest` dropped the :term:`namespace packages ` - support in Python 3.11. It has been broken since Python 3.7. Start directory and - subdirectories containing tests must be regular package that have - ``__init__.py`` file. + Test discovery dropped the :term:`namespace packages ` + support. It has been broken since Python 3.7. + Start directory and its subdirectories containing tests must be regular + package that have ``__init__.py`` file. - Directories containing start directory still can be a namespace package. - In this case, you need to specify start directory as dotted package name, - and target directory explicitly. For example:: + If the start directory is the dotted name of the package, the ancestor packages + can be namespace packages. - # proj/ <-- current directory - # namespace/ - # mypkg/ - # __init__.py - # test_mypkg.py - - python -m unittest discover -s namespace.mypkg -t . +.. versionchanged:: 3.14 + Test discovery supports :term:`namespace package` as start directory again. + To avoid scanning directories unrelated to Python, + tests are not searched in subdirectories that do not contain ``__init__.py``. .. _organizing-tests: @@ -1915,10 +1908,8 @@ Loading and running tests Modules that raise :exc:`SkipTest` on import are recorded as skips, not errors. - .. versionchanged:: 3.4 *start_dir* can be a :term:`namespace packages `. - .. versionchanged:: 3.4 Paths are sorted before being imported so that execution order is the same even if the underlying file system's ordering is not dependent on file name. 
@@ -1930,11 +1921,13 @@ Loading and running tests .. versionchanged:: 3.11 *start_dir* can not be a :term:`namespace packages <namespace package>`. - It has been broken since Python 3.7 and Python 3.11 officially remove it. + It has been broken since Python 3.7, and Python 3.11 officially removes it. .. versionchanged:: 3.13 *top_level_dir* is only stored for the duration of *discover* call. + .. versionchanged:: 3.14 + *start_dir* can once again be a :term:`namespace package`. The following attributes of a :class:`TestLoader` can be configured either by subclassing or assignment on an instance: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index d52faa614db94e..1dd6c19018934b 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -421,6 +421,15 @@ unicodedata * The Unicode database has been updated to Unicode 16.0.0. + +unittest +-------- + +* unittest discovery supports :term:`namespace package` as start + directory again. It was removed in Python 3.11. + (Contributed by Jacob Walls in :gh:`80958`.) + + .. Add improved modules above alphabetically, not here at the end. 
Optimizations diff --git a/Lib/test/test_unittest/namespace_test_pkg/bar/__init__.py b/Lib/test/test_unittest/namespace_test_pkg/bar/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py b/Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git a/Lib/test/test_unittest/namespace_test_pkg/noop/no2/__init__.py b/Lib/test/test_unittest/namespace_test_pkg/noop/no2/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py b/Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git a/Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py b/Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git a/Lib/test/test_unittest/namespace_test_pkg/test_foo.py b/Lib/test/test_unittest/namespace_test_pkg/test_foo.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/test_foo.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git 
a/Lib/test/test_unittest/test_discovery.py b/Lib/test/test_unittest/test_discovery.py index a44b18406c08be..38c9779daaf87d 100644 --- a/Lib/test/test_unittest/test_discovery.py +++ b/Lib/test/test_unittest/test_discovery.py @@ -4,12 +4,14 @@ import sys import types import pickle +from importlib._bootstrap_external import NamespaceLoader from test import support from test.support import import_helper import unittest import unittest.mock import test.test_unittest +from test.test_importlib import util as test_util class TestableTestProgram(unittest.TestProgram): @@ -395,7 +397,7 @@ def restore_isdir(): self.addCleanup(restore_isdir) _find_tests_args = [] - def _find_tests(start_dir, pattern): + def _find_tests(start_dir, pattern, namespace=None): _find_tests_args.append((start_dir, pattern)) return ['tests'] loader._find_tests = _find_tests @@ -815,7 +817,7 @@ def test_discovery_from_dotted_path(self): expectedPath = os.path.abspath(os.path.dirname(test.test_unittest.__file__)) self.wasRun = False - def _find_tests(start_dir, pattern): + def _find_tests(start_dir, pattern, namespace=None): self.wasRun = True self.assertEqual(start_dir, expectedPath) return tests @@ -848,6 +850,54 @@ def restore(): 'Can not use builtin modules ' 'as dotted module names') + def test_discovery_from_dotted_namespace_packages(self): + loader = unittest.TestLoader() + + package = types.ModuleType('package') + package.__name__ = "tests" + package.__path__ = ['/a', '/b'] + package.__file__ = None + package.__spec__ = types.SimpleNamespace( + name=package.__name__, + loader=NamespaceLoader(package.__name__, package.__path__, None), + submodule_search_locations=['/a', '/b'] + ) + + def _import(packagename, *args, **kwargs): + sys.modules[packagename] = package + return package + + _find_tests_args = [] + def _find_tests(start_dir, pattern, namespace=None): + _find_tests_args.append((start_dir, pattern)) + return ['%s/tests' % start_dir] + + loader._find_tests = _find_tests + loader.suiteClass = 
list + + with unittest.mock.patch('builtins.__import__', _import): + # Since loader.discover() can modify sys.path, restore it when done. + with import_helper.DirsOnSysPath(): + # Make sure to remove 'package' from sys.modules when done. + with test_util.uncache('package'): + suite = loader.discover('package') + + self.assertEqual(suite, ['/a/tests', '/b/tests']) + + def test_discovery_start_dir_is_namespace(self): + """Subdirectory discovery not affected if start_dir is a namespace pkg.""" + loader = unittest.TestLoader() + with ( + import_helper.DirsOnSysPath(os.path.join(os.path.dirname(__file__))), + test_util.uncache('namespace_test_pkg') + ): + suite = loader.discover('namespace_test_pkg') + self.assertEqual( + {list(suite)[0]._tests[0].__module__ for suite in suite._tests if list(suite)}, + # files under namespace_test_pkg.noop not discovered. + {'namespace_test_pkg.test_foo', 'namespace_test_pkg.bar.test_bar'}, + ) + def test_discovery_failed_discovery(self): from test.test_importlib import util diff --git a/Lib/unittest/loader.py b/Lib/unittest/loader.py index 22797b83a68bc8..a52950dad224ee 100644 --- a/Lib/unittest/loader.py +++ b/Lib/unittest/loader.py @@ -274,6 +274,8 @@ def discover(self, start_dir, pattern='test*.py', top_level_dir=None): self._top_level_dir = top_level_dir is_not_importable = False + is_namespace = False + tests = [] if os.path.isdir(os.path.abspath(start_dir)): start_dir = os.path.abspath(start_dir) if start_dir != top_level_dir: @@ -286,12 +288,25 @@ def discover(self, start_dir, pattern='test*.py', top_level_dir=None): is_not_importable = True else: the_module = sys.modules[start_dir] - top_part = start_dir.split('.')[0] - try: - start_dir = os.path.abspath( - os.path.dirname((the_module.__file__))) - except AttributeError: - if the_module.__name__ in sys.builtin_module_names: + if not hasattr(the_module, "__file__") or the_module.__file__ is None: + # look for namespace packages + try: + spec = the_module.__spec__ + except 
AttributeError: + spec = None + + if spec and spec.submodule_search_locations is not None: + is_namespace = True + + for path in the_module.__path__: + if (not set_implicit_top and + not path.startswith(top_level_dir)): + continue + self._top_level_dir = \ + (path.split(the_module.__name__ + .replace(".", os.path.sep))[0]) + tests.extend(self._find_tests(path, pattern, namespace=True)) + elif the_module.__name__ in sys.builtin_module_names: # builtin module raise TypeError('Can not use builtin modules ' 'as dotted module names') from None @@ -300,14 +315,27 @@ def discover(self, start_dir, pattern='test*.py', top_level_dir=None): f"don't know how to discover from {the_module!r}" ) from None + else: + top_part = start_dir.split('.')[0] + start_dir = os.path.abspath(os.path.dirname((the_module.__file__))) + if set_implicit_top: - self._top_level_dir = self._get_directory_containing_module(top_part) + if not is_namespace: + if sys.modules[top_part].__file__ is None: + self._top_level_dir = os.path.dirname(the_module.__file__) + if self._top_level_dir not in sys.path: + sys.path.insert(0, self._top_level_dir) + else: + self._top_level_dir = \ + self._get_directory_containing_module(top_part) sys.path.remove(top_level_dir) if is_not_importable: raise ImportError('Start directory is not importable: %r' % start_dir) - tests = list(self._find_tests(start_dir, pattern)) + if not is_namespace: + tests = list(self._find_tests(start_dir, pattern)) + self._top_level_dir = original_top_level_dir return self.suiteClass(tests) @@ -343,7 +371,7 @@ def _match_path(self, path, full_path, pattern): # override this method to use alternative matching strategy return fnmatch(path, pattern) - def _find_tests(self, start_dir, pattern): + def _find_tests(self, start_dir, pattern, namespace=False): """Used by discovery. 
Yields test suites it loads.""" # Handle the __init__ in this package name = self._get_name_from_path(start_dir) @@ -352,7 +380,8 @@ def _find_tests(self, start_dir, pattern): if name != '.' and name not in self._loading_packages: # name is in self._loading_packages while we have called into # loadTestsFromModule with name. - tests, should_recurse = self._find_test_path(start_dir, pattern) + tests, should_recurse = self._find_test_path( + start_dir, pattern, namespace) if tests is not None: yield tests if not should_recurse: @@ -363,7 +392,8 @@ def _find_tests(self, start_dir, pattern): paths = sorted(os.listdir(start_dir)) for path in paths: full_path = os.path.join(start_dir, path) - tests, should_recurse = self._find_test_path(full_path, pattern) + tests, should_recurse = self._find_test_path( + full_path, pattern, False) if tests is not None: yield tests if should_recurse: @@ -371,11 +401,11 @@ def _find_tests(self, start_dir, pattern): name = self._get_name_from_path(full_path) self._loading_packages.add(name) try: - yield from self._find_tests(full_path, pattern) + yield from self._find_tests(full_path, pattern, False) finally: self._loading_packages.discard(name) - def _find_test_path(self, full_path, pattern): + def _find_test_path(self, full_path, pattern, namespace=False): """Used by discovery. 
Loads tests from a single file, or a directories' __init__.py when @@ -419,7 +449,8 @@ def _find_test_path(self, full_path, pattern): msg % (mod_name, module_dir, expected_dir)) return self.loadTestsFromModule(module, pattern=pattern), False elif os.path.isdir(full_path): - if not os.path.isfile(os.path.join(full_path, '__init__.py')): + if (not namespace and + not os.path.isfile(os.path.join(full_path, '__init__.py'))): return None, False load_tests = None diff --git a/Makefile.pre.in b/Makefile.pre.in index fb6f22d57397db..d6f75a931a3db2 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2534,6 +2534,10 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_tools \ test/test_ttk \ test/test_unittest \ + test/test_unittest/namespace_test_pkg \ + test/test_unittest/namespace_test_pkg/bar \ + test/test_unittest/namespace_test_pkg/noop \ + test/test_unittest/namespace_test_pkg/noop/no2 \ test/test_unittest/testmock \ test/test_warnings \ test/test_warnings/data \ diff --git a/Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst b/Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst new file mode 100644 index 00000000000000..f0edd7b1ac6e8b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst @@ -0,0 +1 @@ +unittest discovery supports PEP 420 namespace packages as start directory again. 
From 834ba5aaf21ac7fd123534dae8f9e478ee526aaa Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 23 Oct 2024 10:50:29 +0300 Subject: [PATCH 080/106] gh-58032: Deprecate the argparse.FileType type converter (GH-124664) --- .../pending-removal-in-future.rst | 21 +++---- Doc/library/argparse.rst | 25 +++++--- Doc/whatsnew/3.14.rst | 6 ++ Lib/argparse.py | 18 ++++-- Lib/test/test_argparse.py | 57 ++++++++++++------- ...4-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst | 1 + 6 files changed, 83 insertions(+), 45 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index d77fc86eab0ed6..5a4502ac08a5f0 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -4,16 +4,6 @@ Pending removal in future versions The following APIs will be removed in the future, although there is currently no date scheduled for their removal. -* :mod:`argparse`: - - * Nesting argument groups and nesting mutually exclusive - groups are deprecated. - * Passing the undocumented keyword argument *prefix_chars* to - :meth:`~argparse.ArgumentParser.add_argument_group` is now - deprecated. - -* :mod:`array`'s ``'u'`` format code (:gh:`57281`) - * :mod:`builtins`: * ``bool(NotImplemented)``. @@ -43,6 +33,17 @@ although there is currently no date scheduled for their removal. as a single positional argument. (Contributed by Serhiy Storchaka in :gh:`109218`.) +* :mod:`argparse`: + + * Nesting argument groups and nesting mutually exclusive + groups are deprecated. + * Passing the undocumented keyword argument *prefix_chars* to + :meth:`~argparse.ArgumentParser.add_argument_group` is now + deprecated. + * The :class:`argparse.FileType` type converter is deprecated. 
+ +* :mod:`array`'s ``'u'`` format code (:gh:`57281`) + * :mod:`calendar`: ``calendar.January`` and ``calendar.February`` constants are deprecated and replaced by :data:`calendar.JANUARY` and :data:`calendar.FEBRUARY`. diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index ef0db3e9789c98..65663d43f50a9d 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -865,16 +865,14 @@ See also :ref:`specifying-ambiguous-arguments`. The supported values are: output files:: >>> parser = argparse.ArgumentParser() - >>> parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), - ... default=sys.stdin) - >>> parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'), - ... default=sys.stdout) + >>> parser.add_argument('infile', nargs='?') + >>> parser.add_argument('outfile', nargs='?') >>> parser.parse_args(['input.txt', 'output.txt']) - Namespace(infile=<_io.TextIOWrapper name='input.txt' encoding='UTF-8'>, - outfile=<_io.TextIOWrapper name='output.txt' encoding='UTF-8'>) + Namespace(infile='input.txt', outfile='output.txt') + >>> parser.parse_args(['input.txt']) + Namespace(infile='input.txt', outfile=None) >>> parser.parse_args([]) - Namespace(infile=<_io.TextIOWrapper name='' encoding='UTF-8'>, - outfile=<_io.TextIOWrapper name='' encoding='UTF-8'>) + Namespace(infile=None, outfile=None) .. index:: single: * (asterisk); in argparse module @@ -1033,7 +1031,6 @@ Common built-in types and functions can be used as type converters: parser.add_argument('distance', type=float) parser.add_argument('street', type=ascii) parser.add_argument('code_point', type=ord) - parser.add_argument('dest_file', type=argparse.FileType('w', encoding='latin-1')) parser.add_argument('datapath', type=pathlib.Path) User defined functions can be used as well: @@ -1827,9 +1824,19 @@ FileType objects >>> parser.parse_args(['-']) Namespace(infile=<_io.TextIOWrapper name='' encoding='UTF-8'>) + .. 
note:: + + If one argument uses *FileType* and then a subsequent argument fails, + an error is reported but the file is not automatically closed. + This can also clobber the output files. + In this case, it would be better to wait until after the parser has + run and then use the :keyword:`with`-statement to manage the files. + .. versionchanged:: 3.4 Added the *encodings* and *errors* parameters. + .. deprecated:: 3.14 + Argument groups ^^^^^^^^^^^^^^^ diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 1dd6c19018934b..b389e6da4c0ac3 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -464,6 +464,12 @@ Deprecated as a single positional argument. (Contributed by Serhiy Storchaka in :gh:`109218`.) +* :mod:`argparse`: + Deprecated the :class:`argparse.FileType` type converter. + Anything with resource management should be done downstream after the + arguments are parsed. + (Contributed by Serhiy Storchaka in :gh:`58032`.) + * :mod:`multiprocessing` and :mod:`concurrent.futures`: The default start method (see :ref:`multiprocessing-start-methods`) changed away from *fork* to *forkserver* on platforms where it was not already diff --git a/Lib/argparse.py b/Lib/argparse.py index 024622bec17c3b..9746173984c6ca 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -18,11 +18,12 @@ 'integers', metavar='int', nargs='+', type=int, help='an integer to be summed') parser.add_argument( - '--log', default=sys.stdout, type=argparse.FileType('w'), + '--log', help='the file where the sum should be written') args = parser.parse_args() - args.log.write('%s' % sum(args.integers)) - args.log.close() + with (open(args.log, 'w') if args.log is not None + else contextlib.nullcontext(sys.stdout)) as log: + log.write('%s' % sum(args.integers)) The module contains the following public classes: @@ -39,7 +40,8 @@ - FileType -- A factory for defining types of files to be created. 
As the example above shows, instances of FileType are typically passed as - the type= argument of add_argument() calls. + the type= argument of add_argument() calls. Deprecated since + Python 3.14. - Action -- The base class for parser actions. Typically actions are selected by passing strings like 'store_true' or 'append_const' to @@ -1252,7 +1254,7 @@ def __call__(self, parser, namespace, values, option_string=None): # ============== class FileType(object): - """Factory for creating file object types + """Deprecated factory for creating file object types Instances of FileType are typically passed as type= arguments to the ArgumentParser add_argument() method. @@ -1269,6 +1271,12 @@ class FileType(object): """ def __init__(self, mode='r', bufsize=-1, encoding=None, errors=None): + import warnings + warnings.warn( + "FileType is deprecated. Simply open files after parsing arguments.", + category=PendingDeprecationWarning, + stacklevel=2 + ) self._mode = mode self._bufsize = bufsize self._encoding = encoding diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 4bd7a935b9b757..ed1c5c34e526aa 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -1773,27 +1773,43 @@ def convert_arg_line_to_args(self, arg_line): # Type conversion tests # ===================== +def FileType(*args, **kwargs): + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'FileType is deprecated', + PendingDeprecationWarning, __name__) + return argparse.FileType(*args, **kwargs) + + +class TestFileTypeDeprecation(TestCase): + + def test(self): + with self.assertWarns(PendingDeprecationWarning) as cm: + argparse.FileType() + self.assertIn('FileType is deprecated', str(cm.warning)) + self.assertEqual(cm.filename, __file__) + + class TestFileTypeRepr(TestCase): def test_r(self): - type = argparse.FileType('r') + type = FileType('r') self.assertEqual("FileType('r')", repr(type)) def test_wb_1(self): - type = argparse.FileType('wb', 1) + type = 
FileType('wb', 1) self.assertEqual("FileType('wb', 1)", repr(type)) def test_r_latin(self): - type = argparse.FileType('r', encoding='latin_1') + type = FileType('r', encoding='latin_1') self.assertEqual("FileType('r', encoding='latin_1')", repr(type)) def test_w_big5_ignore(self): - type = argparse.FileType('w', encoding='big5', errors='ignore') + type = FileType('w', encoding='big5', errors='ignore') self.assertEqual("FileType('w', encoding='big5', errors='ignore')", repr(type)) def test_r_1_replace(self): - type = argparse.FileType('r', 1, errors='replace') + type = FileType('r', 1, errors='replace') self.assertEqual("FileType('r', 1, errors='replace')", repr(type)) @@ -1847,7 +1863,6 @@ def __eq__(self, other): text = text.decode('ascii') return self.name == other.name == text - class TestFileTypeR(TempDirMixin, ParserTestCase): """Test the FileType option/argument type for reading files""" @@ -1860,8 +1875,8 @@ def setUp(self): self.create_readonly_file('readonly') argument_signatures = [ - Sig('-x', type=argparse.FileType()), - Sig('spam', type=argparse.FileType('r')), + Sig('-x', type=FileType()), + Sig('spam', type=FileType('r')), ] failures = ['-x', '', 'non-existent-file.txt'] successes = [ @@ -1881,7 +1896,7 @@ def setUp(self): file.close() argument_signatures = [ - Sig('-c', type=argparse.FileType('r'), default='no-file.txt'), + Sig('-c', type=FileType('r'), default='no-file.txt'), ] # should provoke no such file error failures = [''] @@ -1900,8 +1915,8 @@ def setUp(self): file.write(file_name) argument_signatures = [ - Sig('-x', type=argparse.FileType('rb')), - Sig('spam', type=argparse.FileType('rb')), + Sig('-x', type=FileType('rb')), + Sig('spam', type=FileType('rb')), ] failures = ['-x', ''] successes = [ @@ -1939,8 +1954,8 @@ def setUp(self): self.create_writable_file('writable') argument_signatures = [ - Sig('-x', type=argparse.FileType('w')), - Sig('spam', type=argparse.FileType('w')), + Sig('-x', type=FileType('w')), + Sig('spam', 
type=FileType('w')), ] failures = ['-x', '', 'readonly'] successes = [ @@ -1962,8 +1977,8 @@ def setUp(self): self.create_writable_file('writable') argument_signatures = [ - Sig('-x', type=argparse.FileType('x')), - Sig('spam', type=argparse.FileType('x')), + Sig('-x', type=FileType('x')), + Sig('spam', type=FileType('x')), ] failures = ['-x', '', 'readonly', 'writable'] successes = [ @@ -1977,8 +1992,8 @@ class TestFileTypeWB(TempDirMixin, ParserTestCase): """Test the FileType option/argument type for writing binary files""" argument_signatures = [ - Sig('-x', type=argparse.FileType('wb')), - Sig('spam', type=argparse.FileType('wb')), + Sig('-x', type=FileType('wb')), + Sig('spam', type=FileType('wb')), ] failures = ['-x', ''] successes = [ @@ -1994,8 +2009,8 @@ class TestFileTypeXB(TestFileTypeX): "Test the FileType option/argument type for writing new binary files only" argument_signatures = [ - Sig('-x', type=argparse.FileType('xb')), - Sig('spam', type=argparse.FileType('xb')), + Sig('-x', type=FileType('xb')), + Sig('spam', type=FileType('xb')), ] successes = [ ('-x foo bar', NS(x=WFile('foo'), spam=WFile('bar'))), @@ -2007,7 +2022,7 @@ class TestFileTypeOpenArgs(TestCase): """Test that open (the builtin) is correctly called""" def test_open_args(self): - FT = argparse.FileType + FT = FileType cases = [ (FT('rb'), ('rb', -1, None, None)), (FT('w', 1), ('w', 1, None, None)), @@ -2022,7 +2037,7 @@ def test_open_args(self): def test_invalid_file_type(self): with self.assertRaises(ValueError): - argparse.FileType('b')('-test') + FileType('b')('-test') class TestFileTypeMissingInitialization(TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst b/Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst new file mode 100644 index 00000000000000..278512b22a8d3f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst @@ -0,0 +1 @@ +Deprecate the :class:`argparse.FileType` type 
converter. From de0d5c6e2e12f24ade1ccc457afaf5fb2c650c64 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:48:39 +0100 Subject: [PATCH 081/106] gh-119786: move 'changing grammar' checklist from devguide to InternalDocs (#125874) --- InternalDocs/README.md | 2 + InternalDocs/changing_grammar.md | 63 ++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 InternalDocs/changing_grammar.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 48c893bde2a631..2ef6e653ac19d4 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -19,6 +19,8 @@ Compiling Python Source Code - [Compiler Design](compiler.md) +- [Changing Python's Grammar](changing_grammar.md) + Runtime Objects --- diff --git a/InternalDocs/changing_grammar.md b/InternalDocs/changing_grammar.md new file mode 100644 index 00000000000000..1a5eebdc1418dc --- /dev/null +++ b/InternalDocs/changing_grammar.md @@ -0,0 +1,63 @@ +# Changing CPython's grammar + +There's more to changing Python's grammar than editing +[`Grammar/python.gram`](../Grammar/python.gram). +Below is a checklist of things that may need to change. + +> [!NOTE] +> +> Many of these changes require re-generating some of the derived +> files. If things mysteriously don't work, it may help to run +> ``make clean``. + +## Checklist + +* [`Grammar/python.gram`](../Grammar/python.gram): The grammar definition, + with actions that build AST nodes. + After changing it, run ``make regen-pegen`` (or ``build.bat --regen`` on Windows), + to regenerate [`Parser/parser.c`](../Parser/parser.c). + (This runs Python's parser generator, [`Tools/peg_generator`](../Tools/peg_generator)). + +* [`Grammar/Tokens`](../Grammar/Tokens) is a place for adding new token types. 
After + changing it, run ``make regen-token`` to regenerate + [`Include/internal/pycore_token.h`](../Include/internal/pycore_token.h), + [`Parser/token.c`](../Parser/token.c), [`Lib/token.py`](../Lib/token.py) + and [`Doc/library/token-list.inc`](../Doc/library/token-list.inc). + If you change both ``python.gram`` and ``Tokens``, run ``make regen-token`` + before ``make regen-pegen``. + On Windows, ``build.bat --regen`` will regenerate both at the same time. + +* [`Parser/Python.asdl`](../Parser/Python.asdl) may need changes to match the grammar. + Then run ``make regen-ast`` to regenerate + [`Include/internal/pycore_ast.h`](../Include/internal/pycore_ast.h) and + [`Python/Python-ast.c`](../Python/Python-ast.c). + +* [`Parser/lexer/`](../Parser/lexer/) contains the tokenization code. + This is where you would add a new type of comment or string literal, for example. + +* [`Python/ast.c`](../Python/ast.c) will need changes to validate AST objects + involved with the grammar change. + +* [`Python/ast_unparse.c`](../Python/ast_unparse.c) will need changes to unparse + AST involved with the grammar change ("unparsing" is used to turn annotations + into strings per [PEP 563](https://peps.python.org/pep-0563/)). + +* The [`compiler`](compiler.md) may need to change when there are changes + to the `AST`. + +* ``_Unparser`` in the [`Lib/ast.py`](../Lib/ast.py) file may need changes + to accommodate any modifications in the AST nodes. + +* [`Doc/library/ast.rst`](../Doc/library/ast.rst) may need to be updated + to reflect changes to AST nodes. + +* Add some usage of your new syntax to ``test_grammar.py``. + +* Certain changes may require tweaks to the library module + [`pyclbr`](https://docs.python.org/3/library/pyclbr.html#module-pyclbr). + +* [`Lib/tokenize.py`](../Lib/tokenize.py) needs changes to match changes + to the tokenizer. + +* Documentation must be written! Specifically, one or more of the pages in + [`Doc/reference/`](../Doc/reference/) will need to be updated. 
From 6f26d496d3c894970ee18a125e9100791ebc2b36 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 23 Oct 2024 10:10:06 -0600 Subject: [PATCH 082/106] gh-125286: Share the Main Refchain With Legacy Interpreters (gh-125709) They used to be shared, before 3.12. Returning to sharing them resolves a failure on Py_TRACE_REFS builds. Co-authored-by: Petr Viktorin --- Doc/library/sys.rst | 29 +++++++++++++ Doc/using/configure.rst | 2 +- Doc/whatsnew/3.14.rst | 9 ++++ Objects/object.c | 92 ++++++++++++++++++++--------------------- Objects/unicodeobject.c | 8 ---- Python/pylifecycle.c | 14 +++++++ Python/pystate.c | 6 +-- 7 files changed, 99 insertions(+), 61 deletions(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 20a06a1ecd1a4c..37f1719db607de 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -920,6 +920,35 @@ always available. It is not guaranteed to exist in all implementations of Python. +.. function:: getobjects(limit[, type]) + + This function only exists if CPython was built using the + specialized configure option :option:`--with-trace-refs`. + It is intended only for debugging garbage-collection issues. + + Return a list of up to *limit* dynamically allocated Python objects. + If *type* is given, only objects of that exact type (not subtypes) + are included. + + Objects from the list are not safe to use. + Specifically, the result will include objects from all interpreters that + share their object allocator state (that is, ones created with + :c:member:`PyInterpreterConfig.use_main_obmalloc` set to 1 + or using :c:func:`Py_NewInterpreter`, and the + :ref:`main interpreter `). + Mixing objects from different interpreters may lead to crashes + or other unexpected behavior. + + .. impl-detail:: + + This function should be used for specialized purposes only. + It is not guaranteed to exist in all implementations of Python. + + .. versionchanged:: next + + The result may include objects from other interpreters. + + .. 
function:: getprofile() .. index:: diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 10cdf2376229ff..0e7b1be5b4bc2e 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -702,7 +702,7 @@ Debug options Effects: * Define the ``Py_TRACE_REFS`` macro. - * Add :func:`!sys.getobjects` function. + * Add :func:`sys.getobjects` function. * Add :envvar:`PYTHONDUMPREFS` environment variable. The :envvar:`PYTHONDUMPREFS` environment variable can be used to dump diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b389e6da4c0ac3..64f3d18e7fc6a4 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -416,6 +416,15 @@ symtable (Contributed by Bénédikt Tran in :gh:`120029`.) + +sys +--- + +* The previously undocumented special function :func:`sys.getobjects`, + which only exists in specialized builds of Python, may now return objects + from other interpreters than the one it's called in. + + unicodedata ----------- diff --git a/Objects/object.c b/Objects/object.c index 1a15b70d3dc63f..7cc74a8dc0d8eb 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -171,6 +171,48 @@ _PyDebug_PrintTotalRefs(void) { #define REFCHAIN(interp) interp->object_state.refchain #define REFCHAIN_VALUE ((void*)(uintptr_t)1) +static inline int +has_own_refchain(PyInterpreterState *interp) +{ + if (interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) { + return (_Py_IsMainInterpreter(interp) + || _PyInterpreterState_Main() == NULL); + } + return 1; +} + +static int +refchain_init(PyInterpreterState *interp) +{ + if (!has_own_refchain(interp)) { + // Legacy subinterpreters share a refchain with the main interpreter. + REFCHAIN(interp) = REFCHAIN(_PyInterpreterState_Main()); + return 0; + } + _Py_hashtable_allocator_t alloc = { + // Don't use default PyMem_Malloc() and PyMem_Free() which + // require the caller to hold the GIL. 
+ .malloc = PyMem_RawMalloc, + .free = PyMem_RawFree, + }; + REFCHAIN(interp) = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct, + NULL, NULL, &alloc); + if (REFCHAIN(interp) == NULL) { + return -1; + } + return 0; +} + +static void +refchain_fini(PyInterpreterState *interp) +{ + if (has_own_refchain(interp) && REFCHAIN(interp) != NULL) { + _Py_hashtable_destroy(REFCHAIN(interp)); + } + REFCHAIN(interp) = NULL; +} + bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj) { @@ -2191,16 +2233,7 @@ PyStatus _PyObject_InitState(PyInterpreterState *interp) { #ifdef Py_TRACE_REFS - _Py_hashtable_allocator_t alloc = { - // Don't use default PyMem_Malloc() and PyMem_Free() which - // require the caller to hold the GIL. - .malloc = PyMem_RawMalloc, - .free = PyMem_RawFree, - }; - REFCHAIN(interp) = _Py_hashtable_new_full( - _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct, - NULL, NULL, &alloc); - if (REFCHAIN(interp) == NULL) { + if (refchain_init(interp) < 0) { return _PyStatus_NO_MEMORY(); } #endif @@ -2211,8 +2244,7 @@ void _PyObject_FiniState(PyInterpreterState *interp) { #ifdef Py_TRACE_REFS - _Py_hashtable_destroy(REFCHAIN(interp)); - REFCHAIN(interp) = NULL; + refchain_fini(interp); #endif } @@ -2501,42 +2533,6 @@ _Py_ResurrectReference(PyObject *op) #ifdef Py_TRACE_REFS -/* Make sure the ref is associated with the right interpreter. - * This only needs special attention for heap-allocated objects - * that have been immortalized, and only when the object might - * outlive the interpreter where it was created. That means the - * object was necessarily created using a global allocator - * (i.e. from the main interpreter). Thus in that specific case - * we move the object over to the main interpreter's refchain. 
- * - * This was added for the sake of the immortal interned strings, - * where legacy subinterpreters share the main interpreter's - * interned dict (and allocator), and therefore the strings can - * outlive the subinterpreter. - * - * It may make sense to fold this into _Py_SetImmortalUntracked(), - * but that requires further investigation. In the meantime, it is - * up to the caller to know if this is needed. There should be - * very few cases. - */ -void -_Py_NormalizeImmortalReference(PyObject *op) -{ - assert(_Py_IsImmortal(op)); - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_PyRefchain_IsTraced(interp, op)) { - return; - } - PyInterpreterState *main_interp = _PyInterpreterState_Main(); - if (interp != main_interp - && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) - { - assert(!_PyRefchain_IsTraced(main_interp, op)); - _PyRefchain_Remove(interp, op); - _PyRefchain_Trace(main_interp, op); - } -} - void _Py_ForgetReference(PyObject *op) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b94a74c2c688a9..9cd9781e412524 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15444,10 +15444,6 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p) assert(*p); } -#ifdef Py_TRACE_REFS -extern void _Py_NormalizeImmortalReference(PyObject *); -#endif - static void immortalize_interned(PyObject *s) { @@ -15463,10 +15459,6 @@ immortalize_interned(PyObject *s) #endif _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL; _Py_SetImmortal(s); -#ifdef Py_TRACE_REFS - /* Make sure the ref is associated with the right interpreter. 
*/ - _Py_NormalizeImmortalReference(s); -#endif } static /* non-null */ PyObject* diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index b8f424854ecb86..8f38fbedae9842 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -674,6 +674,13 @@ pycore_create_interpreter(_PyRuntimeState *runtime, return status; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + status = _PyObject_InitState(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + // initialize the interp->obmalloc state. This must be done after // the settings are loaded (so that feature_flags are set) but before // any calls are made to obmalloc functions. @@ -2297,6 +2304,13 @@ new_interpreter(PyThreadState **tstate_p, goto error; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + status = _PyObject_InitState(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + // initialize the interp->obmalloc state. This must be done after // the settings are loaded (so that feature_flags are set) but before // any calls are made to obmalloc functions. diff --git a/Python/pystate.c b/Python/pystate.c index 7df872cd6d7d8a..36b31f3b9e4200 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -629,10 +629,8 @@ init_interpreter(PyInterpreterState *interp, assert(next != NULL || (interp == runtime->interpreters.main)); interp->next = next; - PyStatus status = _PyObject_InitState(interp); - if (_PyStatus_EXCEPTION(status)) { - return status; - } + // We would call _PyObject_InitState() at this point + // if interp->feature_flags were already set.
_PyEval_InitState(interp); _PyGC_InitState(&interp->gc); From 9c01db40aa5edbd75ce50342c08f7ed018ee7864 Mon Sep 17 00:00:00 2001 From: Wulian <1055917385@qq.com> Date: Thu, 24 Oct 2024 04:29:32 +0800 Subject: [PATCH 083/106] gh-125665: Update turtledemo docstrings with correct file names (#125691) Co-authored-by: Wulian Co-authored-by: Terry Jan Reedy --- Doc/library/turtle.rst | 3 --- Lib/turtledemo/bytedesign.py | 4 +--- Lib/turtledemo/chaos.py | 8 +++----- Lib/turtledemo/clock.py | 9 ++------- Lib/turtledemo/colormixer.py | 3 +-- Lib/turtledemo/forest.py | 15 ++++++--------- Lib/turtledemo/fractalcurves.py | 4 +--- Lib/turtledemo/lindenmayer.py | 4 +--- Lib/turtledemo/minimal_hanoi.py | 7 +------ Lib/turtledemo/nim.py | 4 +--- Lib/turtledemo/paint.py | 15 +++++---------- Lib/turtledemo/peace.py | 4 +--- Lib/turtledemo/penrose.py | 6 ++---- Lib/turtledemo/planet_and_moon.py | 4 +--- Lib/turtledemo/rosette.py | 4 +--- Lib/turtledemo/round_dance.py | 7 +------ Lib/turtledemo/sorting_animate.py | 7 +------ Lib/turtledemo/tree.py | 4 +--- Lib/turtledemo/two_canvases.py | 2 +- Lib/turtledemo/yinyang.py | 4 +--- 20 files changed, 32 insertions(+), 86 deletions(-) diff --git a/Doc/library/turtle.rst b/Doc/library/turtle.rst index efa4b6f8f1d3f9..8eb4f8271fcfae 100644 --- a/Doc/library/turtle.rst +++ b/Doc/library/turtle.rst @@ -2778,9 +2778,6 @@ Changes since Python 3.0 :func:`Screen.numinput `. These pop up input dialogs and return strings and numbers respectively. -- Two example scripts :file:`tdemo_nim.py` and :file:`tdemo_round_dance.py` - have been added to the :file:`Lib/turtledemo` directory. - .. 
doctest:: :skipif: _tkinter is None diff --git a/Lib/turtledemo/bytedesign.py b/Lib/turtledemo/bytedesign.py index 476cdaabfceab1..a5d76a6b6ff295 100644 --- a/Lib/turtledemo/bytedesign.py +++ b/Lib/turtledemo/bytedesign.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_bytedesign.py +"""turtledemo/bytedesign.py An example adapted from the example-suite of PythonCard's turtle graphics. diff --git a/Lib/turtledemo/chaos.py b/Lib/turtledemo/chaos.py index 6a45d0d807ef0b..b25f0fa42c901d 100644 --- a/Lib/turtledemo/chaos.py +++ b/Lib/turtledemo/chaos.py @@ -1,9 +1,7 @@ -# File: tdemo_chaos.py -# Author: Gregor Lingl -# Date: 2009-06-24 - -# A demonstration of chaos +"""turtledemo/chaos.py +A demonstration of chaos. +""" from turtle import * N = 80 diff --git a/Lib/turtledemo/clock.py b/Lib/turtledemo/clock.py index 8a630e29b8da50..8b639066c4f440 100644 --- a/Lib/turtledemo/clock.py +++ b/Lib/turtledemo/clock.py @@ -1,12 +1,7 @@ -""" turtle-example-suite: - - turtledemo/clock.py +"""turtledemo/clock.py Enhanced clock-program, showing date -and time - ------------------------------------ - Press STOP to exit the program! - ------------------------------------ +and time. """ from turtle import * from datetime import datetime diff --git a/Lib/turtledemo/colormixer.py b/Lib/turtledemo/colormixer.py index 448db83361a649..f66012c8154317 100644 --- a/Lib/turtledemo/colormixer.py +++ b/Lib/turtledemo/colormixer.py @@ -1,5 +1,4 @@ -# colormixer - +"""turtledemo/colormixer.py""" from turtle import Screen, Turtle, mainloop class ColorTurtle(Turtle): diff --git a/Lib/turtledemo/forest.py b/Lib/turtledemo/forest.py index cac553223828db..e1fa85a577ffce 100644 --- a/Lib/turtledemo/forest.py +++ b/Lib/turtledemo/forest.py @@ -1,14 +1,11 @@ -""" turtlegraphics-example-suite: +"""turtledemo/forest.py - tdemo_forest.py +Displays a 'forest' of 3 breadth-first trees, +similar to the one in tree.py. +For further details, see tree.py. 
-Displays a 'forest' of 3 breadth-first-trees -similar to the one in tree. -For further remarks see tree.py - -This example is a 'breadth-first'-rewrite of -a Logo program written by Erich Neuwirth. See -http://homepage.univie.ac.at/erich.neuwirth/ +This example is a breadth-first rewrite of +a Logo program by Erich Neuwirth. """ from turtle import Turtle, colormode, tracer, mainloop from random import randrange diff --git a/Lib/turtledemo/fractalcurves.py b/Lib/turtledemo/fractalcurves.py index fda193e06fedee..2d0a506a4f5b9f 100644 --- a/Lib/turtledemo/fractalcurves.py +++ b/Lib/turtledemo/fractalcurves.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_fractalCurves.py +"""turtledemo/fractalcurves.py This program draws two fractal-curve-designs: (1) A hilbert curve (in a box) diff --git a/Lib/turtledemo/lindenmayer.py b/Lib/turtledemo/lindenmayer.py index 7c7a84796c3c28..eb309afb9381b1 100644 --- a/Lib/turtledemo/lindenmayer.py +++ b/Lib/turtledemo/lindenmayer.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - xtx_lindenmayer_indian.py +"""turtledemo/lindenmayer.py Each morning women in Tamil Nadu, in southern India, place designs, created by using rice diff --git a/Lib/turtledemo/minimal_hanoi.py b/Lib/turtledemo/minimal_hanoi.py index 08d8b630fec3b4..e44330eaaf7f18 100644 --- a/Lib/turtledemo/minimal_hanoi.py +++ b/Lib/turtledemo/minimal_hanoi.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_minimal_hanoi.py +"""turtledemo/minimal_hanoi.py A minimal 'Towers of Hanoi' animation: A tower of 6 discs is transferred from the @@ -12,9 +10,6 @@ Discs are turtles with shape "square", but stretched to rectangles by shapesize() - --------------------------------------- - To exit press STOP button - --------------------------------------- """ from turtle import * diff --git a/Lib/turtledemo/nim.py b/Lib/turtledemo/nim.py index 9ae6cc5c01b903..f87c479714d662 100644 --- a/Lib/turtledemo/nim.py +++ b/Lib/turtledemo/nim.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: 
- - tdemo_nim.py +"""turtledemo/nim.py Play nim against the computer. The player who takes the last stick is the winner. diff --git a/Lib/turtledemo/paint.py b/Lib/turtledemo/paint.py index 6e63d004454589..780300fb2da9d1 100644 --- a/Lib/turtledemo/paint.py +++ b/Lib/turtledemo/paint.py @@ -1,12 +1,9 @@ -""" turtle-example-suite: +"""turtledemo/paint.py - tdemo_paint.py - -A simple event-driven paint program - -- left mouse button moves turtle -- middle mouse button changes color -- right mouse button toggles between pen up +A simple event-driven paint program. +- Left mouse button moves turtle. +- Middle mouse button changes color. +- Right mouse button toggles between pen up (no line drawn when the turtle moves) and pen down (line is drawn). If pen up follows at least two pen-down moves, the polygon that @@ -14,8 +11,6 @@ ------------------------------------------- Play around by clicking into the canvas using all three mouse buttons. - ------------------------------------------- - To exit press STOP button ------------------------------------------- """ from turtle import * diff --git a/Lib/turtledemo/peace.py b/Lib/turtledemo/peace.py index fd6abe390ef198..d86c94a48a2472 100644 --- a/Lib/turtledemo/peace.py +++ b/Lib/turtledemo/peace.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_peace.py +"""turtledemo/peace.py A simple drawing suitable as a beginner's programming example. Aside from the diff --git a/Lib/turtledemo/penrose.py b/Lib/turtledemo/penrose.py index ac12c899d3844e..ceaefedac24a67 100644 --- a/Lib/turtledemo/penrose.py +++ b/Lib/turtledemo/penrose.py @@ -1,6 +1,4 @@ -""" xturtle-example-suite: - - xtx_kites_and_darts.py +"""turtledemo/penrose.py Constructs two aperiodic penrose-tilings, consisting of kites and darts, by the method @@ -11,7 +9,7 @@ consisting of five darts. 
For more information see: - http://en.wikipedia.org/wiki/Penrose_tiling + https://en.wikipedia.org/wiki/Penrose_tiling ------------------------------------------- """ from turtle import * diff --git a/Lib/turtledemo/planet_and_moon.py b/Lib/turtledemo/planet_and_moon.py index c0e2c5b79e173e..571afcf922103f 100644 --- a/Lib/turtledemo/planet_and_moon.py +++ b/Lib/turtledemo/planet_and_moon.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_planets_and_moon.py +"""turtledemo/planet_and_moon.py Gravitational system simulation using the approximation method from Feynman-lectures, diff --git a/Lib/turtledemo/rosette.py b/Lib/turtledemo/rosette.py index 47d0f00e9da9d1..48897a620f9d8b 100644 --- a/Lib/turtledemo/rosette.py +++ b/Lib/turtledemo/rosette.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_wikipedia3.py +"""turtledemo/rosette.py This example is inspired by the Wikipedia article on turtle diff --git a/Lib/turtledemo/round_dance.py b/Lib/turtledemo/round_dance.py index 10383614c6e974..9da6389b213207 100644 --- a/Lib/turtledemo/round_dance.py +++ b/Lib/turtledemo/round_dance.py @@ -1,9 +1,4 @@ -""" turtle-example-suite: - - tdemo_round_dance.py - -(Needs version 1.1 of the turtle module that -comes with Python 3.1) +"""turtledemo/round_dance.py Dancing turtles have a compound shape consisting of a series of triangles of diff --git a/Lib/turtledemo/sorting_animate.py b/Lib/turtledemo/sorting_animate.py index ef4946db38250e..e0a2877cd5d621 100644 --- a/Lib/turtledemo/sorting_animate.py +++ b/Lib/turtledemo/sorting_animate.py @@ -1,6 +1,4 @@ -""" - - sorting_animation.py +"""turtledemo/sorting_animate.py A minimal sorting algorithm animation: Sorts a shelf of 10 blocks using insertion @@ -10,9 +8,6 @@ Blocks are turtles with shape "square", but stretched to rectangles by shapesize() - --------------------------------------- - To exit press space button - --------------------------------------- """ from turtle import * import random diff --git
a/Lib/turtledemo/tree.py b/Lib/turtledemo/tree.py index 12729e23688a48..6ad8fcc854a155 100644 --- a/Lib/turtledemo/tree.py +++ b/Lib/turtledemo/tree.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_tree.py +"""turtledemo/tree.py Displays a 'breadth-first-tree' - in contrast to the classical Logo tree drawing programs, diff --git a/Lib/turtledemo/two_canvases.py b/Lib/turtledemo/two_canvases.py index f3602585ab0592..2c8020378edf1b 100644 --- a/Lib/turtledemo/two_canvases.py +++ b/Lib/turtledemo/two_canvases.py @@ -1,4 +1,4 @@ -"""turtledemo.two_canvases +"""turtledemo/two_canvases.py Use TurtleScreen and RawTurtle to draw on two distinct canvases in a separate window. The diff --git a/Lib/turtledemo/yinyang.py b/Lib/turtledemo/yinyang.py index 791060d17e6b6a..6e92d4bf739194 100644 --- a/Lib/turtledemo/yinyang.py +++ b/Lib/turtledemo/yinyang.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_yinyang.py +"""turtledemo/yinyang.py Another drawing suitable as a beginner's programming example. From 13c9fa3d64e0653d696daad716703ef05fd5002b Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 23 Oct 2024 23:37:06 +0200 Subject: [PATCH 084/106] gh-121938: ctypes: Skip test of _pack_-ed struct with c_int64 on x86 (GH-125877) The current auto-generated tests don't cover this; it's instead tested manually. --- Lib/test/test_ctypes/test_generated_structs.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Lib/test/test_ctypes/test_generated_structs.py b/Lib/test/test_ctypes/test_generated_structs.py index cbd73c4e911e4e..d61754d6d49e70 100644 --- a/Lib/test/test_ctypes/test_generated_structs.py +++ b/Lib/test/test_ctypes/test_generated_structs.py @@ -135,6 +135,18 @@ class Packed3(Structure): @register() class Packed4(Structure): + def _maybe_skip(): + # `_pack_` enables MSVC-style packing, but keeps platform-specific + # alignments. 
+ # The C code we generate for GCC/clang currently uses + # `__attribute__((ms_struct))`, which activates MSVC layout *and* + # alignments, that is, sizeof(basic type) == alignment(basic type). + # On a Pentium, int64 is 32-bit aligned, so the two won't match. + # The expected behavior is instead tested in + # StructureTestCase.test_packed, over in test_structures.py. + if sizeof(c_int64) != alignment(c_int64): + raise unittest.SkipTest('cannot test on this platform') + _fields_ = [('a', c_int8), ('b', c_int64)] _pack_ = 8 @@ -436,6 +448,8 @@ def test_generated_data(self): """ for name, cls in TESTCASES.items(): with self.subTest(name=name): + if _maybe_skip := getattr(cls, '_maybe_skip', None): + _maybe_skip() expected = iter(_ctypes_test.get_generated_test_data(name)) expected_name = next(expected) if expected_name is None: From 8f2c0f7a03b71485b5635cb47c000e4e8ace8800 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Wed, 23 Oct 2024 15:04:30 -0700 Subject: [PATCH 085/106] gh-125884: Support breakpoint on functions with annotations (#125892) --- Lib/pdb.py | 7 ++-- Lib/test/test_pdb.py | 36 +++++++++++++++++++ ...-10-23-17-45-40.gh-issue-125884.41E_PD.rst | 1 + 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst diff --git a/Lib/pdb.py b/Lib/pdb.py index 832213abbb98e6..3c0cbb525e28ef 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -118,7 +118,7 @@ def find_first_executable_line(code): return code.co_firstlineno def find_function(funcname, filename): - cre = re.compile(r'def\s+%s\s*[(]' % re.escape(funcname)) + cre = re.compile(r'def\s+%s(\s*\[.+\])?\s*[(]' % re.escape(funcname)) try: fp = tokenize.open(filename) except OSError: @@ -138,9 +138,12 @@ def find_function(funcname, filename): if funcdef: try: - funccode = compile(funcdef, filename, 'exec').co_consts[0] + code = compile(funcdef, filename, 'exec') except SyntaxError: continue + # We should always be able to find the code 
object here + funccode = next(c for c in code.co_consts if + isinstance(c, CodeType) and c.co_name == funcname) lineno_offset = find_first_executable_line(funccode) return funcname, filename, funcstart + lineno_offset - 1 return None diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index 1ea93ed037005d..e5f9848319021a 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -363,6 +363,42 @@ def test_pdb_breakpoint_commands(): 4 """ +def test_pdb_breakpoint_on_annotated_function_def(): + """Test breakpoints on function definitions with annotation. + + >>> def foo[T](): + ... return 0 + + >>> def bar() -> int: + ... return 0 + + >>> def foobar[T]() -> int: + ... return 0 + + >>> reset_Breakpoint() + + >>> def test_function(): + ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + ... pass + + >>> with PdbTestInput([ # doctest: +NORMALIZE_WHITESPACE + ... 'break foo', + ... 'break bar', + ... 'break foobar', + ... 'continue', + ... ]): + ... test_function() + > (2)test_function() + -> import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + (Pdb) break foo + Breakpoint 1 at :2 + (Pdb) break bar + Breakpoint 2 at :2 + (Pdb) break foobar + Breakpoint 3 at :2 + (Pdb) continue + """ + def test_pdb_commands(): """Test the commands command of pdb. diff --git a/Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst b/Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst new file mode 100644 index 00000000000000..684b1f282b143e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst @@ -0,0 +1 @@ +Fixed the bug for :mod:`pdb` where it can't set breakpoints on functions with certain annotations. 
From d3be6f945a4def7d123b2ef4d11d59abcdd3e446 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Wed, 23 Oct 2024 16:27:55 -0700 Subject: [PATCH 086/106] gh-125614: annotationlib: Fix bug where not all Stringifiers are converted (#125635) --- Lib/annotationlib.py | 28 +++++++++-- Lib/test/test_annotationlib.py | 46 +++++++++++++++++++ ...-10-16-22-45-50.gh-issue-125614.3OEo_Q.rst | 3 ++ 3 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-16-22-45-50.gh-issue-125614.3OEo_Q.rst diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index d5166170c071c4..732fbfa628cf5f 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -45,6 +45,7 @@ class Format(enum.IntEnum): "__globals__", "__owner__", "__cell__", + "__stringifier_dict__", ) @@ -268,7 +269,16 @@ class _Stringifier: # instance of the other in place. __slots__ = _SLOTS - def __init__(self, node, globals=None, owner=None, is_class=False, cell=None): + def __init__( + self, + node, + globals=None, + owner=None, + is_class=False, + cell=None, + *, + stringifier_dict, + ): # Either an AST node or a simple str (for the common case where a ForwardRef # represent a single name). assert isinstance(node, (ast.AST, str)) @@ -283,6 +293,7 @@ def __init__(self, node, globals=None, owner=None, is_class=False, cell=None): self.__globals__ = globals self.__cell__ = cell self.__owner__ = owner + self.__stringifier_dict__ = stringifier_dict def __convert_to_ast(self, other): if isinstance(other, _Stringifier): @@ -317,9 +328,15 @@ def __get_ast(self): return node def __make_new(self, node): - return _Stringifier( - node, self.__globals__, self.__owner__, self.__forward_is_class__ + stringifier = _Stringifier( + node, + self.__globals__, + self.__owner__, + self.__forward_is_class__, + stringifier_dict=self.__stringifier_dict__, ) + self.__stringifier_dict__.stringifiers.append(stringifier) + return stringifier # Must implement this since we set __eq__. 
We hash by identity so that # stringifiers in dict keys are kept separate. @@ -462,6 +479,7 @@ def __missing__(self, key): globals=self.globals, owner=self.owner, is_class=self.is_class, + stringifier_dict=self, ) self.stringifiers.append(fwdref) return fwdref @@ -516,7 +534,7 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): name = freevars[i] else: name = "__cell__" - fwdref = _Stringifier(name) + fwdref = _Stringifier(name, stringifier_dict=globals) new_closure.append(types.CellType(fwdref)) closure = tuple(new_closure) else: @@ -573,6 +591,7 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): owner=owner, globals=annotate.__globals__, is_class=is_class, + stringifier_dict=globals, ) globals.stringifiers.append(fwdref) new_closure.append(types.CellType(fwdref)) @@ -591,6 +610,7 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): result = func(Format.VALUE) for obj in globals.stringifiers: obj.__class__ = ForwardRef + obj.__stringifier_dict__ = None # not needed for ForwardRef if isinstance(obj.__ast_node__, str): obj.__arg__ = obj.__ast_node__ obj.__ast_node__ = None diff --git a/Lib/test/test_annotationlib.py b/Lib/test/test_annotationlib.py index eedf2506a14912..2ca7058c14398c 100644 --- a/Lib/test/test_annotationlib.py +++ b/Lib/test/test_annotationlib.py @@ -80,6 +80,42 @@ def f(x: int, y: doesntexist): fwdref.evaluate() self.assertEqual(fwdref.evaluate(globals={"doesntexist": 1}), 1) + def test_nonexistent_attribute(self): + def f( + x: some.module, + y: some[module], + z: some(module), + alpha: some | obj, + beta: +some, + gamma: some < obj, + ): + pass + + anno = annotationlib.get_annotations(f, format=Format.FORWARDREF) + x_anno = anno["x"] + self.assertIsInstance(x_anno, ForwardRef) + self.assertEqual(x_anno, ForwardRef("some.module")) + + y_anno = anno["y"] + self.assertIsInstance(y_anno, ForwardRef) + self.assertEqual(y_anno, ForwardRef("some[module]")) + 
+ z_anno = anno["z"] + self.assertIsInstance(z_anno, ForwardRef) + self.assertEqual(z_anno, ForwardRef("some(module)")) + + alpha_anno = anno["alpha"] + self.assertIsInstance(alpha_anno, ForwardRef) + self.assertEqual(alpha_anno, ForwardRef("some | obj")) + + beta_anno = anno["beta"] + self.assertIsInstance(beta_anno, ForwardRef) + self.assertEqual(beta_anno, ForwardRef("+some")) + + gamma_anno = anno["gamma"] + self.assertIsInstance(gamma_anno, ForwardRef) + self.assertEqual(gamma_anno, ForwardRef("some < obj")) + class TestSourceFormat(unittest.TestCase): def test_closure(self): @@ -91,6 +127,16 @@ def inner(arg: x): anno = annotationlib.get_annotations(inner, format=Format.STRING) self.assertEqual(anno, {"arg": "x"}) + def test_closure_undefined(self): + if False: + x = 0 + + def inner(arg: x): + pass + + anno = annotationlib.get_annotations(inner, format=Format.STRING) + self.assertEqual(anno, {"arg": "x"}) + def test_function(self): def f(x: int, y: doesntexist): pass diff --git a/Misc/NEWS.d/next/Library/2024-10-16-22-45-50.gh-issue-125614.3OEo_Q.rst b/Misc/NEWS.d/next/Library/2024-10-16-22-45-50.gh-issue-125614.3OEo_Q.rst new file mode 100644 index 00000000000000..5f4803c9b74578 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-16-22-45-50.gh-issue-125614.3OEo_Q.rst @@ -0,0 +1,3 @@ +In the :data:`~annotationlib.Format.FORWARDREF` format of +:mod:`annotationlib`, fix bug where nested expressions were not returned as +:class:`annotationlib.ForwardRef` format. 
From c35b33bfb7c491dfbdd40195d70dcfc4618265db Mon Sep 17 00:00:00 2001 From: Marat Sharafutdinov Date: Thu, 24 Oct 2024 05:04:49 +0300 Subject: [PATCH 087/106] Fix typo in garbage_collector.md (#125556) --- InternalDocs/garbage_collector.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index a6ee5c09e19efd..d624cf4befd31a 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -56,7 +56,7 @@ Starting in version 3.13, CPython contains two GC implementations: performing a collection for thread safety. Both implementations use the same basic algorithms, but operate on different -data structures. The the section on +data structures. See the section on [Differences between GC implementations](#Differences-between-GC-implementations) for the details. From b61fece8523d0fa6d9cc6ad3fd855a136c34f0cd Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 24 Oct 2024 11:57:02 +0100 Subject: [PATCH 088/106] GH-125868: Fix STORE_ATTR_WITH_HINT specialization (GH-125876) --- Lib/dis.py | 4 +- Lib/test/test_opcache.py | 44 +++++++++++++++++++ ...-10-23-14-05-47.gh-issue-125868.uLfXYB.rst | 3 ++ Python/bytecodes.c | 7 ++- Python/executor_cases.c.h | 10 +++-- Python/generated_cases.c.h | 7 ++- 6 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-05-47.gh-issue-125868.uLfXYB.rst diff --git a/Lib/dis.py b/Lib/dis.py index e87e6a78469ab0..db69848e9ab8ee 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -778,8 +778,10 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N if caches: cache_info = [] + cache_offset = offset for name, size in _cache_format[opname[deop]].items(): - data = code[offset + 2: offset + 2 + 2 * size] + data = code[cache_offset + 2: cache_offset + 2 + 2 * size] + cache_offset += size * 2 cache_info.append((name, size, data)) else: cache_info = None diff 
--git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index acf8158b0d0ea1..cdcddb0d717f23 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1155,6 +1155,50 @@ class D(dict): pass {'a':1, 'b':2} ) + def test_125868(self): + + def make_special_dict(): + """Create a dictionary object with this table: + index | key | value + ----- | --- | ----- + 0 | 'b' | 'value' + 1 | 'b' | NULL + """ + class A: + pass + a = A() + a.a = 1 + a.b = 2 + d = a.__dict__.copy() + del d['a'] + del d['b'] + d['b'] = "value" + return d + + class NoInlineAorB: + pass + for i in range(ord('c'), ord('z')): + setattr(NoInlineAorB(), chr(i), i) + + c = NoInlineAorB() + c.a = 0 + c.b = 1 + self.assertFalse(_testinternalcapi.has_inline_values(c)) + + def f(o, n): + for i in range(n): + o.b = i + # Prime f to store to dict slot 1 + f(c, 100) + + test_obj = NoInlineAorB() + test_obj.__dict__ = make_special_dict() + self.assertEqual(test_obj.b, "value") + + # This should set test_obj.b = 0 + f(test_obj, 1) + self.assertEqual(test_obj.b, 0) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-05-47.gh-issue-125868.uLfXYB.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-05-47.gh-issue-125868.uLfXYB.rst new file mode 100644 index 00000000000000..dea250e7166ec6 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-05-47.gh-issue-125868.uLfXYB.rst @@ -0,0 +1,3 @@ +It was possible in 3.14.0a1 only for attribute lookup to give the wrong +value. This was due to an incorrect specialization in very specific +circumstances. This is fixed in 3.14.0a2.
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 62e9b5ddd1584c..eaf2537fa07d27 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2303,17 +2303,16 @@ dummy_func( assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries); - PyObject *old_value; DEOPT_IF(!DK_IS_UNICODE(dict->ma_keys)); PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; DEOPT_IF(ep->me_key != name); + PyObject *old_value = ep->me_value; + DEOPT_IF(old_value == NULL); /* Ensure dict is GC tracked if it needs to be */ if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { _PyObject_GC_TRACK(dict); } - old_value = ep->me_value; - PyDict_WatchEvent event = old_value == NULL ? PyDict_EVENT_ADDED : PyDict_EVENT_MODIFIED; - _PyDict_NotifyEvent(tstate->interp, event, dict, name, PyStackRef_AsPyObjectBorrow(value)); + _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); ep->me_value = PyStackRef_AsPyObjectSteal(value); // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault, // when dict only holds the strong reference to value in ep->me_value. 
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5df4986cd838b5..3a7015ccb78987 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2815,7 +2815,6 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - PyObject *old_value; if (!DK_IS_UNICODE(dict->ma_keys)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -2825,14 +2824,17 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + PyObject *old_value = ep->me_value; + if (old_value == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } /* Ensure dict is GC tracked if it needs to be */ if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { _PyObject_GC_TRACK(dict); } - old_value = ep->me_value; - PyDict_WatchEvent event = old_value == NULL ? PyDict_EVENT_ADDED : PyDict_EVENT_MODIFIED; _PyFrame_SetStackPointer(frame, stack_pointer); - _PyDict_NotifyEvent(tstate->interp, event, dict, name, PyStackRef_AsPyObjectBorrow(value)); + _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); ep->me_value = PyStackRef_AsPyObjectSteal(value); // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault, diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index efbf2fba8c3106..f658ae503cd70e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7443,18 +7443,17 @@ assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, STORE_ATTR); - PyObject *old_value; DEOPT_IF(!DK_IS_UNICODE(dict->ma_keys), STORE_ATTR); PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; DEOPT_IF(ep->me_key != name, STORE_ATTR); + PyObject *old_value = ep->me_value; + DEOPT_IF(old_value == NULL, STORE_ATTR); /* Ensure dict is GC tracked if it needs 
to be */ if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { _PyObject_GC_TRACK(dict); } - old_value = ep->me_value; - PyDict_WatchEvent event = old_value == NULL ? PyDict_EVENT_ADDED : PyDict_EVENT_MODIFIED; _PyFrame_SetStackPointer(frame, stack_pointer); - _PyDict_NotifyEvent(tstate->interp, event, dict, name, PyStackRef_AsPyObjectBorrow(value)); + _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); ep->me_value = PyStackRef_AsPyObjectSteal(value); // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault, From e545ead66ce725aae6fb0ad5d733abe806c19750 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 24 Oct 2024 09:33:11 -0400 Subject: [PATCH 089/106] gh-125859: Fix crash when `gc.get_objects` is called during GC (#125882) This fixes a crash when `gc.get_objects()` or `gc.get_referrers()` is called during a GC in the free threading build. Switch to `_PyObjectStack` to avoid corrupting the `struct worklist` linked list maintained by the GC. Also, don't return objects that are frozen (`gc.freeze()`) or in the process of being collected to more closely match the behavior of the default build. 
--- Include/internal/pycore_object_stack.h | 10 ++ Lib/test/test_free_threading/test_gc.py | 61 ++++++++ Lib/test/test_gc.py | 23 +++ ...-10-23-14-42-27.gh-issue-125859.m3EF9E.rst | 2 + Python/gc_free_threading.c | 137 ++++++++---------- 5 files changed, 160 insertions(+), 73 deletions(-) create mode 100644 Lib/test/test_free_threading/test_gc.py create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst diff --git a/Include/internal/pycore_object_stack.h b/Include/internal/pycore_object_stack.h index c607ea8bc52545..39e69b7cde52a1 100644 --- a/Include/internal/pycore_object_stack.h +++ b/Include/internal/pycore_object_stack.h @@ -71,6 +71,16 @@ _PyObjectStack_Pop(_PyObjectStack *stack) return obj; } +static inline Py_ssize_t +_PyObjectStack_Size(_PyObjectStack *stack) +{ + Py_ssize_t size = 0; + for (_PyObjectStackChunk *buf = stack->head; buf != NULL; buf = buf->prev) { + size += buf->n; + } + return size; +} + // Merge src into dst, leaving src empty extern void _PyObjectStack_Merge(_PyObjectStack *dst, _PyObjectStack *src); diff --git a/Lib/test/test_free_threading/test_gc.py b/Lib/test/test_free_threading/test_gc.py new file mode 100644 index 00000000000000..401067fe9c612c --- /dev/null +++ b/Lib/test/test_free_threading/test_gc.py @@ -0,0 +1,61 @@ +import unittest + +import threading +from threading import Thread +from unittest import TestCase +import gc + +from test.support import threading_helper + + +class MyObj: + pass + + +@threading_helper.requires_working_threading() +class TestGC(TestCase): + def test_get_objects(self): + event = threading.Event() + + def gc_thread(): + for i in range(100): + o = gc.get_objects() + event.set() + + def mutator_thread(): + while not event.is_set(): + o1 = MyObj() + o2 = MyObj() + o3 = MyObj() + o4 = MyObj() + + gcs = [Thread(target=gc_thread)] + mutators = [Thread(target=mutator_thread) for _ in range(4)] + with threading_helper.start_threads(gcs + mutators): + pass + + def 
test_get_referrers(self): + event = threading.Event() + + obj = MyObj() + + def gc_thread(): + for i in range(100): + o = gc.get_referrers(obj) + event.set() + + def mutator_thread(): + while not event.is_set(): + d1 = { "key": obj } + d2 = { "key": obj } + d3 = { "key": obj } + d4 = { "key": obj } + + gcs = [Thread(target=gc_thread) for _ in range(2)] + mutators = [Thread(target=mutator_thread) for _ in range(4)] + with threading_helper.start_threads(gcs + mutators): + pass + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index bb7df1f5cfa7f7..cc2b4fac05b48b 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1065,6 +1065,29 @@ def test_get_referents_on_capsule(self): self.assertEqual(len(gc.get_referents(untracked_capsule)), 0) gc.get_referents(tracked_capsule) + @cpython_only + def test_get_objects_during_gc(self): + # gh-125859: Calling gc.get_objects() or gc.get_referrers() during a + # collection should not crash. + test = self + collected = False + + class GetObjectsOnDel: + def __del__(self): + nonlocal collected + collected = True + objs = gc.get_objects() + # NB: can't use "in" here because some objects override __eq__ + for obj in objs: + test.assertTrue(obj is not self) + test.assertEqual(gc.get_referrers(self), []) + + obj = GetObjectsOnDel() + obj.cycle = obj + del obj + + gc.collect() + self.assertTrue(collected) class IncrementalGCTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst new file mode 100644 index 00000000000000..d36aa8fbe7482f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst @@ -0,0 +1,2 @@ +Fix a crash in the free threading build when :func:`gc.get_objects` or +:func:`gc.get_referrers` is called during an in-progress garbage collection. 
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 8558d4555a9a3a..1969ed608ea524 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1401,10 +1401,32 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) return n + m; } +static PyObject * +list_from_object_stack(_PyObjectStack *stack) +{ + PyObject *list = PyList_New(_PyObjectStack_Size(stack)); + if (list == NULL) { + PyObject *op; + while ((op = _PyObjectStack_Pop(stack)) != NULL) { + Py_DECREF(op); + } + return NULL; + } + + PyObject *op; + Py_ssize_t idx = 0; + while ((op = _PyObjectStack_Pop(stack)) != NULL) { + assert(idx < PyList_GET_SIZE(list)); + PyList_SET_ITEM(list, idx++, op); + } + assert(idx == PyList_GET_SIZE(list)); + return list; +} + struct get_referrers_args { struct visitor_args base; PyObject *objs; - struct worklist results; + _PyObjectStack results; }; static int @@ -1428,11 +1450,21 @@ visit_get_referrers(const mi_heap_t *heap, const mi_heap_area_t *area, if (op == NULL) { return true; } + if (op->ob_gc_bits & (_PyGC_BITS_UNREACHABLE | _PyGC_BITS_FROZEN)) { + // Exclude unreachable objects (in-progress GC) and frozen + // objects from gc.get_objects() to match the default build. + return true; + } struct get_referrers_args *arg = (struct get_referrers_args *)args; + if (op == arg->objs) { + // Don't include the tuple itself in the referrers list. 
+ return true; + } if (Py_TYPE(op)->tp_traverse(op, referrersvisit, arg->objs)) { - op->ob_tid = 0; // we will restore the refcount later - worklist_push(&arg->results, op); + if (_PyObjectStack_Push(&arg->results, Py_NewRef(op)) < 0) { + return false; + } } return true; @@ -1441,48 +1473,25 @@ visit_get_referrers(const mi_heap_t *heap, const mi_heap_area_t *area, PyObject * _PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs) { - PyObject *result = PyList_New(0); - if (!result) { - return NULL; - } - - _PyEval_StopTheWorld(interp); - - // Append all objects to a worklist. This abuses ob_tid. We will restore - // it later. NOTE: We can't append to the PyListObject during - // gc_visit_heaps() because PyList_Append() may reclaim an abandoned - // mimalloc segments while we are traversing them. + // NOTE: We can't append to the PyListObject during gc_visit_heaps() + // because PyList_Append() may reclaim an abandoned mimalloc segments + // while we are traversing them. struct get_referrers_args args = { .objs = objs }; - gc_visit_heaps(interp, &visit_get_referrers, &args.base); - - bool error = false; - PyObject *op; - while ((op = worklist_pop(&args.results)) != NULL) { - gc_restore_tid(op); - if (op != objs && PyList_Append(result, op) < 0) { - error = true; - break; - } - } - - // In case of error, clear the remaining worklist - while ((op = worklist_pop(&args.results)) != NULL) { - gc_restore_tid(op); - } - + _PyEval_StopTheWorld(interp); + int err = gc_visit_heaps(interp, &visit_get_referrers, &args.base); _PyEval_StartTheWorld(interp); - if (error) { - Py_DECREF(result); - return NULL; + PyObject *list = list_from_object_stack(&args.results); + if (err < 0) { + PyErr_NoMemory(); + Py_CLEAR(list); } - - return result; + return list; } struct get_objects_args { struct visitor_args base; - struct worklist objects; + _PyObjectStack objects; }; static bool @@ -1493,54 +1502,36 @@ visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area, if (op 
== NULL) { return true; } + if (op->ob_gc_bits & (_PyGC_BITS_UNREACHABLE | _PyGC_BITS_FROZEN)) { + // Exclude unreachable objects (in-progress GC) and frozen + // objects from gc.get_objects() to match the default build. + return true; + } struct get_objects_args *arg = (struct get_objects_args *)args; - op->ob_tid = 0; // we will restore the refcount later - worklist_push(&arg->objects, op); - + if (_PyObjectStack_Push(&arg->objects, Py_NewRef(op)) < 0) { + return false; + } return true; } PyObject * _PyGC_GetObjects(PyInterpreterState *interp, int generation) { - PyObject *result = PyList_New(0); - if (!result) { - return NULL; - } - - _PyEval_StopTheWorld(interp); - - // Append all objects to a worklist. This abuses ob_tid. We will restore - // it later. NOTE: We can't append to the list during gc_visit_heaps() - // because PyList_Append() may reclaim an abandoned mimalloc segment - // while we are traversing it. + // NOTE: We can't append to the PyListObject during gc_visit_heaps() + // because PyList_Append() may reclaim an abandoned mimalloc segments + // while we are traversing them. 
struct get_objects_args args = { 0 }; - gc_visit_heaps(interp, &visit_get_objects, &args.base); - - bool error = false; - PyObject *op; - while ((op = worklist_pop(&args.objects)) != NULL) { - gc_restore_tid(op); - if (op != result && PyList_Append(result, op) < 0) { - error = true; - break; - } - } - - // In case of error, clear the remaining worklist - while ((op = worklist_pop(&args.objects)) != NULL) { - gc_restore_tid(op); - } - + _PyEval_StopTheWorld(interp); + int err = gc_visit_heaps(interp, &visit_get_objects, &args.base); _PyEval_StartTheWorld(interp); - if (error) { - Py_DECREF(result); - return NULL; + PyObject *list = list_from_object_stack(&args.objects); + if (err < 0) { + PyErr_NoMemory(); + Py_CLEAR(list); } - - return result; + return list; } static bool From ad6110a93ffa82cae71af6c78692de065d3871b5 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 24 Oct 2024 12:03:50 -0400 Subject: [PATCH 090/106] gh-125842: Fix `sys.exit(0xffff_ffff)` on Windows (#125896) On Windows, `long` is a signed 32-bit integer so it can't represent `0xffff_ffff` without overflow. Windows exit codes are unsigned 32-bit integers, so if a child process exits with `-1`, it will be represented as `0xffff_ffff`. Also fix a number of other possible cases where `_Py_HandleSystemExit` could return with an exception set, leading to a `SystemError` (or fatal error in debug builds) later on during shutdown. 
--- Lib/test/test_sys.py | 14 ++++ ...-10-23-17-24-23.gh-issue-125842.m3EF9E.rst | 2 + Python/pythonrun.c | 80 +++++++++++-------- 3 files changed, 63 insertions(+), 33 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-10-23-17-24-23.gh-issue-125842.m3EF9E.rst diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 9689ef8e96e072..c0862d7d15f39e 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -206,6 +206,20 @@ def test_exit(self): self.assertEqual(out, b'') self.assertEqual(err, b'') + # gh-125842: Windows uses 32-bit unsigned integers for exit codes + # so a -1 exit code is sometimes interpreted as 0xffff_ffff. + rc, out, err = assert_python_failure('-c', 'import sys; sys.exit(0xffff_ffff)') + self.assertIn(rc, (-1, 0xff, 0xffff_ffff)) + self.assertEqual(out, b'') + self.assertEqual(err, b'') + + # Overflow results in a -1 exit code, which may be converted to 0xff + # or 0xffff_ffff. + rc, out, err = assert_python_failure('-c', 'import sys; sys.exit(2**128)') + self.assertIn(rc, (-1, 0xff, 0xffff_ffff)) + self.assertEqual(out, b'') + self.assertEqual(err, b'') + # call with integer argument with self.assertRaises(SystemExit) as cm: sys.exit(42) diff --git a/Misc/NEWS.d/next/Windows/2024-10-23-17-24-23.gh-issue-125842.m3EF9E.rst b/Misc/NEWS.d/next/Windows/2024-10-23-17-24-23.gh-issue-125842.m3EF9E.rst new file mode 100644 index 00000000000000..63644721d57f5b --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-10-23-17-24-23.gh-issue-125842.m3EF9E.rst @@ -0,0 +1,2 @@ +Fix a :exc:`SystemError` when :func:`sys.exit` is called with ``0xffffffff`` +on Windows. 
diff --git a/Python/pythonrun.c b/Python/pythonrun.c index fc0f11bc4e8af4..8b57018321c070 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -564,6 +564,30 @@ PyRun_SimpleStringFlags(const char *command, PyCompilerFlags *flags) return _PyRun_SimpleStringFlagsWithName(command, NULL, flags); } +static int +parse_exit_code(PyObject *code, int *exitcode_p) +{ + if (PyLong_Check(code)) { + // gh-125842: Use a long long to avoid an overflow error when `long` + // is 32-bit. We still truncate the result to an int. + int exitcode = (int)PyLong_AsLongLong(code); + if (exitcode == -1 && PyErr_Occurred()) { + // On overflow or other error, clear the exception and use -1 + // as the exit code to match historical Python behavior. + PyErr_Clear(); + *exitcode_p = -1; + return 1; + } + *exitcode_p = exitcode; + return 1; + } + else if (code == Py_None) { + *exitcode_p = 0; + return 1; + } + return 0; +} + int _Py_HandleSystemExit(int *exitcode_p) { @@ -580,50 +604,40 @@ _Py_HandleSystemExit(int *exitcode_p) fflush(stdout); - int exitcode = 0; - PyObject *exc = PyErr_GetRaisedException(); - if (exc == NULL) { - goto done; - } - assert(PyExceptionInstance_Check(exc)); + assert(exc != NULL && PyExceptionInstance_Check(exc)); - /* The error code should be in the `code' attribute. */ PyObject *code = PyObject_GetAttr(exc, &_Py_ID(code)); - if (code) { + if (code == NULL) { + // If the exception has no 'code' attribute, print the exception below + PyErr_Clear(); + } + else if (parse_exit_code(code, exitcode_p)) { + Py_DECREF(code); + Py_CLEAR(exc); + return 1; + } + else { + // If code is not an int or None, print it below Py_SETREF(exc, code); - if (exc == Py_None) { - goto done; - } } - /* If we failed to dig out the 'code' attribute, - * just let the else clause below print the error. 
- */ - if (PyLong_Check(exc)) { - exitcode = (int)PyLong_AsLong(exc); + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *sys_stderr = _PySys_GetAttr(tstate, &_Py_ID(stderr)); + if (sys_stderr != NULL && sys_stderr != Py_None) { + if (PyFile_WriteObject(exc, sys_stderr, Py_PRINT_RAW) < 0) { + PyErr_Clear(); + } } else { - PyThreadState *tstate = _PyThreadState_GET(); - PyObject *sys_stderr = _PySys_GetAttr(tstate, &_Py_ID(stderr)); - /* We clear the exception here to avoid triggering the assertion - * in PyObject_Str that ensures it won't silently lose exception - * details. - */ - PyErr_Clear(); - if (sys_stderr != NULL && sys_stderr != Py_None) { - PyFile_WriteObject(exc, sys_stderr, Py_PRINT_RAW); - } else { - PyObject_Print(exc, stderr, Py_PRINT_RAW); - fflush(stderr); + if (PyObject_Print(exc, stderr, Py_PRINT_RAW) < 0) { + PyErr_Clear(); } - PySys_WriteStderr("\n"); - exitcode = 1; + fflush(stderr); } - -done: + PySys_WriteStderr("\n"); Py_CLEAR(exc); - *exitcode_p = exitcode; + *exitcode_p = 1; return 1; } From 5003ad5c5ea508f0dde1b374cd8bc6a481ad5c5d Mon Sep 17 00:00:00 2001 From: partev Date: Thu, 24 Oct 2024 12:41:01 -0400 Subject: [PATCH 091/106] gh-125909: Avoid a redirect when linking to the devguide (#125826) --- Doc/tools/templates/indexcontent.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/tools/templates/indexcontent.html b/Doc/tools/templates/indexcontent.html index f2e9fbb0106452..2686f48dad2a95 100644 --- a/Doc/tools/templates/indexcontent.html +++ b/Doc/tools/templates/indexcontent.html @@ -59,7 +59,7 @@

{{ docstitle|e }}

- + From 3c4a7fa6178d852ccb73527aaa2d0a5e93022e89 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 24 Oct 2024 12:44:38 -0400 Subject: [PATCH 092/106] gh-124218: Avoid refcount contention on builtins module (GH-125847) This replaces `_PyEval_BuiltinsFromGlobals` with `_PyDict_LoadBuiltinsFromGlobals`, which returns a new reference instead of a borrowed reference. Internally, the new function uses per-thread reference counting when possible to avoid contention on the refcount fields on the builtins module. --- Include/internal/pycore_ceval.h | 3 --- Include/internal/pycore_dict.h | 29 ++++++++++++++++++++++++++++ Objects/dictobject.c | 34 +++++++++++++++++++++++++++++++++ Objects/frameobject.c | 24 ++++------------------- Objects/funcobject.c | 25 +++--------------------- Python/ceval.c | 6 ++++-- 6 files changed, 74 insertions(+), 47 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index cff2b1f7114793..411bbff106dd69 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -83,9 +83,6 @@ extern void _PyEval_Fini(void); extern PyObject* _PyEval_GetBuiltins(PyThreadState *tstate); -extern PyObject* _PyEval_BuiltinsFromGlobals( - PyThreadState *tstate, - PyObject *globals); // Trampoline API diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 1d185559b3ef43..c5399ad8e0497f 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -108,6 +108,9 @@ extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject PyAPI_FUNC(PyObject *)_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *); PyAPI_FUNC(void) _PyDict_LoadGlobalStackRef(PyDictObject *, PyDictObject *, PyObject *, _PyStackRef *); +// Loads the __builtins__ object from the globals dict. Returns a new reference. 
+extern PyObject *_PyDict_LoadBuiltinsFromGlobals(PyObject *globals); + /* Consumes references to key and value */ PyAPI_FUNC(int) _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value); extern int _PyDict_SetItem_LockHeld(PyDictObject *dict, PyObject *name, PyObject *value); @@ -318,6 +321,8 @@ PyDictObject *_PyObject_MaterializeManagedDict_LockHeld(PyObject *); #ifndef Py_GIL_DISABLED # define _Py_INCREF_DICT Py_INCREF # define _Py_DECREF_DICT Py_DECREF +# define _Py_INCREF_BUILTINS Py_INCREF +# define _Py_DECREF_BUILTINS Py_DECREF #else static inline Py_ssize_t _PyDict_UniqueId(PyDictObject *mp) @@ -341,6 +346,30 @@ _Py_DECREF_DICT(PyObject *op) Py_ssize_t id = _PyDict_UniqueId((PyDictObject *)op); _Py_THREAD_DECREF_OBJECT(op, id); } + +// Like `_Py_INCREF_DICT`, but also handles non-dict objects because builtins +// may not be a dict. +static inline void +_Py_INCREF_BUILTINS(PyObject *op) +{ + if (PyDict_CheckExact(op)) { + _Py_INCREF_DICT(op); + } + else { + Py_INCREF(op); + } +} + +static inline void +_Py_DECREF_BUILTINS(PyObject *op) +{ + if (PyDict_CheckExact(op)) { + _Py_DECREF_DICT(op); + } + else { + Py_DECREF(op); + } +} #endif #ifdef __cplusplus diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 3134f6141dc9be..68ba2f74fdc67a 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2511,6 +2511,40 @@ _PyDict_LoadGlobalStackRef(PyDictObject *globals, PyDictObject *builtins, PyObje assert(ix >= 0 || PyStackRef_IsNull(*res)); } +PyObject * +_PyDict_LoadBuiltinsFromGlobals(PyObject *globals) +{ + if (!PyDict_Check(globals)) { + PyErr_BadInternalCall(); + return NULL; + } + + PyDictObject *mp = (PyDictObject *)globals; + PyObject *key = &_Py_ID(__builtins__); + Py_hash_t hash = unicode_get_hash(key); + + // Use the stackref variant to avoid reference count contention on the + // builtins module in the free threading build. 
It's important not to + // make any escaping calls between the lookup and the `PyStackRef_CLOSE()` + // because the `ref` is not visible to the GC. + _PyStackRef ref; + Py_ssize_t ix = _Py_dict_lookup_threadsafe_stackref(mp, key, hash, &ref); + if (ix == DKIX_ERROR) { + return NULL; + } + if (PyStackRef_IsNull(ref)) { + return Py_NewRef(PyEval_GetBuiltins()); + } + PyObject *builtins = PyStackRef_AsPyObjectBorrow(ref); + if (PyModule_Check(builtins)) { + builtins = _PyModule_GetDict(builtins); + assert(builtins != NULL); + } + _Py_INCREF_BUILTINS(builtins); + PyStackRef_CLOSE(ref); + return builtins; +} + /* Consumes references to key and value */ static int setitem_take2_lock_held(PyDictObject *mp, PyObject *key, PyObject *value) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 5ef48919a081be..af2a2ef18e627a 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -1,8 +1,9 @@ /* Frame object implementation */ #include "Python.h" -#include "pycore_ceval.h" // _PyEval_BuiltinsFromGlobals() +#include "pycore_ceval.h" // _PyEval_SetOpcodeTrace() #include "pycore_code.h" // CO_FAST_LOCAL, etc. 
+#include "pycore_dict.h" // _PyDict_LoadBuiltinsFromGlobals() #include "pycore_function.h" // _PyFunction_FromConstructor() #include "pycore_moduleobject.h" // _PyModule_GetDict() #include "pycore_modsupport.h" // _PyArg_CheckPositional() @@ -1899,7 +1900,7 @@ PyFrameObject* PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, PyObject *locals) { - PyObject *builtins = _PyEval_BuiltinsFromGlobals(tstate, globals); // borrowed ref + PyObject *builtins = _PyDict_LoadBuiltinsFromGlobals(globals); if (builtins == NULL) { return NULL; } @@ -1914,6 +1915,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, .fc_closure = NULL }; PyFunctionObject *func = _PyFunction_FromConstructor(&desc); + _Py_DECREF_BUILTINS(builtins); if (func == NULL) { return NULL; } @@ -2204,21 +2206,3 @@ PyFrame_GetGenerator(PyFrameObject *frame) PyGenObject *gen = _PyGen_GetGeneratorFromFrame(frame->f_frame); return Py_NewRef(gen); } - -PyObject* -_PyEval_BuiltinsFromGlobals(PyThreadState *tstate, PyObject *globals) -{ - PyObject *builtins = PyDict_GetItemWithError(globals, &_Py_ID(__builtins__)); - if (builtins) { - if (PyModule_Check(builtins)) { - builtins = _PyModule_GetDict(builtins); - assert(builtins != NULL); - } - return builtins; - } - if (PyErr_Occurred()) { - return NULL; - } - - return _PyEval_GetBuiltins(tstate); -} diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 44fb4ac0907d7b..e72a7d98c0a79e 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -2,7 +2,6 @@ /* Function object implementation */ #include "Python.h" -#include "pycore_ceval.h" // _PyEval_BuiltinsFromGlobals() #include "pycore_dict.h" // _Py_INCREF_DICT() #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_modsupport.h" // _PyArg_NoKeywords() @@ -115,12 +114,7 @@ _PyFunction_FromConstructor(PyFrameConstructor *constr) } _Py_INCREF_DICT(constr->fc_globals); op->func_globals = constr->fc_globals; - if (PyDict_Check(constr->fc_builtins)) { - 
_Py_INCREF_DICT(constr->fc_builtins); - } - else { - Py_INCREF(constr->fc_builtins); - } + _Py_INCREF_BUILTINS(constr->fc_builtins); op->func_builtins = constr->fc_builtins; op->func_name = Py_NewRef(constr->fc_name); op->func_qualname = Py_NewRef(constr->fc_qualname); @@ -153,8 +147,6 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname assert(PyDict_Check(globals)); _Py_INCREF_DICT(globals); - PyThreadState *tstate = _PyThreadState_GET(); - PyCodeObject *code_obj = (PyCodeObject *)code; _Py_INCREF_CODE(code_obj); @@ -188,16 +180,10 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname goto error; } - builtins = _PyEval_BuiltinsFromGlobals(tstate, globals); // borrowed ref + builtins = _PyDict_LoadBuiltinsFromGlobals(globals); if (builtins == NULL) { goto error; } - if (PyDict_Check(builtins)) { - _Py_INCREF_DICT(builtins); - } - else { - Py_INCREF(builtins); - } PyFunctionObject *op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type); if (op == NULL) { @@ -1078,12 +1064,7 @@ func_clear(PyObject *self) PyObject *builtins = op->func_builtins; op->func_builtins = NULL; if (builtins != NULL) { - if (PyDict_Check(builtins)) { - _Py_DECREF_DICT(builtins); - } - else { - Py_DECREF(builtins); - } + _Py_DECREF_BUILTINS(builtins); } Py_CLEAR(op->func_module); Py_CLEAR(op->func_defaults); diff --git a/Python/ceval.c b/Python/ceval.c index ca75646b585f07..ece7ef1d32048f 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -639,7 +639,7 @@ PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals) if (locals == NULL) { locals = globals; } - PyObject *builtins = _PyEval_BuiltinsFromGlobals(tstate, globals); // borrowed ref + PyObject *builtins = _PyDict_LoadBuiltinsFromGlobals(globals); if (builtins == NULL) { return NULL; } @@ -654,6 +654,7 @@ PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals) .fc_closure = NULL }; PyFunctionObject *func = _PyFunction_FromConstructor(&desc); + 
_Py_DECREF_BUILTINS(builtins); if (func == NULL) { return NULL; } @@ -1899,7 +1900,7 @@ PyEval_EvalCodeEx(PyObject *_co, PyObject *globals, PyObject *locals, if (defaults == NULL) { return NULL; } - PyObject *builtins = _PyEval_BuiltinsFromGlobals(tstate, globals); // borrowed ref + PyObject *builtins = _PyDict_LoadBuiltinsFromGlobals(globals); if (builtins == NULL) { Py_DECREF(defaults); return NULL; @@ -1954,6 +1955,7 @@ PyEval_EvalCodeEx(PyObject *_co, PyObject *globals, PyObject *locals, Py_XDECREF(func); Py_XDECREF(kwnames); PyMem_Free(newargs); + _Py_DECREF_BUILTINS(builtins); Py_DECREF(defaults); return res; } From 41bd9d959ccdb1095b6662b903bb3cbd2a47087b Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Thu, 24 Oct 2024 12:51:45 -0400 Subject: [PATCH 093/106] gh-125864: Propagate `pickle.loads()` failures in `InterpreterPoolExecutor` (gh-125898) Authored-by: Peter Bierma --- Lib/concurrent/futures/interpreter.py | 3 ++- .../test_interpreter_pool.py | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Lib/concurrent/futures/interpreter.py b/Lib/concurrent/futures/interpreter.py index fd7941adb766bb..d17688dc9d7346 100644 --- a/Lib/concurrent/futures/interpreter.py +++ b/Lib/concurrent/futures/interpreter.py @@ -107,7 +107,8 @@ def _call(cls, func, args, kwargs, resultsid): @classmethod def _call_pickled(cls, pickled, resultsid): - fn, args, kwargs = pickle.loads(pickled) + with cls._capture_exc(resultsid): + fn, args, kwargs = pickle.loads(pickled) cls._call(fn, args, kwargs, resultsid) def __init__(self, initdata, shared=None): diff --git a/Lib/test/test_concurrent_futures/test_interpreter_pool.py b/Lib/test/test_concurrent_futures/test_interpreter_pool.py index 5264b1bb6e9c75..ea1512fc830d0c 100644 --- a/Lib/test/test_concurrent_futures/test_interpreter_pool.py +++ b/Lib/test/test_concurrent_futures/test_interpreter_pool.py @@ -56,6 +56,16 @@ def pipe(self): return r, w +class PickleShenanigans: + """Succeeds with 
pickle.dumps(), but fails with pickle.loads()""" + def __init__(self, value): + if value == 1: + raise RuntimeError("gotcha") + + def __reduce__(self): + return (self.__class__, (1,)) + + class InterpreterPoolExecutorTest( InterpretersMixin, ExecutorTest, BaseTestCase): @@ -279,6 +289,14 @@ def test_idle_thread_reuse(self): self.assertEqual(len(executor._threads), 1) executor.shutdown(wait=True) + def test_pickle_errors_propagate(self): + # GH-125864: Pickle errors happen before the script tries to execute, so the + # queue used to wait infinitely. + + fut = self.executor.submit(PickleShenanigans(0)) + with self.assertRaisesRegex(RuntimeError, "gotcha"): + fut.result() + class AsyncioTest(InterpretersMixin, testasyncio_utils.TestCase): From 3f24bde0b6689b8f05872a8118a97908b5a94659 Mon Sep 17 00:00:00 2001 From: Javad Shafique Date: Thu, 24 Oct 2024 19:41:16 +0200 Subject: [PATCH 094/106] gh-118950: Fix SSLProtocol.connection_lost not being called when OSError is thrown (#118960) Co-authored-by: Kumar Aditya --- Lib/asyncio/sslproto.py | 5 +- Lib/test/test_asyncio/test_sslproto.py | 48 +++++++++++++++++++ ...-05-12-03-10-36.gh-issue-118950.5Wc4vp.rst | 1 + 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-12-03-10-36.gh-issue-118950.5Wc4vp.rst diff --git a/Lib/asyncio/sslproto.py b/Lib/asyncio/sslproto.py index fa99d4533aa0a6..74c5f0d5ca0609 100644 --- a/Lib/asyncio/sslproto.py +++ b/Lib/asyncio/sslproto.py @@ -101,7 +101,7 @@ def get_protocol(self): return self._ssl_protocol._app_protocol def is_closing(self): - return self._closed + return self._closed or self._ssl_protocol._is_transport_closing() def close(self): """Close the transport. 
@@ -379,6 +379,9 @@ def _get_app_transport(self): self._app_transport_created = True return self._app_transport + def _is_transport_closing(self): + return self._transport is not None and self._transport.is_closing() + def connection_made(self, transport): """Called when the low-level connection is made. diff --git a/Lib/test/test_asyncio/test_sslproto.py b/Lib/test/test_asyncio/test_sslproto.py index f5f0afeab51c9e..761904c5146b6a 100644 --- a/Lib/test/test_asyncio/test_sslproto.py +++ b/Lib/test/test_asyncio/test_sslproto.py @@ -109,6 +109,54 @@ def test_connection_lost(self): test_utils.run_briefly(self.loop) self.assertIsInstance(waiter.exception(), ConnectionAbortedError) + def test_connection_lost_when_busy(self): + # gh-118950: SSLProtocol.connection_lost not being called when OSError + # is thrown on asyncio.write. + sock = mock.Mock() + sock.fileno = mock.Mock(return_value=12345) + sock.send = mock.Mock(side_effect=BrokenPipeError) + + # construct StreamWriter chain that contains loop dependant logic this emulates + # what _make_ssl_transport() does in BaseSelectorEventLoop + reader = asyncio.StreamReader(limit=2 ** 16, loop=self.loop) + protocol = asyncio.StreamReaderProtocol(reader, loop=self.loop) + ssl_proto = self.ssl_protocol(proto=protocol) + + # emulate reading decompressed data + sslobj = mock.Mock() + sslobj.read.side_effect = ssl.SSLWantReadError + sslobj.write.side_effect = ssl.SSLWantReadError + ssl_proto._sslobj = sslobj + + # emulate outgoing data + data = b'An interesting message' + + outgoing = mock.Mock() + outgoing.read = mock.Mock(return_value=data) + outgoing.pending = len(data) + ssl_proto._outgoing = outgoing + + # use correct socket transport to initialize the SSLProtocol + self.loop._make_socket_transport(sock, ssl_proto) + + transport = ssl_proto._app_transport + writer = asyncio.StreamWriter(transport, protocol, reader, self.loop) + + async def main(): + # writes data to transport + async def write(): + writer.write(data) + await 
writer.drain() + + # try to write for the first time + await write() + # try to write for the second time, this raises as the connection_lost + # callback should be done with error + with self.assertRaises(ConnectionResetError): + await write() + + self.loop.run_until_complete(main()) + def test_close_during_handshake(self): # bpo-29743 Closing transport during handshake process leaks socket waiter = self.loop.create_future() diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-12-03-10-36.gh-issue-118950.5Wc4vp.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-12-03-10-36.gh-issue-118950.5Wc4vp.rst new file mode 100644 index 00000000000000..82be975f4d808d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-12-03-10-36.gh-issue-118950.5Wc4vp.rst @@ -0,0 +1 @@ +Fix bug where SSLProtocol.connection_lost wasn't getting called when OSError was thrown on writing to socket. From 500f5338a8fe13719478589333fcd296e8e8eb02 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Thu, 24 Oct 2024 12:11:12 -0700 Subject: [PATCH 095/106] gh-123930: Better error for "from imports" when script shadows module (#123929) --- Doc/whatsnew/3.13.rst | 4 +- Include/internal/pycore_moduleobject.h | 2 + Lib/test/test_import/__init__.py | 325 ++++++++++++------ ...-09-11-01-32-07.gh-issue-123930.BkPfB6.rst | 4 + Objects/moduleobject.c | 28 +- Python/ceval.c | 150 +++++--- 6 files changed, 342 insertions(+), 171 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-11-01-32-07.gh-issue-123930.BkPfB6.rst diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index f9e74a9b8ff9c6..de4c7fd4c0486b 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -274,7 +274,7 @@ Improved error messages File "/home/me/random.py", line 3, in print(random.randint(5)) ^^^^^^^^^^^^^^ - AttributeError: module 'random' has no attribute 'randint' (consider renaming '/home/me/random.py' since it has the same name as the standard 
library module named 'random' and the import system gives it precedence) + AttributeError: module 'random' has no attribute 'randint' (consider renaming '/home/me/random.py' since it has the same name as the standard library module named 'random' and prevents importing that standard library module) Similarly, if a script has the same name as a third-party module that it attempts to import and this results in errors, @@ -289,7 +289,7 @@ Improved error messages File "/home/me/numpy.py", line 3, in np.array([1, 2, 3]) ^^^^^^^^ - AttributeError: module 'numpy' has no attribute 'array' (consider renaming '/home/me/numpy.py' if it has the same name as a third-party module you intended to import) + AttributeError: module 'numpy' has no attribute 'array' (consider renaming '/home/me/numpy.py' if it has the same name as a library you intended to import) (Contributed by Shantanu Jain in :gh:`95754`.) diff --git a/Include/internal/pycore_moduleobject.h b/Include/internal/pycore_moduleobject.h index cc2dda48ed9f28..9bb282a13a9659 100644 --- a/Include/internal/pycore_moduleobject.h +++ b/Include/internal/pycore_moduleobject.h @@ -11,6 +11,8 @@ extern "C" { extern void _PyModule_Clear(PyObject *); extern void _PyModule_ClearDict(PyObject *); extern int _PyModuleSpec_IsInitializing(PyObject *); +extern int _PyModuleSpec_GetFileOrigin(PyObject *, PyObject **); +extern int _PyModule_IsPossiblyShadowing(PyObject *); extern int _PyModule_IsExtension(PyObject *obj); diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 5d0d02480b3929..5b7ba90b2cc7c6 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -804,104 +804,133 @@ def test_issue105979(self): str(cm.exception)) def test_script_shadowing_stdlib(self): - with os_helper.temp_dir() as tmp: - with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: - f.write("import fractions\nfractions.Fraction") - - expected_error = ( - rb"AttributeError: module 
'fractions' has no attribute 'Fraction' " - rb"\(consider renaming '.*fractions.py' since it has the " - rb"same name as the standard library module named 'fractions' " - rb"and the import system gives it precedence\)" + script_errors = [ + ( + "import fractions\nfractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*fractions.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) - popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - # and there's no error at all when using -P - popen = script_helper.spawn_python('-P', 'fractions.py', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertEqual(stdout, 
b'') + # and there's no error at all when using -P + popen = script_helper.spawn_python('-P', 'fractions.py', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') - tmp_child = os.path.join(tmp, "child") - os.mkdir(tmp_child) + tmp_child = os.path.join(tmp, "child") + os.mkdir(tmp_child) - # test the logic with different cwd - popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp_child) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + # test the logic with different cwd + popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp_child) - stdout, stderr = popen.communicate() - self.assertEqual(stdout, b'') # no error + popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') # no error - popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp_child) - stdout, stderr = popen.communicate() - self.assertEqual(stdout, b'') # no error + popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') # no error def test_package_shadowing_stdlib_module(self): - with os_helper.temp_dir() as tmp: - os.mkdir(os.path.join(tmp, "fractions")) - with open(os.path.join(tmp, "fractions", "__init__.py"), "w", encoding='utf-8') as f: - f.write("shadowing_module = True") - with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: - f.write(""" -import fractions -fractions.shadowing_module -fractions.Fraction -""") - - expected_error = ( - rb"AttributeError: module 'fractions' has no attribute 'Fraction' " - rb"\(consider renaming '.*fractions.__init__.py' since it has the " - rb"same name as the standard library module named 'fractions' " - 
rb"and the import system gives it precedence\)" + script_errors = [ + ( + "fractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + os.mkdir(os.path.join(tmp, "fractions")) + with open( + os.path.join(tmp, "fractions", "__init__.py"), "w", encoding='utf-8' + ) as f: + f.write("shadowing_module = True") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write("import fractions; fractions.shadowing_module\n") + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*[\\/]fractions[\\/]+__init__.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) - popen = script_helper.spawn_python(os.path.join(tmp, "main.py"), cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python(os.path.join(tmp, "main.py"), cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-m', 'main', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python('-m', 'main', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - # and there's no shadowing at all when using -P - popen = script_helper.spawn_python('-P', 'main.py', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, b"module 'fractions' has no attribute 'shadowing_module'") + # and there's no shadowing at all when using -P + popen = script_helper.spawn_python('-P', 'main.py', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, b"module 'fractions' has no 
attribute 'shadowing_module'") def test_script_shadowing_third_party(self): - with os_helper.temp_dir() as tmp: - with open(os.path.join(tmp, "numpy.py"), "w", encoding='utf-8') as f: - f.write("import numpy\nnumpy.array") - - expected_error = ( - rb"AttributeError: module 'numpy' has no attribute 'array' " - rb"\(consider renaming '.*numpy.py' if it has the " - rb"same name as a third-party module you intended to import\)\s+\Z" + script_errors = [ + ( + "import numpy\nnumpy.array", + rb"AttributeError: module 'numpy' has no attribute 'array'" + ), + ( + "from numpy import array", + rb"ImportError: cannot import name 'array' from 'numpy'" ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "numpy.py"), "w", encoding='utf-8') as f: + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*numpy.py' if it has the " + rb"same name as a library you intended to import\)\s+\Z" + ) - popen = script_helper.spawn_python(os.path.join(tmp, "numpy.py")) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python(os.path.join(tmp, "numpy.py")) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-m', 'numpy', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python('-m', 'numpy', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-c', 'import numpy', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python('-c', 'import numpy', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) def test_script_maybe_not_shadowing_third_party(self): with os_helper.temp_dir() as tmp: @@ -911,15 +940,23 @@ def 
test_script_maybe_not_shadowing_third_party(self): expected_error = ( rb"AttributeError: module 'numpy' has no attribute 'attr'\s+\Z" ) - popen = script_helper.spawn_python('-c', 'import numpy; numpy.attr', cwd=tmp) stdout, stderr = popen.communicate() self.assertRegex(stdout, expected_error) + expected_error = ( + rb"ImportError: cannot import name 'attr' from 'numpy' \(.*\)\s+\Z" + ) + popen = script_helper.spawn_python('-c', 'from numpy import attr', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + def test_script_shadowing_stdlib_edge_cases(self): with os_helper.temp_dir() as tmp: with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: f.write("shadowing_module = True") + + # Unhashable str subclass with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: f.write(""" import fractions @@ -932,11 +969,28 @@ class substr(str): except TypeError as e: print(str(e)) """) + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertEqual(stdout.rstrip(), b"unhashable type: 'substr'") + + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +class substr(str): + __hash__ = None +fractions.__name__ = substr('fractions') +try: + from fractions import Fraction +except TypeError as e: + print(str(e)) +""") popen = script_helper.spawn_python("main.py", cwd=tmp) stdout, stderr = popen.communicate() self.assertEqual(stdout.rstrip(), b"unhashable type: 'substr'") + # Various issues with sys module with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: f.write(""" import fractions @@ -961,18 +1015,45 @@ class substr(str): except AttributeError as e: print(str(e)) """) + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + lines = stdout.splitlines() + self.assertEqual(len(lines), 3) + for line in lines: + self.assertEqual(line, 
b"module 'fractions' has no attribute 'Fraction'") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module + +import sys +sys.stdlib_module_names = None +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) + +del sys.stdlib_module_names +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) + +sys.path = [0] +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) +""") popen = script_helper.spawn_python("main.py", cwd=tmp) stdout, stderr = popen.communicate() - self.assertEqual( - stdout.splitlines(), - [ - b"module 'fractions' has no attribute 'Fraction'", - b"module 'fractions' has no attribute 'Fraction'", - b"module 'fractions' has no attribute 'Fraction'", - ], - ) + lines = stdout.splitlines() + self.assertEqual(len(lines), 3) + for line in lines: + self.assertRegex(line, rb"cannot import name 'Fraction' from 'fractions' \(.*\)") + # Various issues with origin with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: f.write(""" import fractions @@ -992,37 +1073,61 @@ class substr(str): popen = script_helper.spawn_python("main.py", cwd=tmp) stdout, stderr = popen.communicate() - self.assertEqual( - stdout.splitlines(), - [ - b"module 'fractions' has no attribute 'Fraction'", - b"module 'fractions' has no attribute 'Fraction'" - ], - ) - - def test_script_shadowing_stdlib_sys_path_modification(self): - with os_helper.temp_dir() as tmp: - with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: - f.write("shadowing_module = True") - - expected_error = ( - rb"AttributeError: module 'fractions' has no attribute 'Fraction' " - rb"\(consider renaming '.*fractions.py' since it has the " - rb"same name as the standard library module named 'fractions' " - rb"and the import system gives it precedence\)" - ) + lines = stdout.splitlines() + self.assertEqual(len(lines), 2) + for line in lines: 
+ self.assertEqual(line, b"module 'fractions' has no attribute 'Fraction'") with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: f.write(""" -import sys -sys.path.insert(0, "this_folder_does_not_exist") import fractions -fractions.Fraction -""") +fractions.shadowing_module +del fractions.__spec__.origin +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) +fractions.__spec__.origin = 0 +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) +""") popen = script_helper.spawn_python("main.py", cwd=tmp) stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + lines = stdout.splitlines() + self.assertEqual(len(lines), 2) + for line in lines: + self.assertRegex(line, rb"cannot import name 'Fraction' from 'fractions' \(.*\)") + + def test_script_shadowing_stdlib_sys_path_modification(self): + script_errors = [ + ( + "import fractions\nfractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" + ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write("shadowing_module = True") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write('import sys; sys.path.insert(0, "this_folder_does_not_exist")\n') + f.write(script) + expected_error = error + ( + rb" \(consider renaming '.*fractions.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) @skip_if_dont_write_bytecode diff --git 
a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-11-01-32-07.gh-issue-123930.BkPfB6.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-11-01-32-07.gh-issue-123930.BkPfB6.rst new file mode 100644 index 00000000000000..3c8eb02b2dc2d6 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-11-01-32-07.gh-issue-123930.BkPfB6.rst @@ -0,0 +1,4 @@ +Improve the error message when a script shadowing a module from the standard +library causes :exc:`ImportError` to be raised during a "from" import. +Similarly, improve the error message when a script shadowing a third party module +attempts to "from" import an attribute from that third party module while still initialising. diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index c06badd5f3edfe..535b0d068f064f 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -836,15 +836,15 @@ _PyModuleSpec_IsUninitializedSubmodule(PyObject *spec, PyObject *name) return rc; } -static int -_get_file_origin_from_spec(PyObject *spec, PyObject **p_origin) +int +_PyModuleSpec_GetFileOrigin(PyObject *spec, PyObject **p_origin) { PyObject *has_location = NULL; int rc = PyObject_GetOptionalAttr(spec, &_Py_ID(has_location), &has_location); if (rc <= 0) { return rc; } - // If origin is not a location, or doesn't exist, or is not a str), we could consider falling + // If origin is not a location, or doesn't exist, or is not a str, we could consider falling // back to module.__file__. But the cases in which module.__file__ is not __spec__.origin // are cases in which we probably shouldn't be guessing. 
rc = PyObject_IsTrue(has_location); @@ -867,8 +867,8 @@ _get_file_origin_from_spec(PyObject *spec, PyObject **p_origin) return 1; } -static int -_is_module_possibly_shadowing(PyObject *origin) +int +_PyModule_IsPossiblyShadowing(PyObject *origin) { // origin must be a unicode subtype // Returns 1 if the module at origin could be shadowing a module of the @@ -993,11 +993,11 @@ _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress) } PyObject *origin = NULL; - if (_get_file_origin_from_spec(spec, &origin) < 0) { + if (_PyModuleSpec_GetFileOrigin(spec, &origin) < 0) { goto done; } - int is_possibly_shadowing = _is_module_possibly_shadowing(origin); + int is_possibly_shadowing = _PyModule_IsPossiblyShadowing(origin); if (is_possibly_shadowing < 0) { goto done; } @@ -1018,20 +1018,23 @@ _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress) "module '%U' has no attribute '%U' " "(consider renaming '%U' since it has the same " "name as the standard library module named '%U' " - "and the import system gives it precedence)", + "and prevents importing that standard library module)", mod_name, name, origin, mod_name); } else { int rc = _PyModuleSpec_IsInitializing(spec); - if (rc > 0) { + if (rc < 0) { + goto done; + } + else if (rc > 0) { if (is_possibly_shadowing) { assert(origin); - // For third-party modules, only mention the possibility of + // For non-stdlib modules, only mention the possibility of // shadowing if the module is being initialized. 
PyErr_Format(PyExc_AttributeError, "module '%U' has no attribute '%U' " "(consider renaming '%U' if it has the same name " - "as a third-party module you intended to import)", + "as a library you intended to import)", mod_name, name, origin); } else if (origin) { @@ -1049,7 +1052,8 @@ _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress) mod_name, name); } } - else if (rc == 0) { + else { + assert(rc == 0); rc = _PyModuleSpec_IsUninitializedSubmodule(spec, name); if (rc > 0) { PyErr_Format(PyExc_AttributeError, diff --git a/Python/ceval.c b/Python/ceval.c index ece7ef1d32048f..beee5325cd6259 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2802,7 +2802,7 @@ PyObject * _PyEval_ImportFrom(PyThreadState *tstate, PyObject *v, PyObject *name) { PyObject *x; - PyObject *fullmodname, *pkgname, *pkgpath, *pkgname_or_unknown, *errmsg; + PyObject *fullmodname, *mod_name, *origin, *mod_name_or_unknown, *errmsg, *spec; if (PyObject_GetOptionalAttr(v, name, &x) != 0) { return x; @@ -2810,16 +2810,16 @@ _PyEval_ImportFrom(PyThreadState *tstate, PyObject *v, PyObject *name) /* Issue #17636: in case this failed because of a circular relative import, try to fallback on reading the module directly from sys.modules. 
*/ - if (PyObject_GetOptionalAttr(v, &_Py_ID(__name__), &pkgname) < 0) { + if (PyObject_GetOptionalAttr(v, &_Py_ID(__name__), &mod_name) < 0) { return NULL; } - if (pkgname == NULL || !PyUnicode_Check(pkgname)) { - Py_CLEAR(pkgname); + if (mod_name == NULL || !PyUnicode_Check(mod_name)) { + Py_CLEAR(mod_name); goto error; } - fullmodname = PyUnicode_FromFormat("%U.%U", pkgname, name); + fullmodname = PyUnicode_FromFormat("%U.%U", mod_name, name); if (fullmodname == NULL) { - Py_DECREF(pkgname); + Py_DECREF(mod_name); return NULL; } x = PyImport_GetModule(fullmodname); @@ -2827,63 +2827,121 @@ _PyEval_ImportFrom(PyThreadState *tstate, PyObject *v, PyObject *name) if (x == NULL && !_PyErr_Occurred(tstate)) { goto error; } - Py_DECREF(pkgname); + Py_DECREF(mod_name); return x; + error: - if (pkgname == NULL) { - pkgname_or_unknown = PyUnicode_FromString(""); - if (pkgname_or_unknown == NULL) { + if (mod_name == NULL) { + mod_name_or_unknown = PyUnicode_FromString(""); + if (mod_name_or_unknown == NULL) { return NULL; } } else { - pkgname_or_unknown = pkgname; + mod_name_or_unknown = mod_name; } + // mod_name is no longer an owned reference + assert(mod_name_or_unknown); + assert(mod_name == NULL || mod_name == mod_name_or_unknown); - pkgpath = NULL; - if (PyModule_Check(v)) { - pkgpath = PyModule_GetFilenameObject(v); - if (pkgpath == NULL) { - if (!PyErr_ExceptionMatches(PyExc_SystemError)) { - Py_DECREF(pkgname_or_unknown); - return NULL; + origin = NULL; + if (PyObject_GetOptionalAttr(v, &_Py_ID(__spec__), &spec) < 0) { + Py_DECREF(mod_name_or_unknown); + return NULL; + } + if (spec == NULL) { + errmsg = PyUnicode_FromFormat( + "cannot import name %R from %R (unknown location)", + name, mod_name_or_unknown + ); + goto done_with_errmsg; + } + if (_PyModuleSpec_GetFileOrigin(spec, &origin) < 0) { + goto done; + } + + int is_possibly_shadowing = _PyModule_IsPossiblyShadowing(origin); + if (is_possibly_shadowing < 0) { + goto done; + } + int 
is_possibly_shadowing_stdlib = 0; + if (is_possibly_shadowing) { + PyObject *stdlib_modules = PySys_GetObject("stdlib_module_names"); + if (stdlib_modules && PyAnySet_Check(stdlib_modules)) { + is_possibly_shadowing_stdlib = PySet_Contains(stdlib_modules, mod_name_or_unknown); + if (is_possibly_shadowing_stdlib < 0) { + goto done; } - // module filename missing - _PyErr_Clear(tstate); } } - if (pkgpath == NULL || !PyUnicode_Check(pkgpath)) { - Py_CLEAR(pkgpath); + + if (is_possibly_shadowing_stdlib) { + assert(origin); errmsg = PyUnicode_FromFormat( - "cannot import name %R from %R (unknown location)", - name, pkgname_or_unknown + "cannot import name %R from %R " + "(consider renaming %R since it has the same " + "name as the standard library module named %R " + "and prevents importing that standard library module)", + name, mod_name_or_unknown, origin, mod_name_or_unknown ); } else { - PyObject *spec; - int rc = PyObject_GetOptionalAttr(v, &_Py_ID(__spec__), &spec); - if (rc > 0) { - rc = _PyModuleSpec_IsInitializing(spec); - Py_DECREF(spec); - } + int rc = _PyModuleSpec_IsInitializing(spec); if (rc < 0) { - Py_DECREF(pkgname_or_unknown); - Py_DECREF(pkgpath); - return NULL; + goto done; + } + else if (rc > 0) { + if (is_possibly_shadowing) { + assert(origin); + // For non-stdlib modules, only mention the possibility of + // shadowing if the module is being initialized. 
+ errmsg = PyUnicode_FromFormat( + "cannot import name %R from %R " + "(consider renaming %R if it has the same name " + "as a library you intended to import)", + name, mod_name_or_unknown, origin + ); + } + else if (origin) { + errmsg = PyUnicode_FromFormat( + "cannot import name %R from partially initialized module %R " + "(most likely due to a circular import) (%S)", + name, mod_name_or_unknown, origin + ); + } + else { + errmsg = PyUnicode_FromFormat( + "cannot import name %R from partially initialized module %R " + "(most likely due to a circular import)", + name, mod_name_or_unknown + ); + } + } + else { + assert(rc == 0); + if (origin) { + errmsg = PyUnicode_FromFormat( + "cannot import name %R from %R (%S)", + name, mod_name_or_unknown, origin + ); + } + else { + errmsg = PyUnicode_FromFormat( + "cannot import name %R from %R (unknown location)", + name, mod_name_or_unknown + ); + } } - const char *fmt = - rc ? - "cannot import name %R from partially initialized module %R " - "(most likely due to a circular import) (%S)" : - "cannot import name %R from %R (%S)"; - - errmsg = PyUnicode_FromFormat(fmt, name, pkgname_or_unknown, pkgpath); } - /* NULL checks for errmsg and pkgname done by PyErr_SetImportError. */ - _PyErr_SetImportErrorWithNameFrom(errmsg, pkgname, pkgpath, name); - Py_XDECREF(errmsg); - Py_DECREF(pkgname_or_unknown); - Py_XDECREF(pkgpath); +done_with_errmsg: + /* NULL checks for errmsg, mod_name, origin done by PyErr_SetImportError. 
*/ + _PyErr_SetImportErrorWithNameFrom(errmsg, mod_name, origin, name); + Py_DECREF(errmsg); + +done: + Py_XDECREF(origin); + Py_XDECREF(spec); + Py_DECREF(mod_name_or_unknown); return NULL; } @@ -3243,5 +3301,3 @@ _PyEval_LoadName(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject *na } return value; } - - From 1306f33c84b2745aa8af5e3e8f680aa80b836c0e Mon Sep 17 00:00:00 2001 From: Kerim Kabirov Date: Thu, 24 Oct 2024 22:52:21 +0200 Subject: [PATCH 096/106] gh-125933: Add ARIA labels to select elements in the version switcher (#125934) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/tools/static/rtd_switcher.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/tools/static/rtd_switcher.js b/Doc/tools/static/rtd_switcher.js index f5dc7045a0dbc4..2bf01a002db90c 100644 --- a/Doc/tools/static/rtd_switcher.js +++ b/Doc/tools/static/rtd_switcher.js @@ -7,7 +7,7 @@ document.addEventListener("readthedocs-addons-data-ready", function(event) { const config = event.detail.data() const versionSelect = ` - ${ config.versions.active.map( (version) => `