From e8353cca99e57427b8d86295e249a313331f7c58 Mon Sep 17 00:00:00 2001 From: Itamar Ostricher Date: Mon, 24 Apr 2023 10:18:47 -0600 Subject: [PATCH 1/9] Defer formatting task name The default task name is "Task-" (if no name is passed in during Task creation). This is initialized in `Task.__init__` (C impl) using string formatting, which can be quite slow. Actually using the task name in real world code is not very common, so this is wasted init. Let's defer this string formatting to the first time the name is read (in `get_name` impl), so we don't need to pay the string formatting cost if the task name is never read. --- Modules/_asynciomodule.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index 2476dca6f58ebf..d664c0c60b6631 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -129,6 +129,7 @@ typedef struct { int task_must_cancel; int task_log_destroy_pending; int task_num_cancels_requested; + uint64_t task_name_counter; } TaskObj; typedef struct { @@ -2065,12 +2066,16 @@ _asyncio_Task___init___impl(TaskObj *self, PyObject *coro, PyObject *loop, self->task_must_cancel = 0; self->task_log_destroy_pending = 1; self->task_num_cancels_requested = 0; + self->task_name_counter = 0; Py_INCREF(coro); Py_XSETREF(self->task_coro, coro); if (name == Py_None) { - name = PyUnicode_FromFormat("Task-%" PRIu64, - ++state->task_name_counter); + // optimization: defer task name formatting + // set task_name to None to indicate deferred formatting, and + // store the task name counter for formatting in get_name impl + Py_INCREF(name); + self->task_name_counter = ++state->task_name_counter; } else if (!PyUnicode_CheckExact(name)) { name = PyObject_Str(name); } else { @@ -2449,6 +2454,13 @@ _asyncio_Task_get_name_impl(TaskObj *self) /*[clinic end generated code: output=0ecf1570c3b37a8f input=a4a6595d12f4f0f8]*/ { if (self->task_name) { + if (Py_IsNone(self->task_name)) { + assert(self->task_name_counter > 0); + PyObject *name = PyUnicode_FromFormat( + "Task-%" PRIu64, self->task_name_counter); + Py_XSETREF(self->task_name, name); + return Py_NewRef(self->task_name); + } return Py_NewRef(self->task_name); } From 1822ff8355231ccbaf226683a16e3a06af6afb84 Mon Sep 17 00:00:00 2001 From: Itamar Ostricher Date: Mon, 24 Apr 2023 13:04:41 -0600 Subject: [PATCH 2/9] Defer task name formatting also in Python Task impl --- Lib/asyncio/tasks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/asyncio/tasks.py b/Lib/asyncio/tasks.py index c90d32c97add78..9fbdfe099c37e1 100644 --- a/Lib/asyncio/tasks.py +++ b/Lib/asyncio/tasks.py @@ -103,8 +103,10 @@ def __init__(self, coro, *, loop=None, name=None, context=None): self._log_destroy_pending = False raise TypeError(f"a coroutine was expected, got {coro!r}") + self._task_name_counter = 0 if name is None: - self._name = f'Task-{_task_name_counter()}' + self._task_name_counter = _task_name_counter() + self._name = None else: self._name = str(name) @@ -143,6 +145,8 @@ def get_context(self): return self._context def get_name(self): + if self._name is None: + self._name = f'Task-{self._task_name_counter}' return self._name def set_name(self, value): From 74d0084b7267601afecb24e3737516db27d914b1 Mon Sep 17 00:00:00 2001 From: Itamar Ostricher Date: Mon, 24 Apr 2023 16:31:00 -0600 Subject: [PATCH 3/9] Add NEWS and whatsnew entries --- Doc/whatsnew/3.12.rst | 3 +++ .../2023-04-24-14-38-16.gh-issue-103793.kqoH6Q.rst | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-04-24-14-38-16.gh-issue-103793.kqoH6Q.rst diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 373e31b37cd9dc..9208fae3343c54 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -553,6 +553,9 @@ Optimizations replacement strings containing group references by 2--3 times. (Contributed by Serhiy Storchaka in :gh:`91524`.) +* Speed up :class:`asyncio.Task` creation by deferring expensive string formatting. + (Contributed by Itamar O in :gh:`103793`.) + CPython bytecode changes ======================== diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-04-24-14-38-16.gh-issue-103793.kqoH6Q.rst b/Misc/NEWS.d/next/Core and Builtins/2023-04-24-14-38-16.gh-issue-103793.kqoH6Q.rst new file mode 100644 index 00000000000000..c48348798e7142 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-04-24-14-38-16.gh-issue-103793.kqoH6Q.rst @@ -0,0 +1,3 @@ +Optimized asyncio Task creation by deferring expensive string formatting +(task name generation) from Task creation to the first time ``get_name`` is +called. This makes asyncio benchmarks up to 5% faster. From 2efd376a739946c5fd9d2902f04634f21763c9a7 Mon Sep 17 00:00:00 2001 From: Itamar Ostricher Date: Mon, 24 Apr 2023 22:31:25 -0600 Subject: [PATCH 4/9] replace storing the task counter on task construction with just getting the next counter at lazy name generation --- Lib/asyncio/tasks.py | 5 ++--- Modules/_asynciomodule.c | 10 +++------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/Lib/asyncio/tasks.py b/Lib/asyncio/tasks.py index 9fbdfe099c37e1..6cd55bf6ae82df 100644 --- a/Lib/asyncio/tasks.py +++ b/Lib/asyncio/tasks.py @@ -103,9 +103,8 @@ def __init__(self, coro, *, loop=None, name=None, context=None): self._log_destroy_pending = False raise TypeError(f"a coroutine was expected, got {coro!r}") - self._task_name_counter = 0 if name is None: - self._task_name_counter = _task_name_counter() + # optimization: defer task name formatting to first get_name self._name = None else: self._name = str(name) @@ -146,7 +145,7 @@ def get_context(self): def get_name(self): if self._name is None: - self._name = f'Task-{self._task_name_counter}' + self._name = f'Task-{_task_name_counter()}' return self._name def set_name(self, value): diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index d664c0c60b6631..1a1cec4d1d4de8 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -129,7 +129,6 @@ typedef struct { int task_must_cancel; int task_log_destroy_pending; int task_num_cancels_requested; - uint64_t task_name_counter; } TaskObj; typedef struct { @@ -2066,16 +2065,13 @@ _asyncio_Task___init___impl(TaskObj *self, PyObject *coro, PyObject *loop, self->task_must_cancel = 0; self->task_log_destroy_pending = 1; self->task_num_cancels_requested = 0; - self->task_name_counter = 0; Py_INCREF(coro); Py_XSETREF(self->task_coro, coro); if (name == Py_None) { // optimization: defer task name formatting - // set task_name to None to indicate deferred formatting, and - // store the task name counter for formatting in get_name impl + // set task_name to None to indicate deferred formatting Py_INCREF(name); - self->task_name_counter = ++state->task_name_counter; } else if (!PyUnicode_CheckExact(name)) { name = PyObject_Str(name); } else { @@ -2455,9 +2451,9 @@ _asyncio_Task_get_name_impl(TaskObj *self) { if (self->task_name) { if (Py_IsNone(self->task_name)) { - assert(self->task_name_counter > 0); + asyncio_state *state = get_asyncio_state_by_def((PyObject *)self); PyObject *name = PyUnicode_FromFormat( - "Task-%" PRIu64, self->task_name_counter); + "Task-%" PRIu64, ++state->task_name_counter); Py_XSETREF(self->task_name, name); return Py_NewRef(self->task_name); } From 7cde29001edba17e59f51a4f24e336a20ac7210c Mon Sep 17 00:00:00 2001 From: Itamar Ostricher Date: Mon, 24 Apr 2023 22:38:37 -0600 Subject: [PATCH 5/9] remove redundant return --- Modules/_asynciomodule.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index 1a1cec4d1d4de8..058f47920ada2c 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -2455,7 +2455,6 @@ _asyncio_Task_get_name_impl(TaskObj *self) PyObject *name = PyUnicode_FromFormat( "Task-%" PRIu64, ++state->task_name_counter); Py_XSETREF(self->task_name, name); - return Py_NewRef(self->task_name); } return Py_NewRef(self->task_name); } From a155190e96b1666683c45cc716915d52f32a1e25 Mon Sep 17 00:00:00 2001 From: Itamar Ostricher Date: Tue, 25 Apr 2023 09:08:30 -0600 Subject: [PATCH 6/9] revert changes to python version --- Lib/asyncio/tasks.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Lib/asyncio/tasks.py b/Lib/asyncio/tasks.py index 6cd55bf6ae82df..c90d32c97add78 100644 --- a/Lib/asyncio/tasks.py +++ b/Lib/asyncio/tasks.py @@ -104,8 +104,7 @@ def __init__(self, coro, *, loop=None, name=None, context=None): raise TypeError(f"a coroutine was expected, got {coro!r}") if name is None: - # optimization: defer task name formatting to first get_name - self._name = None + self._name = f'Task-{_task_name_counter()}' else: self._name = str(name) @@ -144,8 +143,6 @@ def get_context(self): return self._context def get_name(self): - if self._name is None: - self._name = f'Task-{_task_name_counter()}' return self._name def set_name(self, value): From fb2bd7fbf086b633e8b0fcc49dc3f24ec6b7951c Mon Sep 17 00:00:00 2001 From: Itamar Ostricher Date: Tue, 25 Apr 2023 09:10:37 -0600 Subject: [PATCH 7/9] pass error up if PyUnicode_FromFormat returns NULL --- Modules/_asynciomodule.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index 058f47920ada2c..2273e035ea2299 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -2455,6 +2455,9 @@ _asyncio_Task_get_name_impl(TaskObj *self) PyObject *name = PyUnicode_FromFormat( "Task-%" PRIu64, ++state->task_name_counter); Py_XSETREF(self->task_name, name); + if (self->task_name == NULL) { + return NULL; + } } return Py_NewRef(self->task_name); } From 41e9a59cc85b7ece7db3ede35dc65df336c0f845 Mon Sep 17 00:00:00 2001 From: Itamar Ostricher Date: Wed, 26 Apr 2023 14:31:18 -0600 Subject: [PATCH 8/9] Switch back to original "assign task number at construction" but store the counter as PyLong for deferred formatting --- Modules/_asynciomodule.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index 2273e035ea2299..82dbc087322aa9 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -2070,8 +2070,9 @@ _asyncio_Task___init___impl(TaskObj *self, PyObject *coro, PyObject *loop, if (name == Py_None) { // optimization: defer task name formatting - // set task_name to None to indicate deferred formatting - Py_INCREF(name); + // store the task counter as PyLong in the name + // for deferred formatting in get_name + name = PyLong_FromUnsignedLongLong(++state->task_name_counter); } else if (!PyUnicode_CheckExact(name)) { name = PyObject_Str(name); } else { @@ -2450,14 +2451,12 @@ _asyncio_Task_get_name_impl(TaskObj *self) /*[clinic end generated code: output=0ecf1570c3b37a8f input=a4a6595d12f4f0f8]*/ { if (self->task_name) { - if (Py_IsNone(self->task_name)) { - asyncio_state *state = get_asyncio_state_by_def((PyObject *)self); - PyObject *name = PyUnicode_FromFormat( - "Task-%" PRIu64, ++state->task_name_counter); - Py_XSETREF(self->task_name, name); - if (self->task_name == NULL) { + if (PyLong_CheckExact(self->task_name)) { + PyObject *name = PyUnicode_FromFormat("Task-%S", self->task_name); + if (name == NULL) { return NULL; } + Py_SETREF(self->task_name, name); } return Py_NewRef(self->task_name); } From 712edf9ab5faf83f5600081548611efed87b1a93 Mon Sep 17 00:00:00 2001 From: Itamar Ostricher Date: Wed, 26 Apr 2023 14:36:24 -0600 Subject: [PATCH 9/9] Add test that using an explicit PyLong as task name does not accidentally trigger the deferred name formatting logic --- Lib/test/test_asyncio/test_tasks.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Lib/test/test_asyncio/test_tasks.py b/Lib/test/test_asyncio/test_tasks.py index 31622c91470bcb..6e8a51ce2555d5 100644 --- a/Lib/test/test_asyncio/test_tasks.py +++ b/Lib/test/test_asyncio/test_tasks.py @@ -399,6 +399,18 @@ async def notmuch(): self.loop.run_until_complete(t1) self.loop.run_until_complete(t2) + def test_task_set_name_pylong(self): + # test that setting the task name to a PyLong explicitly doesn't + # incorrectly trigger the deferred name formatting logic + async def notmuch(): + return 123 + + t = self.new_task(self.loop, notmuch(), name=987654321) + self.assertEqual(t.get_name(), '987654321') + t.set_name(123456789) + self.assertEqual(t.get_name(), '123456789') + self.loop.run_until_complete(t) + def test_task_repr_name_not_str(self): async def notmuch(): return 123