From f1235fe774a0a04e2f1312ae62ae1089b0053633 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 16 Jun 2022 13:13:40 +0100 Subject: [PATCH 1/4] Streamline pushing frames, by deopting if insufficient space. --- Include/cpython/code.h | 4 +-- Include/internal/pycore_frame.h | 44 +++++++++++++++++---------------- Objects/codeobject.c | 2 ++ Objects/frameobject.c | 2 +- Python/ceval.c | 31 +++++++++-------------- Python/frame.c | 16 ------------ Python/pystate.c | 2 +- Tools/scripts/deepfreeze.py | 2 ++ 8 files changed, 43 insertions(+), 60 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 1812617d3c3067..f78852ee7e3c04 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -73,8 +73,8 @@ typedef uint16_t _Py_CODEUNIT; \ /* redundant values (derived from co_localsplusnames and \ co_localspluskinds) */ \ - int co_nlocalsplus; /* number of local + cell + free variables \ - */ \ + int co_nlocalsplus; /* number of local + cell + free variables */ \ + int co_framesize; /* Size of frame in words */ \ int co_nlocals; /* number of local variables */ \ int co_nplaincellvars; /* number of non-arg cell variables */ \ int co_ncellvars; /* total number of cell variables */ \ diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 405afd67d2274a..4c242eb9f1764c 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -98,16 +98,16 @@ void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest); static inline void _PyFrame_InitializeSpecials( _PyInterpreterFrame *frame, PyFunctionObject *func, - PyObject *locals, int nlocalsplus) + PyObject *locals, PyCodeObject *code) { frame->f_func = func; - frame->f_code = (PyCodeObject *)Py_NewRef(func->func_code); + frame->f_code = (PyCodeObject *)Py_NewRef(code); frame->f_builtins = func->func_builtins; frame->f_globals = func->func_globals; frame->f_locals = Py_XNewRef(locals); - frame->stacktop = nlocalsplus; + frame->stacktop = code->co_nlocalsplus; frame->frame_obj = NULL; - frame->prev_instr = _PyCode_CODE(frame->f_code) - 1; + frame->prev_instr = _PyCode_CODE(code) - 1; frame->is_entry = false; frame->owner = FRAME_OWNED_BY_THREAD; } @@ -172,29 +172,31 @@ _PyFrame_FastToLocalsWithError(_PyInterpreterFrame *frame); void _PyFrame_LocalsToFast(_PyInterpreterFrame *frame, int clear); -extern _PyInterpreterFrame * -_PyThreadState_BumpFramePointerSlow(PyThreadState *tstate, size_t size); -static inline _PyInterpreterFrame * -_PyThreadState_BumpFramePointer(PyThreadState *tstate, size_t size) +static inline bool +_PyThreadState_HasStackSpace(PyThreadState *tstate, PyCodeObject *code) { - PyObject **base = tstate->datastack_top; - if (base) { - PyObject **top = base + size; - assert(tstate->datastack_limit); - if (top < tstate->datastack_limit) { - tstate->datastack_top = top; - return (_PyInterpreterFrame *)base; - } - } - return _PyThreadState_BumpFramePointerSlow(tstate, size); + return tstate->datastack_top + code->co_framesize < tstate->datastack_limit; } +extern _PyInterpreterFrame * +_PyThreadState_PushFrame(PyThreadState *tstate, size_t size); + void _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame *frame); -/* Consume reference to func */ -_PyInterpreterFrame * -_PyFrame_Push(PyThreadState *tstate, PyFunctionObject *func); +/* Pushes a frame without checking for space. + * Must be guarded by _PyThreadState_HasStackSpace() + * Consumes reference to func. */ +static inline _PyInterpreterFrame * +_PyFrame_PushUnchecked(PyThreadState *tstate, PyFunctionObject *func) +{ + PyCodeObject *code = (PyCodeObject *)func->func_code; + _PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->datastack_top; + tstate->datastack_top += code->co_framesize; + assert(tstate->datastack_top < tstate->datastack_limit); + _PyFrame_InitializeSpecials(new_frame, func, NULL, code); + return new_frame; +} int _PyInterpreterFrame_GetLine(_PyInterpreterFrame *frame); diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 8ac4e9914f8887..556d3a31c75142 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -4,6 +4,7 @@ #include "opcode.h" #include "structmember.h" // PyMemberDef #include "pycore_code.h" // _PyCodeConstructor +#include "pycore_frame.h" // FRAME_SPECIALS_SIZE #include "pycore_interp.h" // PyInterpreterState.co_extra_freefuncs #include "pycore_opcode.h" // _PyOpcode_Deopt #include "pycore_pystate.h" // _PyInterpreterState_GET() @@ -327,6 +328,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) /* derived values */ co->co_nlocalsplus = nlocalsplus; co->co_nlocals = nlocals; + co->co_framesize = nlocalsplus + con->stacksize + FRAME_SPECIALS_SIZE; co->co_nplaincellvars = nplaincellvars; co->co_ncellvars = ncellvars; co->co_nfreevars = nfreevars; diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 545c1b6b90f1a8..5c50a52a8588c0 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -859,7 +859,7 @@ init_frame(_PyInterpreterFrame *frame, PyFunctionObject *func, PyObject *locals) /* _PyFrame_InitializeSpecials consumes reference to func */ Py_INCREF(func); PyCodeObject *code = (PyCodeObject *)func->func_code; - _PyFrame_InitializeSpecials(frame, func, locals, code->co_nlocalsplus); + _PyFrame_InitializeSpecials(frame, func, locals, code); for (Py_ssize_t i = 0; i < code->co_nlocalsplus; i++) { frame->localsplus[i] = NULL; } diff --git a/Python/ceval.c b/Python/ceval.c index f9ec640ef1722a..7dccc4fb9bba06 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2240,16 +2240,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyFunctionObject *getitem = (PyFunctionObject *)cached; DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)getitem->func_code; - size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE; assert(code->co_argcount == 2); - _PyInterpreterFrame *new_frame = _PyThreadState_BumpFramePointer(tstate, size); - if (new_frame == NULL) { - goto error; - } - CALL_STAT_INC(frames_pushed); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code), BINARY_SUBSCR); Py_INCREF(getitem); - _PyFrame_InitializeSpecials(new_frame, getitem, - NULL, code->co_nlocalsplus); + _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem); + CALL_STAT_INC(frames_pushed); + CALL_STAT_INC(inlined_py_calls); STACK_SHRINK(2); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; @@ -4774,11 +4770,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(func->func_version != read_u32(cache->func_version), CALL); PyCodeObject *code = (PyCodeObject *)func->func_code; DEOPT_IF(code->co_argcount != argcount, CALL); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code), call); STAT_INC(CALL, hit); - _PyInterpreterFrame *new_frame = _PyFrame_Push(tstate, func); - if (new_frame == NULL) { - goto error; - } + _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func); + CALL_STAT_INC(frames_pushed); CALL_STAT_INC(inlined_py_calls); STACK_SHRINK(argcount); for (int i = 0; i < argcount; i++) { @@ -4810,11 +4805,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(argcount > code->co_argcount, CALL); int minargs = cache->min_args; DEOPT_IF(argcount < minargs, CALL); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code), call); STAT_INC(CALL, hit); - _PyInterpreterFrame *new_frame = _PyFrame_Push(tstate, func); - if (new_frame == NULL) { - goto error; - } + _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func); + CALL_STAT_INC(frames_pushed); CALL_STAT_INC(inlined_py_calls); STACK_SHRINK(argcount); for (int i = 0; i < argcount; i++) { @@ -6269,13 +6263,12 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func, size_t argcount, PyObject *kwnames) { PyCodeObject * code = (PyCodeObject *)func->func_code; - size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE; CALL_STAT_INC(frames_pushed); - _PyInterpreterFrame *frame = _PyThreadState_BumpFramePointer(tstate, size); + _PyInterpreterFrame *frame = _PyThreadState_PushFrame(tstate, code->co_framesize); if (frame == NULL) { goto fail; } - _PyFrame_InitializeSpecials(frame, func, locals, code->co_nlocalsplus); + _PyFrame_InitializeSpecials(frame, func, locals, code); PyObject **localsarray = &frame->localsplus[0]; for (int i = 0; i < code->co_nlocalsplus; i++) { localsarray[i] = NULL; diff --git a/Python/frame.c b/Python/frame.c index 3573f54ad63e9b..96cfef762cee0a 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -114,22 +114,6 @@ _PyFrame_Clear(_PyInterpreterFrame *frame) Py_DECREF(frame->f_code); } -/* Consumes reference to func */ -_PyInterpreterFrame * -_PyFrame_Push(PyThreadState *tstate, PyFunctionObject *func) -{ - PyCodeObject *code = (PyCodeObject *)func->func_code; - size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE; - CALL_STAT_INC(frames_pushed); - _PyInterpreterFrame *new_frame = _PyThreadState_BumpFramePointer(tstate, size); - if (new_frame == NULL) { - Py_DECREF(func); - return NULL; - } - _PyFrame_InitializeSpecials(new_frame, func, NULL, code->co_nlocalsplus); - return new_frame; -} - int _PyInterpreterFrame_GetLine(_PyInterpreterFrame *frame) { diff --git a/Python/pystate.c b/Python/pystate.c index df56c0530f05b0..56af52c3d29eab 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2167,7 +2167,7 @@ push_chunk(PyThreadState *tstate, int size) } _PyInterpreterFrame * -_PyThreadState_BumpFramePointerSlow(PyThreadState *tstate, size_t size) +_PyThreadState_PushFrame(PyThreadState *tstate, size_t size) { assert(size < INT_MAX/sizeof(PyObject *)); PyObject **base = tstate->datastack_top; diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index 443fcf2274909c..de8fde448e30b1 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -118,6 +118,7 @@ def __init__(self, file: TextIO) -> None: self.write('#include "Python.h"') self.write('#include "internal/pycore_gc.h"') self.write('#include "internal/pycore_code.h"') + self.write('#include "internal/pycore_frame.h"') self.write('#include "internal/pycore_long.h"') self.write("") @@ -256,6 +257,7 @@ def generate_code(self, name: str, code: types.CodeType) -> str: self.field(code, "co_argcount") self.field(code, "co_posonlyargcount") self.field(code, "co_kwonlyargcount") + self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,") self.field(code, "co_stacksize") self.field(code, "co_firstlineno") self.write(f".co_nlocalsplus = {len(localsplusnames)},") From 6f7b085233a55c1f6929ab6d1d0594cec2ffeef4 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 16 Jun 2022 13:18:56 +0100 Subject: [PATCH 2/4] Move NULL check and incref off the fast path. --- Include/internal/pycore_frame.h | 4 ++-- Objects/frameobject.c | 1 + Python/ceval.c | 7 ++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 4c242eb9f1764c..d5892dc47edb18 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -94,7 +94,7 @@ static inline void _PyFrame_StackPush(_PyInterpreterFrame *f, PyObject *value) { void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest); -/* Consumes reference to func */ +/* Consumes reference to func and locals */ static inline void _PyFrame_InitializeSpecials( _PyInterpreterFrame *frame, PyFunctionObject *func, @@ -104,7 +104,7 @@ _PyFrame_InitializeSpecials( frame->f_code = (PyCodeObject *)Py_NewRef(code); frame->f_builtins = func->func_builtins; frame->f_globals = func->func_globals; - frame->f_locals = Py_XNewRef(locals); + frame->f_locals = locals; frame->stacktop = code->co_nlocalsplus; frame->frame_obj = NULL; frame->prev_instr = _PyCode_CODE(code) - 1; diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 5c50a52a8588c0..94bd245eafc74b 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -858,6 +858,7 @@ init_frame(_PyInterpreterFrame *frame, PyFunctionObject *func, PyObject *locals) { /* _PyFrame_InitializeSpecials consumes reference to func */ Py_INCREF(func); + Py_XINCREF(locals); PyCodeObject *code = (PyCodeObject *)func->func_code; _PyFrame_InitializeSpecials(frame, func, locals, code); for (Py_ssize_t i = 0; i < code->co_nlocalsplus; i++) { diff --git a/Python/ceval.c b/Python/ceval.c index 7dccc4fb9bba06..46350de97f162a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4684,7 +4684,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // Check if the call can be inlined or not if (Py_TYPE(function) == &PyFunction_Type && tstate->interp->eval_frame == NULL) { int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(function))->co_flags; - PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : PyFunction_GET_GLOBALS(function); + PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(function)); STACK_SHRINK(total_args); _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit( tstate, (PyFunctionObject *)function, locals, @@ -6256,7 +6256,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, return -1; } -/* Consumes references to func and all the args */ +/* Consumes references to func, locals and all the args */ static _PyInterpreterFrame * _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func, PyObject *locals, PyObject* const* args, @@ -6312,8 +6312,9 @@ _PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func, PyObject *kwnames) { /* _PyEvalFramePushAndInit consumes the references - * to func and all its arguments */ + * to func, locals and all its arguments */ Py_INCREF(func); + Py_XINCREF(locals); for (size_t i = 0; i < argcount; i++) { Py_INCREF(args[i]); } From 5fdd7f8dc5a1f9f8cffcd4f6bee86766f56963d2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 16 Jun 2022 13:26:40 +0100 Subject: [PATCH 3/4] Use size in frame space check. Allows multiple frames to be checked at once. --- Include/internal/pycore_frame.h | 4 ++-- Python/ceval.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index d5892dc47edb18..9fc89cace6d598 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -174,9 +174,9 @@ _PyFrame_LocalsToFast(_PyInterpreterFrame *frame, int clear); static inline bool -_PyThreadState_HasStackSpace(PyThreadState *tstate, PyCodeObject *code) +_PyThreadState_HasStackSpace(PyThreadState *tstate, int size) { - return tstate->datastack_top + code->co_framesize < tstate->datastack_limit; + return tstate->datastack_top + size < tstate->datastack_limit; } extern _PyInterpreterFrame * diff --git a/Python/ceval.c b/Python/ceval.c index 46350de97f162a..7ae856016f16fa 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2241,7 +2241,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)getitem->func_code; assert(code->co_argcount == 2); - DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code), BINARY_SUBSCR); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); Py_INCREF(getitem); _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem); CALL_STAT_INC(frames_pushed); @@ -4770,7 +4770,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(func->func_version != read_u32(cache->func_version), CALL); PyCodeObject *code = (PyCodeObject *)func->func_code; DEOPT_IF(code->co_argcount != argcount, CALL); - DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code), call); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); STAT_INC(CALL, hit); _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func); CALL_STAT_INC(frames_pushed); @@ -4805,7 +4805,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(argcount > code->co_argcount, CALL); int minargs = cache->min_args; DEOPT_IF(argcount < minargs, CALL); - DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code), call); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); STAT_INC(CALL, hit); _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func); CALL_STAT_INC(frames_pushed); From 38e04b5980a63f3a75217a774ecd47446902c5d2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 16 Jun 2022 15:13:34 +0100 Subject: [PATCH 4/4] DRY stats call. --- Include/internal/pycore_frame.h | 2 ++ Python/ceval.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 9fc89cace6d598..eed26fbb06218a 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -6,6 +6,7 @@ extern "C" { #include #include +#include "pycore_code.h" // STATS /* See Objects/frame_layout.md for an explanation of the frame stack * including explanation of the PyFrameObject and _PyInterpreterFrame @@ -190,6 +191,7 @@ void _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame *frame); static inline _PyInterpreterFrame * _PyFrame_PushUnchecked(PyThreadState *tstate, PyFunctionObject *func) { + CALL_STAT_INC(frames_pushed); PyCodeObject *code = (PyCodeObject *)func->func_code; _PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->datastack_top; tstate->datastack_top += code->co_framesize; diff --git a/Python/ceval.c b/Python/ceval.c index 7ae856016f16fa..2929980afa9e74 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2244,7 +2244,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); Py_INCREF(getitem); _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem); - CALL_STAT_INC(frames_pushed); CALL_STAT_INC(inlined_py_calls); STACK_SHRINK(2); new_frame->localsplus[0] = container; @@ -4773,7 +4772,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); STAT_INC(CALL, hit); _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func); - CALL_STAT_INC(frames_pushed); CALL_STAT_INC(inlined_py_calls); STACK_SHRINK(argcount); for (int i = 0; i < argcount; i++) { @@ -4808,7 +4806,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); STAT_INC(CALL, hit); _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func); - CALL_STAT_INC(frames_pushed); CALL_STAT_INC(inlined_py_calls); STACK_SHRINK(argcount); for (int i = 0; i < argcount; i++) {