From ab0e60101637b7cf47f3cc95813996791e7118c4 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 20 Jun 2022 13:00:42 +0100 Subject: [PATCH] GH-93516: Speedup line number checks when tracing. (GH-93763) * Use a lookup table to reduce overhead of getting line numbers during tracing. --- Include/cpython/code.h | 4 +- Include/internal/pycore_code.h | 29 ++++++++++ ...2-06-13-10-48-09.gh-issue-93516.yJSait.rst | 2 + Objects/codeobject.c | 56 +++++++++++++++++++ Python/ceval.c | 16 +++--- Tools/scripts/deepfreeze.py | 2 + 6 files changed, 101 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-06-13-10-48-09.gh-issue-93516.yJSait.rst diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 3ccd101415bf0b..238bf9450a9953 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -62,7 +62,8 @@ typedef uint16_t _Py_CODEUNIT; PyObject *co_exceptiontable; /* Byte string encoding exception handling \ table */ \ int co_flags; /* CO_..., see below */ \ - int co_warmup; /* Warmup counter for quickening */ \ + short co_warmup; /* Warmup counter for quickening */ \ + short _co_linearray_entry_size; /* Size of each entry in _co_linearray */ \ \ /* The rest are not so impactful on performance. */ \ int co_argcount; /* #arguments, except *args */ \ @@ -90,6 +91,7 @@ typedef uint16_t _Py_CODEUNIT; PyObject *co_weakreflist; /* to support weakrefs to code objects */ \ void *_co_code; /* cached co_code object/attribute */ \ int _co_firsttraceable; /* index of first traceable instruction */ \ + char *_co_linearray; /* array of line offsets */ \ /* Scratch space for extra data relating to the code object. \ Type is a void* to keep the format private in codeobject.c to force \ people to go through the proper APIs. */ \ diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 805c82483eec76..bb82d9fb9c52e9 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -461,6 +461,35 @@ adaptive_counter_backoff(uint16_t counter) { } +/* Line array cache for tracing */ + +extern int _PyCode_CreateLineArray(PyCodeObject *co); + +static inline int +_PyCode_InitLineArray(PyCodeObject *co) +{ + if (co->_co_linearray) { + return 0; + } + return _PyCode_CreateLineArray(co); +} + +static inline int +_PyCode_LineNumberFromArray(PyCodeObject *co, int index) +{ + assert(co->_co_linearray != NULL); + assert(index >= 0); + assert(index < Py_SIZE(co)); + if (co->_co_linearray_entry_size == 2) { + return ((int16_t *)co->_co_linearray)[index]; + } + else { + assert(co->_co_linearray_entry_size == 4); + return ((int32_t *)co->_co_linearray)[index]; + } +} + + #ifdef __cplusplus } #endif diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-06-13-10-48-09.gh-issue-93516.yJSait.rst b/Misc/NEWS.d/next/Core and Builtins/2022-06-13-10-48-09.gh-issue-93516.yJSait.rst new file mode 100644 index 00000000000000..5c22c7a67b6e51 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-06-13-10-48-09.gh-issue-93516.yJSait.rst @@ -0,0 +1,2 @@ +Lazily create a table mapping bytecode offsets to line numbers to speed up +calculation of line numbers when tracing. diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 556d3a31c75142..c1f696dc5723b7 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -339,6 +339,8 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) co->_co_code = NULL; co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE; + co->_co_linearray_entry_size = 0; + co->_co_linearray = NULL; memcpy(_PyCode_CODE(co), PyBytes_AS_STRING(con->code), PyBytes_GET_SIZE(con->code)); int entry_point = 0; @@ -703,6 +705,50 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno) lnotab_notes.txt for the details of the lnotab representation. */ +int +_PyCode_CreateLineArray(PyCodeObject *co) +{ + assert(co->_co_linearray == NULL); + PyCodeAddressRange bounds; + int size; + int max_line = 0; + _PyCode_InitAddressRange(co, &bounds); + while(_PyLineTable_NextAddressRange(&bounds)) { + if (bounds.ar_line > max_line) { + max_line = bounds.ar_line; + } + } + if (max_line < (1 << 15)) { + size = 2; + } + else { + size = 4; + } + co->_co_linearray = PyMem_Malloc(Py_SIZE(co)*size); + if (co->_co_linearray == NULL) { + PyErr_NoMemory(); + return -1; + } + co->_co_linearray_entry_size = size; + _PyCode_InitAddressRange(co, &bounds); + while(_PyLineTable_NextAddressRange(&bounds)) { + int start = bounds.ar_start / sizeof(_Py_CODEUNIT); + int end = bounds.ar_end / sizeof(_Py_CODEUNIT); + for (int index = start; index < end; index++) { + assert(index < (int)Py_SIZE(co)); + if (size == 2) { + assert(((int16_t)bounds.ar_line) == bounds.ar_line); + ((int16_t *)co->_co_linearray)[index] = bounds.ar_line; + } + else { + assert(size == 4); + ((int32_t *)co->_co_linearray)[index] = bounds.ar_line; + } + } + } + return 0; +} + int PyCode_Addr2Line(PyCodeObject *co, int addrq) { @@ -710,6 +756,9 @@ PyCode_Addr2Line(PyCodeObject *co, int addrq) return co->co_firstlineno; } assert(addrq >= 0 && addrq < _PyCode_NBYTES(co)); + if (co->_co_linearray) { + return _PyCode_LineNumberFromArray(co, addrq / sizeof(_Py_CODEUNIT)); + } PyCodeAddressRange bounds; _PyCode_InitAddressRange(co, &bounds); return _PyCode_CheckLineNumber(addrq, &bounds); @@ -1549,6 +1598,9 @@ code_dealloc(PyCodeObject *co) if (co->co_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject*)co); } + if (co->_co_linearray) { + PyMem_Free(co->_co_linearray); + } if (co->co_warmup == 0) { _Py_QuickenedCount--; } @@ -2106,6 +2158,10 @@ _PyStaticCode_Dealloc(PyCodeObject *co) PyObject_ClearWeakRefs((PyObject *)co); co->co_weakreflist = NULL; } + if (co->_co_linearray) { + PyMem_Free(co->_co_linearray); + co->_co_linearray = NULL; + } } int diff --git a/Python/ceval.c b/Python/ceval.c index 19b2bb4d393b45..a23ed58013abab 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -6799,9 +6799,10 @@ call_trace(Py_tracefunc func, PyObject *obj, tstate->tracing_what = what; PyThreadState_EnterTracing(tstate); assert(_PyInterpreterFrame_LASTI(frame) >= 0); - initialize_trace_info(&tstate->trace_info, frame); - int addr = _PyInterpreterFrame_LASTI(frame) * sizeof(_Py_CODEUNIT); - f->f_lineno = _PyCode_CheckLineNumber(addr, &tstate->trace_info.bounds); + if (_PyCode_InitLineArray(frame->f_code)) { + return -1; + } + f->f_lineno = _PyCode_LineNumberFromArray(frame->f_code, _PyInterpreterFrame_LASTI(frame)); result = func(obj, f, what, arg); f->f_lineno = 0; PyThreadState_LeaveTracing(tstate); @@ -6838,16 +6839,17 @@ maybe_call_line_trace(Py_tracefunc func, PyObject *obj, represents a jump backwards, update the frame's line number and then call the trace function if we're tracing source lines. */ - initialize_trace_info(&tstate->trace_info, frame); + if (_PyCode_InitLineArray(frame->f_code)) { + return -1; + } int lastline; if (instr_prev <= frame->f_code->_co_firsttraceable) { lastline = -1; } else { - lastline = _PyCode_CheckLineNumber(instr_prev*sizeof(_Py_CODEUNIT), &tstate->trace_info.bounds); + lastline = _PyCode_LineNumberFromArray(frame->f_code, instr_prev); } - int addr = _PyInterpreterFrame_LASTI(frame) * sizeof(_Py_CODEUNIT); - int line = _PyCode_CheckLineNumber(addr, &tstate->trace_info.bounds); + int line = _PyCode_LineNumberFromArray(frame->f_code, _PyInterpreterFrame_LASTI(frame)); PyFrameObject *f = _PyFrame_GetFrameObject(frame); if (f == NULL) { return -1; diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index de8fde448e30b1..b2d091aca1f6cc 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -254,6 +254,7 @@ def generate_code(self, name: str, code: types.CodeType) -> str: self.write(f".co_exceptiontable = {co_exceptiontable},") self.field(code, "co_flags") self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,") + self.write("._co_linearray_entry_size = 0,") self.field(code, "co_argcount") self.field(code, "co_posonlyargcount") self.field(code, "co_kwonlyargcount") @@ -271,6 +272,7 @@ def generate_code(self, name: str, code: types.CodeType) -> str: self.write(f".co_name = {co_name},") self.write(f".co_qualname = {co_qualname},") self.write(f".co_linetable = {co_linetable},") + self.write("._co_linearray = NULL,") self.write(f".co_code_adaptive = {co_code_adaptive},") for i, op in enumerate(code.co_code[::2]): if op == RESUME: