From c707cffbbcb77c412438aebca051a83de7e5089c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 8 Jul 2023 09:23:15 -0700 Subject: [PATCH 1/7] First steps towards POP_JUMP_IF_XXX uops This adds: - Hand-written uops JUMP_IF_{TRUE,FALSE,NONE,NOT_NONE}. These peek at the top of the stack. The jump target (in superblock space) is absolute. - Hand-written translation for POP_JUMP_IF_TRUE, assuming the jump is unlikely. This can serve as an example for the rest. (Probably adding the others will cause a little refactoring.) Once we implement jump-likelihood profiling, we can implement the jump-unlikely case. Note that the stubs are placed at the end of the trace array. There is a gap between the main superblock and the stubs. We could close the gap, we'd just have to patch up the JUMP uops. --- Python/ceval.c | 35 +++++++++++++++- Python/opcode_metadata.h | 56 ++++++++++++++----------- Python/optimizer.c | 31 +++++++++++++- Tools/cases_generator/generate_cases.py | 7 ++++ 4 files changed, 102 insertions(+), 27 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 1b8650a650412d..7a88893bd8e949 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2755,7 +2755,8 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject operand = self->trace[pc].operand; oparg = (int)operand; DPRINTF(3, - " uop %s, operand %" PRIu64 ", stack_level %d\n", + "%4d: uop %s, operand %" PRIu64 ", stack_level %d\n", + pc, opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode], operand, (int)(stack_pointer - _PyFrame_Stackbase(frame))); @@ -2767,6 +2768,38 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject #define ENABLE_SPECIALIZATION 0 #include "executor_cases.c.h" + case JUMP_IF_FALSE: + { + if (Py_IsFalse(stack_pointer[-1])) { + pc = oparg; + } + break; + } + + case JUMP_IF_TRUE: + { + if (Py_IsTrue(stack_pointer[-1])) { + pc = oparg; + } + break; + } + + case JUMP_IF_NONE: + { + if (Py_IsNone(stack_pointer[-1])) { + pc = oparg; + } + break; + } + + case JUMP_IF_NOT_NONE: + { + if (!Py_IsNone(stack_pointer[-1])) { + pc = oparg; + } + break; + } + case SAVE_IP: { frame->prev_instr = ip_offset + oparg; diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index 70e1ca44ce7663..d305a71a713aa3 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -21,18 +21,22 @@ #define EXIT_TRACE 300 #define SAVE_IP 301 -#define _GUARD_BOTH_INT 302 -#define _BINARY_OP_MULTIPLY_INT 303 -#define _BINARY_OP_ADD_INT 304 -#define _BINARY_OP_SUBTRACT_INT 305 -#define _GUARD_BOTH_FLOAT 306 -#define _BINARY_OP_MULTIPLY_FLOAT 307 -#define _BINARY_OP_ADD_FLOAT 308 -#define _BINARY_OP_SUBTRACT_FLOAT 309 -#define _GUARD_BOTH_UNICODE 310 -#define _BINARY_OP_ADD_UNICODE 311 -#define _LOAD_LOCALS 312 -#define _LOAD_FROM_DICT_OR_GLOBALS 313 +#define JUMP_IF_FALSE 302 +#define JUMP_IF_TRUE 303 +#define JUMP_IF_NONE 304 +#define JUMP_IF_NOT_NONE 305 +#define _GUARD_BOTH_INT 306 +#define _BINARY_OP_MULTIPLY_INT 307 +#define _BINARY_OP_ADD_INT 308 +#define _BINARY_OP_SUBTRACT_INT 309 +#define _GUARD_BOTH_FLOAT 310 +#define _BINARY_OP_MULTIPLY_FLOAT 311 +#define _BINARY_OP_ADD_FLOAT 312 +#define _BINARY_OP_SUBTRACT_FLOAT 313 +#define _GUARD_BOTH_UNICODE 314 +#define _BINARY_OP_ADD_UNICODE 315 +#define _LOAD_LOCALS 316 +#define _LOAD_FROM_DICT_OR_GLOBALS 317 #ifndef NEED_OPCODE_METADATA extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); @@ -1284,18 +1288,22 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { const char * const _PyOpcode_uop_name[512] = { [300] = "EXIT_TRACE", [301] = "SAVE_IP", - [302] = "_GUARD_BOTH_INT", - [303] = "_BINARY_OP_MULTIPLY_INT", - [304] = "_BINARY_OP_ADD_INT", - [305] = "_BINARY_OP_SUBTRACT_INT", - [306] = "_GUARD_BOTH_FLOAT", - [307] = "_BINARY_OP_MULTIPLY_FLOAT", - [308] = "_BINARY_OP_ADD_FLOAT", - [309] = "_BINARY_OP_SUBTRACT_FLOAT", - [310] = "_GUARD_BOTH_UNICODE", - [311] = "_BINARY_OP_ADD_UNICODE", - [312] = "_LOAD_LOCALS", - [313] = "_LOAD_FROM_DICT_OR_GLOBALS", + [302] = "JUMP_IF_FALSE", + [303] = "JUMP_IF_TRUE", + [304] = "JUMP_IF_NONE", + [305] = "JUMP_IF_NOT_NONE", + [306] = "_GUARD_BOTH_INT", + [307] = "_BINARY_OP_MULTIPLY_INT", + [308] = "_BINARY_OP_ADD_INT", + [309] = "_BINARY_OP_SUBTRACT_INT", + [310] = "_GUARD_BOTH_FLOAT", + [311] = "_BINARY_OP_MULTIPLY_FLOAT", + [312] = "_BINARY_OP_ADD_FLOAT", + [313] = "_BINARY_OP_SUBTRACT_FLOAT", + [314] = "_GUARD_BOTH_UNICODE", + [315] = "_BINARY_OP_ADD_UNICODE", + [316] = "_LOAD_LOCALS", + [317] = "_LOAD_FROM_DICT_OR_GLOBALS", }; #endif // NEED_OPCODE_METADATA #endif diff --git a/Python/optimizer.c b/Python/optimizer.c index 1d731ed2309c3c..75ff701a24506c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -401,6 +401,14 @@ translate_bytecode_to_trace( trace[trace_length].operand = (OPERAND); \ trace_length++; +#define ADD_TO_STUB(INDEX, OPCODE, OPERAND) \ + DPRINTF(2, " ADD_TO_STUB(%d, %s, %" PRIu64 ")\n", \ + (INDEX), \ + (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)], \ + (uint64_t)(OPERAND)); \ + trace[(INDEX)].opcode = (OPCODE); \ + trace[(INDEX)].operand = (OPERAND); + DPRINTF(4, "Optimizing %s (%s:%d) at byte offset %ld\n", PyUnicode_AsUTF8(code->co_qualname), @@ -409,7 +417,7 @@ translate_bytecode_to_trace( 2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive)); for (;;) { - ADD_TO_TRACE(SAVE_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive)); + ADD_TO_TRACE(SAVE_IP, instr - (_Py_CODEUNIT *)code->co_code_adaptive); int opcode = instr->op.code; int oparg = instr->op.arg; int extras = 0; @@ -426,6 +434,25 @@ translate_bytecode_to_trace( oparg = (oparg & 0xffffff00) | executor->vm_data.oparg; } switch (opcode) { + + case POP_JUMP_IF_FALSE: + { + // Assume jump unlikely (TODO: handle jump likely case) + // Reserve 7 entries (2 here, 3 stub, plus SAVE_IP + EXIT_TRACE) + if (trace_length + 7 > max_length) { + DPRINTF(1, "Ran out of space for POP_JUMP_IF_FALSE\n"); + goto done; + } + _Py_CODEUNIT *target_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg; + max_length -= 3; // Really the start of the stubs + ADD_TO_TRACE(JUMP_IF_FALSE, max_length); + ADD_TO_TRACE(POP_TOP, 0); + ADD_TO_STUB(max_length, POP_TOP, 0); + ADD_TO_STUB(max_length + 1, SAVE_IP, target_instr - (_Py_CODEUNIT *)code->co_code_adaptive); + ADD_TO_STUB(max_length + 2, EXIT_TRACE, 0); + break; + } + default: { const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; @@ -538,7 +565,7 @@ uop_optimize( return -1; } executor->base.execute = _PyUopExecute; - memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); + memcpy(executor->trace, trace, _Py_UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction)); *exec_ptr = (_PyExecutorObject *)executor; return 1; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 14269ca8cbe750..9f928ff2bd10a1 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -1338,12 +1338,19 @@ def write_pseudo_instrs(self) -> None: def write_uop_items(self, make_text: typing.Callable[[str, int], str]) -> None: """Write '#define XXX NNN' for each uop""" counter = 300 # TODO: Avoid collision with pseudo instructions + def add(name: str) -> None: nonlocal counter self.out.emit(make_text(name, counter)) counter += 1 + add("EXIT_TRACE") add("SAVE_IP") + add("JUMP_IF_FALSE") + add("JUMP_IF_TRUE") + add("JUMP_IF_NONE") + add("JUMP_IF_NOT_NONE") + for instr in self.instrs.values(): if instr.kind == "op" and instr.is_viable_uop(): add(instr.name) From 1e5253a84a0f4deefcc152fc5fbb1be86992758d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 9 Jul 2023 11:54:56 -0700 Subject: [PATCH 2/7] Add a test showing JUMP_IF_FALSE is emitted --- Lib/test/test_capi/test_misc.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 5f39f23401c3f2..45d3a1e0bd3ab6 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2527,6 +2527,22 @@ def testfunc(x): uops = {opname for opname, _ in ex} self.assertIn("UNPACK_SEQUENCE", uops) + def test_pop_jump_if_false(self): + def testfunc(n): + i = 0 + while i < n: + i += 1 + + opt = _testinternalcapi.get_uop_optimizer() + + with temporary_optimizer(opt): + testfunc(10) + + ex = get_first_executor(testfunc.__code__) + self.assertIsNotNone(ex) + uops = {opname for opname, _ in ex} + self.assertIn("JUMP_IF_FALSE", uops) + if __name__ == "__main__": unittest.main() From bf19586ec555209e309b0375d8b81ab4f87d9578 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 9 Jul 2023 11:58:21 -0700 Subject: [PATCH 3/7] Here's what it would look like with _POP_JUMP_IF_XXX uops --- Lib/test/test_capi/test_misc.py | 2 +- Python/ceval.c | 17 +++++++++++++---- Python/opcode_metadata.h | 16 ++++++++-------- Python/optimizer.c | 14 ++++++-------- Tools/cases_generator/generate_cases.py | 8 ++++---- 5 files changed, 32 insertions(+), 25 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 45d3a1e0bd3ab6..b3e7498e9c773f 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2541,7 +2541,7 @@ def testfunc(n): ex = get_first_executor(testfunc.__code__) self.assertIsNotNone(ex) uops = {opname for opname, _ in ex} - self.assertIn("JUMP_IF_FALSE", uops) + self.assertIn("_POP_JUMP_IF_FALSE", uops) if __name__ == "__main__": diff --git a/Python/ceval.c b/Python/ceval.c index 7a88893bd8e949..836b19779eb0b6 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2768,35 +2768,44 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject #define ENABLE_SPECIALIZATION 0 #include "executor_cases.c.h" - case JUMP_IF_FALSE: + // NOTE: These pop-jumps move the uop pc, not the bytecode ip + case _POP_JUMP_IF_FALSE: { if (Py_IsFalse(stack_pointer[-1])) { pc = oparg; } + stack_pointer--; break; } - case JUMP_IF_TRUE: + case _POP_JUMP_IF_TRUE: { if (Py_IsTrue(stack_pointer[-1])) { pc = oparg; } + stack_pointer--; break; } - case JUMP_IF_NONE: + case _POP_JUMP_IF_NONE: { if (Py_IsNone(stack_pointer[-1])) { pc = oparg; } + else { + Py_DECREF(stack_pointer[-1]); + } + stack_pointer--; break; } - case JUMP_IF_NOT_NONE: + case _POP_JUMP_IF_NOT_NONE: { if (!Py_IsNone(stack_pointer[-1])) { pc = oparg; + Py_DECREF(stack_pointer[-1]); } + stack_pointer--; break; } diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index d305a71a713aa3..602c5cc6e55e1d 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -21,10 +21,10 @@ #define EXIT_TRACE 300 #define SAVE_IP 301 -#define JUMP_IF_FALSE 302 -#define JUMP_IF_TRUE 303 -#define JUMP_IF_NONE 304 -#define JUMP_IF_NOT_NONE 305 +#define _POP_JUMP_IF_FALSE 302 +#define _POP_JUMP_IF_TRUE 303 +#define _POP_JUMP_IF_NONE 304 +#define _POP_JUMP_IF_NOT_NONE 305 #define _GUARD_BOTH_INT 306 #define _BINARY_OP_MULTIPLY_INT 307 #define _BINARY_OP_ADD_INT 308 @@ -1288,10 +1288,10 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { const char * const _PyOpcode_uop_name[512] = { [300] = "EXIT_TRACE", [301] = "SAVE_IP", - [302] = "JUMP_IF_FALSE", - [303] = "JUMP_IF_TRUE", - [304] = "JUMP_IF_NONE", - [305] = "JUMP_IF_NOT_NONE", + [302] = "_POP_JUMP_IF_FALSE", + [303] = "_POP_JUMP_IF_TRUE", + [304] = "_POP_JUMP_IF_NONE", + [305] = "_POP_JUMP_IF_NOT_NONE", [306] = "_GUARD_BOTH_INT", [307] = "_BINARY_OP_MULTIPLY_INT", [308] = "_BINARY_OP_ADD_INT", diff --git a/Python/optimizer.c b/Python/optimizer.c index 75ff701a24506c..d230d428dfb10e 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -438,18 +438,16 @@ translate_bytecode_to_trace( case POP_JUMP_IF_FALSE: { // Assume jump unlikely (TODO: handle jump likely case) - // Reserve 7 entries (2 here, 3 stub, plus SAVE_IP + EXIT_TRACE) - if (trace_length + 7 > max_length) { + // Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + EXIT_TRACE) + if (trace_length + 5 > max_length) { DPRINTF(1, "Ran out of space for POP_JUMP_IF_FALSE\n"); goto done; } _Py_CODEUNIT *target_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg; - max_length -= 3; // Really the start of the stubs - ADD_TO_TRACE(JUMP_IF_FALSE, max_length); - ADD_TO_TRACE(POP_TOP, 0); - ADD_TO_STUB(max_length, POP_TOP, 0); - ADD_TO_STUB(max_length + 1, SAVE_IP, target_instr - (_Py_CODEUNIT *)code->co_code_adaptive); - ADD_TO_STUB(max_length + 2, EXIT_TRACE, 0); + max_length -= 2; // Really the start of the stubs + ADD_TO_TRACE(_POP_JUMP_IF_FALSE, max_length); + ADD_TO_STUB(max_length, SAVE_IP, target_instr - (_Py_CODEUNIT *)code->co_code_adaptive); + ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0); break; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 9f928ff2bd10a1..056fec70d56855 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -1346,10 +1346,10 @@ def add(name: str) -> None: add("EXIT_TRACE") add("SAVE_IP") - add("JUMP_IF_FALSE") - add("JUMP_IF_TRUE") - add("JUMP_IF_NONE") - add("JUMP_IF_NOT_NONE") + add("_POP_JUMP_IF_FALSE") + add("_POP_JUMP_IF_TRUE") + add("_POP_JUMP_IF_NONE") + add("_POP_JUMP_IF_NOT_NONE") for instr in self.instrs.values(): if instr.kind == "op" and instr.is_viable_uop(): From 11d0e11889b6445d2fe3531aa3070f40a7006f1b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 10 Jul 2023 10:51:45 -0700 Subject: [PATCH 4/7] Add POP_JUMP_IF_TRUE; drop NONE variants --- Lib/test/test_capi/test_misc.py | 16 ++++++++ Python/ceval.c | 22 ----------- Python/opcode_metadata.h | 52 ++++++++++++------------- Python/optimizer.c | 14 +++++-- Tools/cases_generator/generate_cases.py | 2 - 5 files changed, 50 insertions(+), 56 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index b3e7498e9c773f..c4a5f494d199ff 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2543,6 +2543,22 @@ def testfunc(n): uops = {opname for opname, _ in ex} self.assertIn("_POP_JUMP_IF_FALSE", uops) + def test_pop_jump_if_true(self): + def testfunc(n): + i = 0 + while not i >= n: + i += 1 + + opt = _testinternalcapi.get_uop_optimizer() + + with temporary_optimizer(opt): + testfunc(10) + + ex = get_first_executor(testfunc.__code__) + self.assertIsNotNone(ex) + uops = {opname for opname, _ in ex} + self.assertIn("_POP_JUMP_IF_TRUE", uops) + if __name__ == "__main__": unittest.main() diff --git a/Python/ceval.c b/Python/ceval.c index 836b19779eb0b6..08d63cdb5e06bf 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2787,28 +2787,6 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject break; } - case _POP_JUMP_IF_NONE: - { - if (Py_IsNone(stack_pointer[-1])) { - pc = oparg; - } - else { - Py_DECREF(stack_pointer[-1]); - } - stack_pointer--; - break; - } - - case _POP_JUMP_IF_NOT_NONE: - { - if (!Py_IsNone(stack_pointer[-1])) { - pc = oparg; - Py_DECREF(stack_pointer[-1]); - } - stack_pointer--; - break; - } - case SAVE_IP: { frame->prev_instr = ip_offset + oparg; diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index 602c5cc6e55e1d..1135a3dff7ffbf 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -23,20 +23,18 @@ #define SAVE_IP 301 #define _POP_JUMP_IF_FALSE 302 #define _POP_JUMP_IF_TRUE 303 -#define _POP_JUMP_IF_NONE 304 -#define _POP_JUMP_IF_NOT_NONE 305 -#define _GUARD_BOTH_INT 306 -#define _BINARY_OP_MULTIPLY_INT 307 -#define _BINARY_OP_ADD_INT 308 -#define _BINARY_OP_SUBTRACT_INT 309 -#define _GUARD_BOTH_FLOAT 310 -#define _BINARY_OP_MULTIPLY_FLOAT 311 -#define _BINARY_OP_ADD_FLOAT 312 -#define _BINARY_OP_SUBTRACT_FLOAT 313 -#define _GUARD_BOTH_UNICODE 314 -#define _BINARY_OP_ADD_UNICODE 315 -#define _LOAD_LOCALS 316 -#define _LOAD_FROM_DICT_OR_GLOBALS 317 +#define _GUARD_BOTH_INT 304 +#define _BINARY_OP_MULTIPLY_INT 305 +#define _BINARY_OP_ADD_INT 306 +#define _BINARY_OP_SUBTRACT_INT 307 +#define _GUARD_BOTH_FLOAT 308 +#define _BINARY_OP_MULTIPLY_FLOAT 309 +#define _BINARY_OP_ADD_FLOAT 310 +#define _BINARY_OP_SUBTRACT_FLOAT 311 +#define _GUARD_BOTH_UNICODE 312 +#define _BINARY_OP_ADD_UNICODE 313 +#define _LOAD_LOCALS 314 +#define _LOAD_FROM_DICT_OR_GLOBALS 315 #ifndef NEED_OPCODE_METADATA extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); @@ -1290,20 +1288,18 @@ const char * const _PyOpcode_uop_name[512] = { [301] = "SAVE_IP", [302] = "_POP_JUMP_IF_FALSE", [303] = "_POP_JUMP_IF_TRUE", - [304] = "_POP_JUMP_IF_NONE", - [305] = "_POP_JUMP_IF_NOT_NONE", - [306] = "_GUARD_BOTH_INT", - [307] = "_BINARY_OP_MULTIPLY_INT", - [308] = "_BINARY_OP_ADD_INT", - [309] = "_BINARY_OP_SUBTRACT_INT", - [310] = "_GUARD_BOTH_FLOAT", - [311] = "_BINARY_OP_MULTIPLY_FLOAT", - [312] = "_BINARY_OP_ADD_FLOAT", - [313] = "_BINARY_OP_SUBTRACT_FLOAT", - [314] = "_GUARD_BOTH_UNICODE", - [315] = "_BINARY_OP_ADD_UNICODE", - [316] = "_LOAD_LOCALS", - [317] = "_LOAD_FROM_DICT_OR_GLOBALS", + [304] = "_GUARD_BOTH_INT", + [305] = "_BINARY_OP_MULTIPLY_INT", + [306] = "_BINARY_OP_ADD_INT", + [307] = "_BINARY_OP_SUBTRACT_INT", + [308] = "_GUARD_BOTH_FLOAT", + [309] = "_BINARY_OP_MULTIPLY_FLOAT", + [310] = "_BINARY_OP_ADD_FLOAT", + [311] = "_BINARY_OP_SUBTRACT_FLOAT", + [312] = "_GUARD_BOTH_UNICODE", + [313] = "_BINARY_OP_ADD_UNICODE", + [314] = "_LOAD_LOCALS", + [315] = "_LOAD_FROM_DICT_OR_GLOBALS", }; #endif // NEED_OPCODE_METADATA #endif diff --git a/Python/optimizer.c b/Python/optimizer.c index d230d428dfb10e..e9f02218a7b2c4 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -428,7 +428,8 @@ translate_bytecode_to_trace( oparg = (oparg << 8) | instr->op.arg; } if (opcode == ENTER_EXECUTOR) { - _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; + _PyExecutorObject *executor = + (_PyExecutorObject *)code->co_executors->executors[oparg&255]; opcode = executor->vm_data.opcode; DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]); oparg = (oparg & 0xffffff00) | executor->vm_data.oparg; @@ -436,6 +437,7 @@ translate_bytecode_to_trace( switch (opcode) { case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: { // Assume jump unlikely (TODO: handle jump likely case) // Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + EXIT_TRACE) @@ -443,10 +445,14 @@ translate_bytecode_to_trace( DPRINTF(1, "Ran out of space for POP_JUMP_IF_FALSE\n"); goto done; } - _Py_CODEUNIT *target_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg; + _Py_CODEUNIT *target_instr = + instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg; max_length -= 2; // Really the start of the stubs - ADD_TO_TRACE(_POP_JUMP_IF_FALSE, max_length); - ADD_TO_STUB(max_length, SAVE_IP, target_instr - (_Py_CODEUNIT *)code->co_code_adaptive); + int uopcode = opcode == POP_JUMP_IF_TRUE ? + _POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE; + ADD_TO_TRACE(uopcode, max_length); + ADD_TO_STUB(max_length, SAVE_IP, + target_instr - (_Py_CODEUNIT *)code->co_code_adaptive); ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0); break; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 056fec70d56855..932d0c14d398ab 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -1348,8 +1348,6 @@ def add(name: str) -> None: add("SAVE_IP") add("_POP_JUMP_IF_FALSE") add("_POP_JUMP_IF_TRUE") - add("_POP_JUMP_IF_NONE") - add("_POP_JUMP_IF_NOT_NONE") for instr in self.instrs.values(): if instr.kind == "op" and instr.is_viable_uop(): From 3923794dfc6592c0f85ff9ec08898c04e584b1c3 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 10 Jul 2023 11:17:57 -0700 Subject: [PATCH 5/7] Move stubs to be contiguous with main trace Also include them in the returned trace length. --- Python/optimizer.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index e9f02218a7b2c4..88a5d87dbcfa9d 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -372,12 +372,13 @@ translate_bytecode_to_trace( PyCodeObject *code, _Py_CODEUNIT *instr, _PyUOpInstruction *trace, - int max_length) + int buffer_size) { #ifdef Py_DEBUG _Py_CODEUNIT *initial_instr = instr; #endif int trace_length = 0; + int max_length = buffer_size; #ifdef Py_DEBUG char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); @@ -534,6 +535,30 @@ translate_bytecode_to_trace( code->co_firstlineno, 2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive), trace_length); + if (max_length < buffer_size && trace_length < max_length) { + // Move the stubs back to be immediately after the main trace + // (which ends at trace_length) + DPRINTF(2, + "Moving %d stub uops back by %d\n", + buffer_size - max_length, + max_length - trace_length); + memmove(trace + trace_length, + trace + max_length, + (buffer_size - max_length) * sizeof(_PyUOpInstruction)); + // Patch up the jump targets + for (int i = 0; i < trace_length; i++) { + if (trace[i].opcode == _POP_JUMP_IF_FALSE || + trace[i].opcode == _POP_JUMP_IF_TRUE) + { + int target = trace[i].operand; + if (target >= max_length) { + target += trace_length - max_length; + trace[i].operand = target; + } + } + } + trace_length += buffer_size - max_length; + } return trace_length; } else { From f119de0e9ef9fdb05593f6798e3e825c76c70d8a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 8 Jul 2023 08:10:07 -0700 Subject: [PATCH 6/7] Clean up TestUops a bit --- Lib/test/test_capi/test_misc.py | 35 ++++++++++++--------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index c4a5f494d199ff..fd27bd06097f6a 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2347,11 +2347,12 @@ def func(): @contextlib.contextmanager def temporary_optimizer(opt): + old_opt = _testinternalcapi.get_optimizer() _testinternalcapi.set_optimizer(opt) try: yield finally: - _testinternalcapi.set_optimizer(None) + _testinternalcapi.set_optimizer(old_opt) @contextlib.contextmanager @@ -2420,8 +2421,8 @@ def long_loop(): self.assertEqual(opt.get_count(), 10) - -def get_first_executor(code): +def get_first_executor(func): + code = func.__code__ co_code = code.co_code JUMP_BACKWARD = opcode.opmap["JUMP_BACKWARD"] for i in range(0, len(co_code), 2): @@ -2446,13 +2447,7 @@ def testfunc(x): with temporary_optimizer(opt): testfunc(1000) - ex = None - for offset in range(0, len(testfunc.__code__.co_code), 2): - try: - ex = _testinternalcapi.get_executor(testfunc.__code__, offset) - break - except ValueError: - pass + ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _ in ex} self.assertIn("SAVE_IP", uops) @@ -2493,11 +2488,13 @@ def many_vars(): opt = _testinternalcapi.get_uop_optimizer() with temporary_optimizer(opt): - ex = get_first_executor(many_vars.__code__) + ex = get_first_executor(many_vars) self.assertIsNone(ex) many_vars() - ex = get_first_executor(many_vars.__code__) - self.assertIn(("LOAD_FAST", 259), list(ex)) + + ex = get_first_executor(many_vars) + self.assertIsNotNone(ex) + self.assertIn(("LOAD_FAST", 259), list(ex)) def test_unspecialized_unpack(self): # An example of an unspecialized opcode @@ -2516,13 +2513,7 @@ def testfunc(x): with temporary_optimizer(opt): testfunc(10) - ex = None - for offset in range(0, len(testfunc.__code__.co_code), 2): - try: - ex = _testinternalcapi.get_executor(testfunc.__code__, offset) - break - except ValueError: - pass + ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _ in ex} self.assertIn("UNPACK_SEQUENCE", uops) @@ -2538,7 +2529,7 @@ def testfunc(n): with temporary_optimizer(opt): testfunc(10) - ex = get_first_executor(testfunc.__code__) + ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _ in ex} self.assertIn("_POP_JUMP_IF_FALSE", uops) @@ -2554,7 +2545,7 @@ def testfunc(n): with temporary_optimizer(opt): testfunc(10) - ex = get_first_executor(testfunc.__code__) + ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _ in ex} self.assertIn("_POP_JUMP_IF_TRUE", uops) From 4b6596e8ced7bbeb843865e03fc7925804f77ffa Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 10 Jul 2023 14:50:13 -0700 Subject: [PATCH 7/7] Include stubs in list(ex) This required improving len(ex) and ex[i] a bit. There is now a sentinel opcode (0) at the end, *unless* the trace array is full. --- Python/optimizer.c | 51 +++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 88a5d87dbcfa9d..48c29f55bee46b 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -307,7 +307,7 @@ uop_dealloc(_PyUOpExecutorObject *self) { static const char * uop_name(int index) { - if (index < EXIT_TRACE) { + if (index < 256) { return _PyOpcode_OpName[index]; } return _PyOpcode_uop_name[index]; @@ -316,9 +316,9 @@ uop_name(int index) { static Py_ssize_t uop_len(_PyUOpExecutorObject *self) { - int count = 1; + int count = 0; for (; count < _Py_UOP_MAX_TRACE_LENGTH; count++) { - if (self->trace[count-1].opcode == EXIT_TRACE) { + if (self->trace[count].opcode == 0) { break; } } @@ -328,28 +328,26 @@ uop_len(_PyUOpExecutorObject *self) static PyObject * uop_item(_PyUOpExecutorObject *self, Py_ssize_t index) { - for (int i = 0; i < _Py_UOP_MAX_TRACE_LENGTH; i++) { - if (self->trace[i].opcode == EXIT_TRACE) { - break; - } - if (i != index) { - continue; - } - const char *name = uop_name(self->trace[i].opcode); - PyObject *oname = _PyUnicode_FromASCII(name, strlen(name)); - if (oname == NULL) { - return NULL; - } - PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[i].operand); - if (operand == NULL) { - Py_DECREF(oname); - return NULL; - } - PyObject *args[2] = { oname, operand }; - return _PyTuple_FromArraySteal(args, 2); + Py_ssize_t len = uop_len(self); + if (index < 0 || index >= len) { + PyErr_SetNone(PyExc_IndexError); + return NULL; } - PyErr_SetNone(PyExc_IndexError); - return NULL; + const char *name = uop_name(self->trace[index].opcode); + if (name == NULL) { + name = ""; + } + PyObject *oname = _PyUnicode_FromASCII(name, strlen(name)); + if (oname == NULL) { + return NULL; + } + PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand); + if (operand == NULL) { + Py_DECREF(oname); + return NULL; + } + PyObject *args[2] = { oname, operand }; + return _PyTuple_FromArraySteal(args, 2); } PySequenceMethods uop_as_sequence = { @@ -594,7 +592,10 @@ uop_optimize( return -1; } executor->base.execute = _PyUopExecute; - memcpy(executor->trace, trace, _Py_UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction)); + memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); + if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) { + executor->trace[trace_length].opcode = 0; // Sentinel + } *exec_ptr = (_PyExecutorObject *)executor; return 1; }