From b28effa9f7b43ed892cc31bb2872708d64ec0588 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 15:37:24 -0800 Subject: [PATCH] Hacky way to make FOR_ITER a viable uop --- Include/internal/pycore_opcode_metadata.h | 86 +++++++++++++---------- Python/abstract_interp_cases.c.h | 10 +++ Python/bytecodes.c | 29 +++++++- Python/executor_cases.c.h | 49 +++++++++++++ Python/optimizer.c | 1 + Tools/cases_generator/flags.py | 2 +- Tools/cases_generator/generate_cases.py | 2 +- Tools/cases_generator/instructions.py | 2 +- 8 files changed, 137 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 4d98b23df5d927..1442350411c90a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -81,45 +81,46 @@ #define _IS_NONE 353 #define _SPECIALIZE_FOR_ITER 354 #define _FOR_ITER 355 -#define _ITER_CHECK_LIST 356 -#define _ITER_JUMP_LIST 357 -#define _GUARD_NOT_EXHAUSTED_LIST 358 -#define _ITER_NEXT_LIST 359 -#define _ITER_CHECK_TUPLE 360 -#define _ITER_JUMP_TUPLE 361 -#define _GUARD_NOT_EXHAUSTED_TUPLE 362 -#define _ITER_NEXT_TUPLE 363 -#define _ITER_CHECK_RANGE 364 -#define _ITER_JUMP_RANGE 365 -#define _GUARD_NOT_EXHAUSTED_RANGE 366 -#define _ITER_NEXT_RANGE 367 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 368 -#define _GUARD_KEYS_VERSION 369 -#define _LOAD_ATTR_METHOD_WITH_VALUES 370 -#define _LOAD_ATTR_METHOD_NO_DICT 371 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 372 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 373 -#define _CHECK_ATTR_METHOD_LAZY_DICT 374 -#define _LOAD_ATTR_METHOD_LAZY_DICT 375 -#define _SPECIALIZE_CALL 376 -#define _CALL 377 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 378 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 379 -#define _CHECK_PEP_523 380 -#define _CHECK_FUNCTION_EXACT_ARGS 381 -#define _CHECK_STACK_SPACE 382 -#define _INIT_CALL_PY_EXACT_ARGS 383 -#define _PUSH_FRAME 384 -#define _SPECIALIZE_BINARY_OP 385 -#define _BINARY_OP 386 -#define _GUARD_IS_TRUE_POP 387 -#define _GUARD_IS_FALSE_POP 388 -#define _GUARD_IS_NONE_POP 389 -#define _GUARD_IS_NOT_NONE_POP 390 -#define _JUMP_TO_TOP 391 -#define _SAVE_RETURN_OFFSET 392 -#define _INSERT 393 -#define _CHECK_VALIDITY 394 +#define _FOR_ITER_TIER_TWO 356 +#define _ITER_CHECK_LIST 357 +#define _ITER_JUMP_LIST 358 +#define _GUARD_NOT_EXHAUSTED_LIST 359 +#define _ITER_NEXT_LIST 360 +#define _ITER_CHECK_TUPLE 361 +#define _ITER_JUMP_TUPLE 362 +#define _GUARD_NOT_EXHAUSTED_TUPLE 363 +#define _ITER_NEXT_TUPLE 364 +#define _ITER_CHECK_RANGE 365 +#define _ITER_JUMP_RANGE 366 +#define _GUARD_NOT_EXHAUSTED_RANGE 367 +#define _ITER_NEXT_RANGE 368 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 369 +#define _GUARD_KEYS_VERSION 370 +#define _LOAD_ATTR_METHOD_WITH_VALUES 371 +#define _LOAD_ATTR_METHOD_NO_DICT 372 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 373 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 374 +#define _CHECK_ATTR_METHOD_LAZY_DICT 375 +#define _LOAD_ATTR_METHOD_LAZY_DICT 376 +#define _SPECIALIZE_CALL 377 +#define _CALL 378 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 379 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 380 +#define _CHECK_PEP_523 381 +#define _CHECK_FUNCTION_EXACT_ARGS 382 +#define _CHECK_STACK_SPACE 383 +#define _INIT_CALL_PY_EXACT_ARGS 384 +#define _PUSH_FRAME 385 +#define _SPECIALIZE_BINARY_OP 386 +#define _BINARY_OP 387 +#define _GUARD_IS_TRUE_POP 388 +#define _GUARD_IS_FALSE_POP 389 +#define _GUARD_IS_NONE_POP 390 +#define _GUARD_IS_NOT_NONE_POP 391 +#define _JUMP_TO_TOP 392 +#define _SAVE_RETURN_OFFSET 393 +#define _INSERT 394 +#define _CHECK_VALIDITY 395 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -543,6 +544,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case _FOR_ITER: return 1; + case _FOR_ITER_TIER_TWO: + return 1; case FOR_ITER: return 1; case INSTRUMENTED_FOR_ITER: @@ -1181,6 +1184,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case _FOR_ITER: return 2; + case _FOR_ITER_TIER_TWO: + return 2; case FOR_ITER: return 2; case INSTRUMENTED_FOR_ITER: @@ -1676,6 +1681,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_FOR_ITER] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, @@ -1906,6 +1912,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [MATCH_KEYS] = { .nuops = 1, .uops = { { MATCH_KEYS, 0, 0 } } }, [GET_ITER] = { .nuops = 1, .uops = { { GET_ITER, 0, 0 } } }, [GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } }, + [FOR_ITER] = { .nuops = 1, .uops = { { _FOR_ITER, 0, 0 } } }, [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, 0, 0 }, { _ITER_JUMP_LIST, 0, 0 }, { _ITER_NEXT_LIST, 0, 0 } } }, [FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, 0, 0 }, { _ITER_JUMP_TUPLE, 0, 0 }, { _ITER_NEXT_TUPLE, 0, 0 } } }, [FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, 0, 0 }, { _ITER_JUMP_RANGE, 0, 0 }, { _ITER_NEXT_RANGE, 0, 0 } } }, @@ -2005,6 +2012,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_IS_NONE] = "_IS_NONE", [_SPECIALIZE_FOR_ITER] = "_SPECIALIZE_FOR_ITER", [_FOR_ITER] = "_FOR_ITER", + [_FOR_ITER_TIER_TWO] = "_FOR_ITER_TIER_TWO", [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", [_ITER_JUMP_LIST] = "_ITER_JUMP_LIST", [_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST", diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index a2f6aa8def8f69..28338f53ea7fb9 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -242,6 +242,10 @@ break; } + case _SPECIALIZE_UNPACK_SEQUENCE: { + break; + } + case _UNPACK_SEQUENCE: { STACK_SHRINK(1); STACK_GROW(oparg); @@ -624,6 +628,12 @@ break; } + case _FOR_ITER_TIER_TWO: { + STACK_GROW(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + case _ITER_CHECK_LIST: { break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8a7dcb8416eb8c..da58cb75e607ed 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2368,7 +2368,7 @@ dummy_func( goto enter_tier_one; } - replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { + replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { assert(PyBool_Check(cond)); int flag = Py_IsFalse(cond); #if ENABLE_SPECIALIZATION @@ -2512,7 +2512,7 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION */ } - op(_FOR_ITER, (iter -- iter, next)) { + replaced op(_FOR_ITER, (iter -- iter, next)) { /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ next = (*Py_TYPE(iter)->tp_iternext)(iter); if (next == NULL) { @@ -2535,6 +2535,31 @@ dummy_func( // Common case: no jump, leave it to the code generator } + op(_FOR_ITER_TIER_TWO, (iter -- iter, next)) { + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + GOTO_ERROR(error); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + /* HACK: Emulate DEOPT_IF to jump over END_FOR */ + _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + assert(frame->instr_ptr[-1].op.code == END_FOR || + frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); + Py_DECREF(current_executor); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + goto enter_tier_one; + } + // Common case: no jump, leave it to the code generator + } + macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER; inst(INSTRUMENTED_FOR_ITER, (unused/1 -- )) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4e29fb9f0fa93d..4f2f73ee76d5ef 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -866,6 +866,24 @@ break; } + case _SPECIALIZE_UNPACK_SEQUENCE: { + PyObject *seq; + seq = stack_pointer[-1]; + uint16_t counter = (uint16_t)operand; + #if ENABLE_SPECIALIZATION + if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + next_instr = this_instr; + _Py_Specialize_UnpackSequence(seq, next_instr, oparg); + DISPATCH_SAME_OPARG(); + } + STAT_INC(UNPACK_SEQUENCE, deferred); + DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + #endif /* ENABLE_SPECIALIZATION */ + (void)seq; + (void)counter; + break; + } + case _UNPACK_SEQUENCE: { PyObject *seq; seq = stack_pointer[-1]; @@ -2101,6 +2119,37 @@ break; } + case _FOR_ITER_TIER_TWO: { + PyObject *iter; + PyObject *next; + iter = stack_pointer[-1]; + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + GOTO_ERROR(error); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + /* HACK: Emulate DEOPT_IF to jump over END_FOR */ + _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + assert(frame->instr_ptr[-1].op.code == END_FOR || + frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); + Py_DECREF(current_executor); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + goto enter_tier_one; + } + // Common case: no jump, leave it to the code generator + STACK_GROW(1); + stack_pointer[-1] = next; + break; + } + case _ITER_CHECK_LIST: { PyObject *iter; iter = stack_pointer[-1]; diff --git a/Python/optimizer.c b/Python/optimizer.c index 5d1ef8a683c250..5c9f965aeefa85 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -392,6 +392,7 @@ _PyUop_Replacements[OPCODE_METADATA_SIZE] = { [_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE, [_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST, [_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE, + [_FOR_ITER] = _FOR_ITER_TIER_TWO, }; static const uint16_t diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index 808c9e82bbce07..bf76112159e38e 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -175,7 +175,7 @@ def variable_used_unspecialized(node: parsing.Node, name: str) -> bool: tokens: list[lx.Token] = [] skipping = False for i, token in enumerate(node.tokens): - if token.kind == "MACRO": + if token.kind == "CMACRO": text = "".join(token.text.split()) # TODO: Handle nested #if if text == "#if": diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index d1dbfeae8d74f6..ba45e3a625072e 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -658,7 +658,7 @@ def write_macro_expansions( if not part.instr.is_viable_uop() and "replaced" not in part.instr.annotations: # This note just reminds us about macros that cannot # be expanded to Tier 2 uops. It is not an error. - # It is sometimes emitted for macros that have a + # Suppress it using 'replaced op(...)' for macros having # manual translation in translate_bytecode_to_trace() # in Python/optimizer.c. if len(parts) > 1 or part.instr.name != name: diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 9039ac5c6f127e..457221a0e15f75 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -115,7 +115,7 @@ def __init__(self, inst: parsing.InstDef): def is_viable_uop(self) -> bool: """Whether this instruction is viable as a uop.""" dprint: typing.Callable[..., None] = lambda *args, **kwargs: None - if "FRAME" in self.name: + if self.name == "_FOR_ITER_TIER_TWO": dprint = print if self.name == "_EXIT_TRACE":