Skip to content

Commit

Permalink
gh-106529: Implement POP_JUMP_IF_XXX uops (#106551)
Browse files Browse the repository at this point in the history
- Hand-written uops _POP_JUMP_IF_{TRUE,FALSE}.
  These test the value on top of the stack and then pop it.
  The jump target (in superblock space) is absolute.

- Hand-written translation for POP_JUMP_IF_{TRUE,FALSE},
  assuming the jump is unlikely.
  Once we implement jump-likelihood profiling,
  we can implement the jump-likely case (in another PR).

- Tests (including some test cleanup).

- Improvements to len(ex) and ex[i] to expose the whole trace.
  • Loading branch information
gvanrossum authored Jul 10, 2023
1 parent 18dfbd0 commit 22988c3
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 72 deletions.
63 changes: 43 additions & 20 deletions Lib/test/test_capi/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2347,11 +2347,12 @@ def func():

@contextlib.contextmanager
def temporary_optimizer(opt):
old_opt = _testinternalcapi.get_optimizer()
_testinternalcapi.set_optimizer(opt)
try:
yield
finally:
_testinternalcapi.set_optimizer(None)
_testinternalcapi.set_optimizer(old_opt)


@contextlib.contextmanager
Expand Down Expand Up @@ -2420,8 +2421,8 @@ def long_loop():
self.assertEqual(opt.get_count(), 10)



def get_first_executor(code):
def get_first_executor(func):
code = func.__code__
co_code = code.co_code
JUMP_BACKWARD = opcode.opmap["JUMP_BACKWARD"]
for i in range(0, len(co_code), 2):
Expand All @@ -2446,13 +2447,7 @@ def testfunc(x):
with temporary_optimizer(opt):
testfunc(1000)

ex = None
for offset in range(0, len(testfunc.__code__.co_code), 2):
try:
ex = _testinternalcapi.get_executor(testfunc.__code__, offset)
break
except ValueError:
pass
ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _ in ex}
self.assertIn("SAVE_IP", uops)
Expand Down Expand Up @@ -2493,11 +2488,13 @@ def many_vars():

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
ex = get_first_executor(many_vars.__code__)
ex = get_first_executor(many_vars)
self.assertIsNone(ex)
many_vars()
ex = get_first_executor(many_vars.__code__)
self.assertIn(("LOAD_FAST", 259), list(ex))

ex = get_first_executor(many_vars)
self.assertIsNotNone(ex)
self.assertIn(("LOAD_FAST", 259), list(ex))

def test_unspecialized_unpack(self):
# An example of an unspecialized opcode
Expand All @@ -2516,17 +2513,43 @@ def testfunc(x):
with temporary_optimizer(opt):
testfunc(10)

ex = None
for offset in range(0, len(testfunc.__code__.co_code), 2):
try:
ex = _testinternalcapi.get_executor(testfunc.__code__, offset)
break
except ValueError:
pass
ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _ in ex}
self.assertIn("UNPACK_SEQUENCE", uops)

def test_pop_jump_if_false(self):
    # Run a `while i < n` loop under the uop optimizer and check that the
    # executor created for it contains a _POP_JUMP_IF_FALSE uop.
    def testfunc(n):
        i = 0
        while i < n:
            i += 1

    uop_optimizer = _testinternalcapi.get_uop_optimizer()
    with temporary_optimizer(uop_optimizer):
        testfunc(10)

    executor = get_first_executor(testfunc)
    self.assertIsNotNone(executor)

    # Each executor entry unpacks into two fields; only the first
    # (the uop name) matters here.
    seen_names = set()
    for name, _operand in executor:
        seen_names.add(name)
    self.assertIn("_POP_JUMP_IF_FALSE", seen_names)

def test_pop_jump_if_true(self):
    # Run a `while not i >= n` loop under the uop optimizer and check that
    # the executor created for it contains a _POP_JUMP_IF_TRUE uop.
    def testfunc(n):
        i = 0
        while not i >= n:
            i += 1

    uop_optimizer = _testinternalcapi.get_uop_optimizer()
    with temporary_optimizer(uop_optimizer):
        testfunc(10)

    executor = get_first_executor(testfunc)
    self.assertIsNotNone(executor)

    # Each executor entry unpacks into two fields; only the first
    # (the uop name) matters here.
    seen_names = set()
    for name, _operand in executor:
        seen_names.add(name)
    self.assertIn("_POP_JUMP_IF_TRUE", seen_names)


if __name__ == "__main__":
unittest.main()
22 changes: 21 additions & 1 deletion Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2751,7 +2751,8 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
operand = self->trace[pc].operand;
oparg = (int)operand;
DPRINTF(3,
" uop %s, operand %" PRIu64 ", stack_level %d\n",
"%4d: uop %s, operand %" PRIu64 ", stack_level %d\n",
pc,
opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode],
operand,
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
Expand All @@ -2763,6 +2764,25 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
#define ENABLE_SPECIALIZATION 0
#include "executor_cases.c.h"

// NOTE: These pop-jumps move the uop pc, not the bytecode ip
case _POP_JUMP_IF_FALSE:
{
if (Py_IsFalse(stack_pointer[-1])) {
pc = oparg;
}
stack_pointer--;
break;
}

case _POP_JUMP_IF_TRUE:
{
if (Py_IsTrue(stack_pointer[-1])) {
pc = oparg;
}
stack_pointer--;
break;
}

case SAVE_IP:
{
frame->prev_instr = ip_offset + oparg;
Expand Down
52 changes: 28 additions & 24 deletions Python/opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

111 changes: 84 additions & 27 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ uop_dealloc(_PyUOpExecutorObject *self) {

static const char *
uop_name(int index) {
if (index < EXIT_TRACE) {
if (index < 256) {
return _PyOpcode_OpName[index];
}
return _PyOpcode_uop_name[index];
Expand All @@ -316,9 +316,9 @@ uop_name(int index) {
static Py_ssize_t
uop_len(_PyUOpExecutorObject *self)
{
int count = 1;
int count = 0;
for (; count < _Py_UOP_MAX_TRACE_LENGTH; count++) {
if (self->trace[count-1].opcode == EXIT_TRACE) {
if (self->trace[count].opcode == 0) {
break;
}
}
Expand All @@ -328,28 +328,26 @@ uop_len(_PyUOpExecutorObject *self)
static PyObject *
uop_item(_PyUOpExecutorObject *self, Py_ssize_t index)
{
for (int i = 0; i < _Py_UOP_MAX_TRACE_LENGTH; i++) {
if (self->trace[i].opcode == EXIT_TRACE) {
break;
}
if (i != index) {
continue;
}
const char *name = uop_name(self->trace[i].opcode);
PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
if (oname == NULL) {
return NULL;
}
PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[i].operand);
if (operand == NULL) {
Py_DECREF(oname);
return NULL;
}
PyObject *args[2] = { oname, operand };
return _PyTuple_FromArraySteal(args, 2);
Py_ssize_t len = uop_len(self);
if (index < 0 || index >= len) {
PyErr_SetNone(PyExc_IndexError);
return NULL;
}
PyErr_SetNone(PyExc_IndexError);
return NULL;
const char *name = uop_name(self->trace[index].opcode);
if (name == NULL) {
name = "<nil>";
}
PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
if (oname == NULL) {
return NULL;
}
PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand);
if (operand == NULL) {
Py_DECREF(oname);
return NULL;
}
PyObject *args[2] = { oname, operand };
return _PyTuple_FromArraySteal(args, 2);
}

PySequenceMethods uop_as_sequence = {
Expand All @@ -372,12 +370,13 @@ translate_bytecode_to_trace(
PyCodeObject *code,
_Py_CODEUNIT *instr,
_PyUOpInstruction *trace,
int max_length)
int buffer_size)
{
#ifdef Py_DEBUG
_Py_CODEUNIT *initial_instr = instr;
#endif
int trace_length = 0;
int max_length = buffer_size;

#ifdef Py_DEBUG
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
Expand All @@ -401,6 +400,14 @@ translate_bytecode_to_trace(
trace[trace_length].operand = (OPERAND); \
trace_length++;

#define ADD_TO_STUB(INDEX, OPCODE, OPERAND) \
DPRINTF(2, " ADD_TO_STUB(%d, %s, %" PRIu64 ")\n", \
(INDEX), \
(OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)], \
(uint64_t)(OPERAND)); \
trace[(INDEX)].opcode = (OPCODE); \
trace[(INDEX)].operand = (OPERAND);

DPRINTF(4,
"Optimizing %s (%s:%d) at byte offset %ld\n",
PyUnicode_AsUTF8(code->co_qualname),
Expand All @@ -409,7 +416,7 @@ translate_bytecode_to_trace(
2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive));

for (;;) {
ADD_TO_TRACE(SAVE_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
ADD_TO_TRACE(SAVE_IP, instr - (_Py_CODEUNIT *)code->co_code_adaptive);
int opcode = instr->op.code;
int oparg = instr->op.arg;
int extras = 0;
Expand All @@ -420,12 +427,35 @@ translate_bytecode_to_trace(
oparg = (oparg << 8) | instr->op.arg;
}
if (opcode == ENTER_EXECUTOR) {
_PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255];
_PyExecutorObject *executor =
(_PyExecutorObject *)code->co_executors->executors[oparg&255];
opcode = executor->vm_data.opcode;
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
}
switch (opcode) {

case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
{
// Assume jump unlikely (TODO: handle jump likely case)
// Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + EXIT_TRACE)
if (trace_length + 5 > max_length) {
DPRINTF(1, "Ran out of space for POP_JUMP_IF_FALSE\n");
goto done;
}
_Py_CODEUNIT *target_instr =
instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg;
max_length -= 2; // Really the start of the stubs
int uopcode = opcode == POP_JUMP_IF_TRUE ?
_POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE;
ADD_TO_TRACE(uopcode, max_length);
ADD_TO_STUB(max_length, SAVE_IP,
target_instr - (_Py_CODEUNIT *)code->co_code_adaptive);
ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0);
break;
}

default:
{
const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
Expand Down Expand Up @@ -503,6 +533,30 @@ translate_bytecode_to_trace(
code->co_firstlineno,
2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive),
trace_length);
if (max_length < buffer_size && trace_length < max_length) {
// Move the stubs back to be immediately after the main trace
// (which ends at trace_length)
DPRINTF(2,
"Moving %d stub uops back by %d\n",
buffer_size - max_length,
max_length - trace_length);
memmove(trace + trace_length,
trace + max_length,
(buffer_size - max_length) * sizeof(_PyUOpInstruction));
// Patch up the jump targets
for (int i = 0; i < trace_length; i++) {
if (trace[i].opcode == _POP_JUMP_IF_FALSE ||
trace[i].opcode == _POP_JUMP_IF_TRUE)
{
int target = trace[i].operand;
if (target >= max_length) {
target += trace_length - max_length;
trace[i].operand = target;
}
}
}
trace_length += buffer_size - max_length;
}
return trace_length;
}
else {
Expand Down Expand Up @@ -539,6 +593,9 @@ uop_optimize(
}
executor->base.execute = _PyUopExecute;
memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) {
executor->trace[trace_length].opcode = 0; // Sentinel
}
*exec_ptr = (_PyExecutorObject *)executor;
return 1;
}
Expand Down
Loading

0 comments on commit 22988c3

Please sign in to comment.