Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-106529: Implement POP_JUMP_IF_XXX uops #106551

Merged
merged 8 commits into from
Jul 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 43 additions & 20 deletions Lib/test/test_capi/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2347,11 +2347,12 @@ def func():

@contextlib.contextmanager
def temporary_optimizer(opt):
old_opt = _testinternalcapi.get_optimizer()
_testinternalcapi.set_optimizer(opt)
try:
yield
finally:
_testinternalcapi.set_optimizer(None)
_testinternalcapi.set_optimizer(old_opt)


@contextlib.contextmanager
Expand Down Expand Up @@ -2420,8 +2421,8 @@ def long_loop():
self.assertEqual(opt.get_count(), 10)



def get_first_executor(code):
def get_first_executor(func):
code = func.__code__
co_code = code.co_code
JUMP_BACKWARD = opcode.opmap["JUMP_BACKWARD"]
for i in range(0, len(co_code), 2):
Expand All @@ -2446,13 +2447,7 @@ def testfunc(x):
with temporary_optimizer(opt):
testfunc(1000)

ex = None
for offset in range(0, len(testfunc.__code__.co_code), 2):
try:
ex = _testinternalcapi.get_executor(testfunc.__code__, offset)
break
except ValueError:
pass
ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _ in ex}
self.assertIn("SAVE_IP", uops)
Expand Down Expand Up @@ -2493,11 +2488,13 @@ def many_vars():

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
ex = get_first_executor(many_vars.__code__)
ex = get_first_executor(many_vars)
self.assertIsNone(ex)
many_vars()
ex = get_first_executor(many_vars.__code__)
self.assertIn(("LOAD_FAST", 259), list(ex))

ex = get_first_executor(many_vars)
self.assertIsNotNone(ex)
self.assertIn(("LOAD_FAST", 259), list(ex))

def test_unspecialized_unpack(self):
# An example of an unspecialized opcode
Expand All @@ -2516,17 +2513,43 @@ def testfunc(x):
with temporary_optimizer(opt):
testfunc(10)

ex = None
for offset in range(0, len(testfunc.__code__.co_code), 2):
try:
ex = _testinternalcapi.get_executor(testfunc.__code__, offset)
break
except ValueError:
pass
ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _ in ex}
self.assertIn("UNPACK_SEQUENCE", uops)

def test_pop_jump_if_false(self):
def testfunc(n):
i = 0
while i < n:
i += 1

opt = _testinternalcapi.get_uop_optimizer()

with temporary_optimizer(opt):
testfunc(10)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _ in ex}
self.assertIn("_POP_JUMP_IF_FALSE", uops)

def test_pop_jump_if_true(self):
def testfunc(n):
i = 0
while not i >= n:
i += 1

opt = _testinternalcapi.get_uop_optimizer()

with temporary_optimizer(opt):
testfunc(10)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _ in ex}
self.assertIn("_POP_JUMP_IF_TRUE", uops)


if __name__ == "__main__":
unittest.main()
22 changes: 21 additions & 1 deletion Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2751,7 +2751,8 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
operand = self->trace[pc].operand;
oparg = (int)operand;
DPRINTF(3,
" uop %s, operand %" PRIu64 ", stack_level %d\n",
"%4d: uop %s, operand %" PRIu64 ", stack_level %d\n",
pc,
opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode],
operand,
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
Expand All @@ -2763,6 +2764,25 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
#define ENABLE_SPECIALIZATION 0
#include "executor_cases.c.h"

// NOTE: These pop-jumps move the uop pc, not the bytecode ip
case _POP_JUMP_IF_FALSE:
{
if (Py_IsFalse(stack_pointer[-1])) {
pc = oparg;
}
stack_pointer--;
break;
}

case _POP_JUMP_IF_TRUE:
{
if (Py_IsTrue(stack_pointer[-1])) {
pc = oparg;
}
stack_pointer--;
break;
}

case SAVE_IP:
{
frame->prev_instr = ip_offset + oparg;
Expand Down
52 changes: 28 additions & 24 deletions Python/opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

111 changes: 84 additions & 27 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ uop_dealloc(_PyUOpExecutorObject *self) {

static const char *
uop_name(int index) {
if (index < EXIT_TRACE) {
if (index < 256) {
return _PyOpcode_OpName[index];
}
return _PyOpcode_uop_name[index];
Expand All @@ -316,9 +316,9 @@ uop_name(int index) {
static Py_ssize_t
uop_len(_PyUOpExecutorObject *self)
{
int count = 1;
int count = 0;
for (; count < _Py_UOP_MAX_TRACE_LENGTH; count++) {
if (self->trace[count-1].opcode == EXIT_TRACE) {
if (self->trace[count].opcode == 0) {
break;
}
}
Expand All @@ -328,28 +328,26 @@ uop_len(_PyUOpExecutorObject *self)
static PyObject *
uop_item(_PyUOpExecutorObject *self, Py_ssize_t index)
{
for (int i = 0; i < _Py_UOP_MAX_TRACE_LENGTH; i++) {
if (self->trace[i].opcode == EXIT_TRACE) {
break;
}
if (i != index) {
continue;
}
const char *name = uop_name(self->trace[i].opcode);
PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
if (oname == NULL) {
return NULL;
}
PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[i].operand);
if (operand == NULL) {
Py_DECREF(oname);
return NULL;
}
PyObject *args[2] = { oname, operand };
return _PyTuple_FromArraySteal(args, 2);
Py_ssize_t len = uop_len(self);
if (index < 0 || index >= len) {
PyErr_SetNone(PyExc_IndexError);
return NULL;
}
PyErr_SetNone(PyExc_IndexError);
return NULL;
const char *name = uop_name(self->trace[index].opcode);
if (name == NULL) {
name = "<nil>";
}
PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
if (oname == NULL) {
return NULL;
}
PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand);
if (operand == NULL) {
Py_DECREF(oname);
return NULL;
}
PyObject *args[2] = { oname, operand };
return _PyTuple_FromArraySteal(args, 2);
}

PySequenceMethods uop_as_sequence = {
Expand All @@ -372,12 +370,13 @@ translate_bytecode_to_trace(
PyCodeObject *code,
_Py_CODEUNIT *instr,
_PyUOpInstruction *trace,
int max_length)
int buffer_size)
{
#ifdef Py_DEBUG
_Py_CODEUNIT *initial_instr = instr;
#endif
int trace_length = 0;
int max_length = buffer_size;

#ifdef Py_DEBUG
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
Expand All @@ -401,6 +400,14 @@ translate_bytecode_to_trace(
trace[trace_length].operand = (OPERAND); \
trace_length++;

#define ADD_TO_STUB(INDEX, OPCODE, OPERAND) \
DPRINTF(2, " ADD_TO_STUB(%d, %s, %" PRIu64 ")\n", \
(INDEX), \
(OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)], \
(uint64_t)(OPERAND)); \
trace[(INDEX)].opcode = (OPCODE); \
trace[(INDEX)].operand = (OPERAND);

DPRINTF(4,
"Optimizing %s (%s:%d) at byte offset %ld\n",
PyUnicode_AsUTF8(code->co_qualname),
Expand All @@ -409,7 +416,7 @@ translate_bytecode_to_trace(
2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive));

for (;;) {
ADD_TO_TRACE(SAVE_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
ADD_TO_TRACE(SAVE_IP, instr - (_Py_CODEUNIT *)code->co_code_adaptive);
int opcode = instr->op.code;
int oparg = instr->op.arg;
int extras = 0;
Expand All @@ -420,12 +427,35 @@ translate_bytecode_to_trace(
oparg = (oparg << 8) | instr->op.arg;
}
if (opcode == ENTER_EXECUTOR) {
_PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255];
_PyExecutorObject *executor =
(_PyExecutorObject *)code->co_executors->executors[oparg&255];
opcode = executor->vm_data.opcode;
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
}
switch (opcode) {

case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
{
// Assume jump unlikely (TODO: handle jump likely case)
// Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + EXIT_TRACE)
if (trace_length + 5 > max_length) {
DPRINTF(1, "Ran out of space for POP_JUMP_IF_FALSE\n");
goto done;
}
_Py_CODEUNIT *target_instr =
instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg;
max_length -= 2; // Really the start of the stubs
int uopcode = opcode == POP_JUMP_IF_TRUE ?
_POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE;
ADD_TO_TRACE(uopcode, max_length);
ADD_TO_STUB(max_length, SAVE_IP,
target_instr - (_Py_CODEUNIT *)code->co_code_adaptive);
ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0);
break;
}

default:
{
const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
Expand Down Expand Up @@ -503,6 +533,30 @@ translate_bytecode_to_trace(
code->co_firstlineno,
2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive),
trace_length);
if (max_length < buffer_size && trace_length < max_length) {
// Move the stubs back to be immediately after the main trace
// (which ends at trace_length)
DPRINTF(2,
"Moving %d stub uops back by %d\n",
buffer_size - max_length,
max_length - trace_length);
memmove(trace + trace_length,
trace + max_length,
(buffer_size - max_length) * sizeof(_PyUOpInstruction));
// Patch up the jump targets
for (int i = 0; i < trace_length; i++) {
if (trace[i].opcode == _POP_JUMP_IF_FALSE ||
trace[i].opcode == _POP_JUMP_IF_TRUE)
{
int target = trace[i].operand;
if (target >= max_length) {
target += trace_length - max_length;
trace[i].operand = target;
}
}
}
trace_length += buffer_size - max_length;
}
return trace_length;
}
else {
Expand Down Expand Up @@ -539,6 +593,9 @@ uop_optimize(
}
executor->base.execute = _PyUopExecute;
memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) {
executor->trace[trace_length].opcode = 0; // Sentinel
}
*exec_ptr = (_PyExecutorObject *)executor;
return 1;
}
Expand Down
Loading