Skip to content

Commit

Permalink
gh-109039: Branch prediction for Tier 2 interpreter (#109038)
Browse files Browse the repository at this point in the history
This adds a 16-bit inline cache entry to the conditional branch instructions POP_JUMP_IF_{FALSE,TRUE,NONE,NOT_NONE} and their instrumented variants, which is used to keep track of the branch direction.

Each time we encounter these instructions we shift the cache entry left by one and set the bottom bit to whether we jumped.

Then when it's time to translate such a branch to Tier 2 uops, we use the bit count from the cache entry to decide whether to continue translating the "didn't jump" branch or the "jumped" branch.

The counter is initialized to a pattern of alternating ones and zeros to avoid bias.

The .pyc file magic number is updated. There's a new test, some fixes for existing tests, and a few miscellaneous cleanups.
  • Loading branch information
gvanrossum authored Sep 11, 2023
1 parent ecd21a6 commit bcce5e2
Show file tree
Hide file tree
Showing 15 changed files with 339 additions and 181 deletions.
1 change: 0 additions & 1 deletion Include/internal/pycore_instruments.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_bitutils.h" // _Py_popcount32
#include "pycore_frame.h" // _PyInterpreterFrame

#ifdef __cplusplus
Expand Down
22 changes: 13 additions & 9 deletions Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,7 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.13a1 3558 (Reorder the stack items for CALL)
# Python 3.13a1 3559 (Generate opcode IDs from bytecodes.c)
# Python 3.13a1 3560 (Add RESUME_CHECK instruction)
# Python 3.13a1 3561 (Add cache entry to branch instructions)

# Python 3.14 will start with 3600

Expand All @@ -472,7 +473,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.

MAGIC_NUMBER = (3560).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3561).to_bytes(2, 'little') + b'\r\n'

_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

Expand Down
12 changes: 12 additions & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,18 @@
"counter": 1,
"version": 2,
},
"POP_JUMP_IF_TRUE": {
"counter": 1,
},
"POP_JUMP_IF_FALSE": {
"counter": 1,
},
"POP_JUMP_IF_NONE": {
"counter": 1,
},
"POP_JUMP_IF_NOT_NONE": {
"counter": 1,
},
}

_inline_cache_entries = {
Expand Down
17 changes: 17 additions & 0 deletions Lib/test/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
"LOOPBACK_TIMEOUT", "INTERNET_TIMEOUT", "SHORT_TIMEOUT", "LONG_TIMEOUT",
"Py_DEBUG", "EXCEEDS_RECURSION_LIMIT", "Py_C_RECURSION_LIMIT",
"skip_on_s390x",
"without_optimizer",
]


Expand Down Expand Up @@ -2533,3 +2534,19 @@ def adjust_int_max_str_digits(max_digits):
'skipped on s390x')

Py_TRACE_REFS = hasattr(sys, 'getobjects')

# Decorator to disable the optimizer while a function runs
def without_optimizer(func):
    """Return *func* wrapped so that it runs with the optimizer set to None.

    The optimizer that was active when the wrapper is called is read with
    ``_testinternalcapi.get_optimizer()`` and put back with
    ``set_optimizer()`` once *func* returns or raises.  When
    ``_testinternalcapi`` cannot be imported, *func* is returned unchanged.
    """
    try:
        import _testinternalcapi as capi
    except ImportError:
        # Without the internal C API module there is nothing to switch off.
        return func

    def run_unoptimized(*args, **kwargs):
        previous = capi.get_optimizer()
        try:
            capi.set_optimizer(None)
            result = func(*args, **kwargs)
        finally:
            # Restore the saved optimizer even when func() raised.
            capi.set_optimizer(previous)
        return result

    return functools.wraps(func)(run_unoptimized)
45 changes: 31 additions & 14 deletions Lib/test/test_capi/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2455,7 +2455,7 @@ def testfunc(x):
opt = _testinternalcapi.get_uop_optimizer()

with temporary_optimizer(opt):
testfunc(10)
testfunc(20)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
Expand All @@ -2470,7 +2470,7 @@ def testfunc(n):

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(10)
testfunc(20)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
Expand All @@ -2485,7 +2485,7 @@ def testfunc(a):

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(range(10))
testfunc(range(20))

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
Expand All @@ -2495,12 +2495,13 @@ def testfunc(a):
def test_pop_jump_if_not_none(self):
def testfunc(a):
for x in a:
x = None
if x is not None:
x = 0

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(range(10))
testfunc(range(20))

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
Expand All @@ -2515,7 +2516,7 @@ def testfunc(n):

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(10)
testfunc(20)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
Expand All @@ -2530,7 +2531,7 @@ def testfunc(n):

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(10)
testfunc(20)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
Expand All @@ -2550,7 +2551,7 @@ def testfunc(n):

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(10)
testfunc(20)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
Expand All @@ -2568,8 +2569,8 @@ def testfunc(n):

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
total = testfunc(10)
self.assertEqual(total, 45)
total = testfunc(20)
self.assertEqual(total, 190)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
Expand All @@ -2589,9 +2590,9 @@ def testfunc(a):

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
a = list(range(10))
a = list(range(20))
total = testfunc(a)
self.assertEqual(total, 45)
self.assertEqual(total, 190)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
Expand All @@ -2611,9 +2612,9 @@ def testfunc(a):

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
a = tuple(range(10))
a = tuple(range(20))
total = testfunc(a)
self.assertEqual(total, 45)
self.assertEqual(total, 190)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
Expand Down Expand Up @@ -2647,14 +2648,30 @@ def dummy(x):

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(10)
testfunc(20)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _, _ in ex}
self.assertIn("_PUSH_FRAME", uops)
self.assertIn("_BINARY_OP_ADD_INT", uops)

def test_branch_taken(self):
# Run a loop whose `if` condition has the same outcome on every iteration
# under the uop optimizer, then inspect the uops of the resulting executor.
def testfunc(n):
for i in range(n):
# range(n) never yields a negative value, so this condition is false on
# every iteration and only the else arm runs.
if i < 0:
i = 0
else:
i = 1

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(20)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
# Each item yielded by the executor is a 3-tuple; keep only the uop names.
uops = {opname for opname, _, _ in ex}
# NOTE(review): presumably the translator follows the branch direction that
# was consistently observed and therefore emits the inverted conditional
# uop here -- confirm against the Tier 2 translator.
self.assertIn("_POP_JUMP_IF_TRUE", uops)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit bcce5e2

Please sign in to comment.