gh-109039: Branch prediction for Tier 2 interpreter (#109038)

This adds a 16-bit inline cache entry to the conditional branch instructions POP_JUMP_IF_{FALSE,TRUE,NONE,NOT_NONE} and their instrumented variants, which is used to keep track of the branch direction. Each time we encounter these instructions we shift the cache entry left by one and set the bottom bit to whether we jumped. Then when it's time to translate such a branch to Tier 2 uops, we use the bit count from the cache entry to decide whether to continue translating the "didn't jump" branch or the "jumped" branch. The counter is initialized to a pattern of alternating ones and zeros to avoid bias. The .pyc file magic number is updated. There's a new test, some fixes for existing tests, and a few miscellaneous cleanups.
python · Sep 11, 2023 · bcce5e2 · bcce5e2
1 parent ecd21a6
commit bcce5e2
Show file tree

Hide file tree

Showing 15 changed files with 339 additions and 181 deletions.
diff --git a/Include/internal/pycore_instruments.h b/Include/internal/pycore_instruments.h
@@ -5,7 +5,6 @@
 #  error "this header requires Py_BUILD_CORE define"
 #endif
 
-#include "pycore_bitutils.h"      // _Py_popcount32
 #include "pycore_frame.h"         // _PyInterpreterFrame
 
 #ifdef __cplusplus

diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h
diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py
@@ -456,6 +456,7 @@ def _write_atomic(path, data, mode=0o666):
 #     Python 3.13a1 3558 (Reorder the stack items for CALL)
 #     Python 3.13a1 3559 (Generate opcode IDs from bytecodes.c)
 #     Python 3.13a1 3560 (Add RESUME_CHECK instruction)
+#     Python 3.13a1 3561 (Add cache entry to branch instructions)
 
 #     Python 3.14 will start with 3600
 
@@ -472,7 +473,7 @@ def _write_atomic(path, data, mode=0o666):
 # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
 # in PC/launcher.c must also be updated.
 
-MAGIC_NUMBER = (3560).to_bytes(2, 'little') + b'\r\n'
+MAGIC_NUMBER = (3561).to_bytes(2, 'little') + b'\r\n'
 
 _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little')  # For import.c
 

diff --git a/Lib/opcode.py b/Lib/opcode.py
@@ -93,6 +93,18 @@
         "counter": 1,
         "version": 2,
     },
+    "POP_JUMP_IF_TRUE": {
+        "counter": 1,
+    },
+    "POP_JUMP_IF_FALSE": {
+        "counter": 1,
+    },
+    "POP_JUMP_IF_NONE": {
+        "counter": 1,
+    },
+    "POP_JUMP_IF_NOT_NONE": {
+        "counter": 1,
+    },
 }
 
 _inline_cache_entries = {

diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
@@ -62,6 +62,7 @@
     "LOOPBACK_TIMEOUT", "INTERNET_TIMEOUT", "SHORT_TIMEOUT", "LONG_TIMEOUT",
     "Py_DEBUG", "EXCEEDS_RECURSION_LIMIT", "Py_C_RECURSION_LIMIT",
     "skip_on_s390x",
+    "without_optimizer",
     ]
 
 
@@ -2533,3 +2534,19 @@ def adjust_int_max_str_digits(max_digits):
                                 'skipped on s390x')
 
 Py_TRACE_REFS = hasattr(sys, 'getobjects')
+
+# Decorator to disable the optimizer while a function runs
+def without_optimizer(func):
+    try:
+        import _testinternalcapi
+    except ImportError:
+        return func
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        save_opt = _testinternalcapi.get_optimizer()
+        try:
+            _testinternalcapi.set_optimizer(None)
+            return func(*args, **kwargs)
+        finally:
+            _testinternalcapi.set_optimizer(save_opt)
+    return wrapper
diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py
@@ -2455,7 +2455,7 @@ def testfunc(x):
         opt = _testinternalcapi.get_uop_optimizer()
 
         with temporary_optimizer(opt):
-            testfunc(10)
+            testfunc(20)
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
@@ -2470,7 +2470,7 @@ def testfunc(n):
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
-            testfunc(10)
+            testfunc(20)
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
@@ -2485,7 +2485,7 @@ def testfunc(a):
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
-            testfunc(range(10))
+            testfunc(range(20))
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
@@ -2495,12 +2495,13 @@ def testfunc(a):
     def test_pop_jump_if_not_none(self):
         def testfunc(a):
             for x in a:
+                x = None
                 if x is not None:
                     x = 0
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
-            testfunc(range(10))
+            testfunc(range(20))
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
@@ -2515,7 +2516,7 @@ def testfunc(n):
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
-            testfunc(10)
+            testfunc(20)
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
@@ -2530,7 +2531,7 @@ def testfunc(n):
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
-            testfunc(10)
+            testfunc(20)
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
@@ -2550,7 +2551,7 @@ def testfunc(n):
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
-            testfunc(10)
+            testfunc(20)
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
@@ -2568,8 +2569,8 @@ def testfunc(n):
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
-            total = testfunc(10)
-            self.assertEqual(total, 45)
+            total = testfunc(20)
+            self.assertEqual(total, 190)
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
@@ -2589,9 +2590,9 @@ def testfunc(a):
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
-            a = list(range(10))
+            a = list(range(20))
             total = testfunc(a)
-            self.assertEqual(total, 45)
+            self.assertEqual(total, 190)
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
@@ -2611,9 +2612,9 @@ def testfunc(a):
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
-            a = tuple(range(10))
+            a = tuple(range(20))
             total = testfunc(a)
-            self.assertEqual(total, 45)
+            self.assertEqual(total, 190)
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
@@ -2647,14 +2648,30 @@ def dummy(x):
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
-            testfunc(10)
+            testfunc(20)
 
         ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
         uops = {opname for opname, _, _ in ex}
         self.assertIn("_PUSH_FRAME", uops)
         self.assertIn("_BINARY_OP_ADD_INT", uops)
 
+    def test_branch_taken(self):
+        def testfunc(n):
+            for i in range(n):
+                if i < 0:
+                    i = 0
+                else:
+                    i = 1
+
+        opt = _testinternalcapi.get_uop_optimizer()
+        with temporary_optimizer(opt):
+            testfunc(20)
+
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        uops = {opname for opname, _, _ in ex}
+        self.assertIn("_POP_JUMP_IF_TRUE", uops)
 
 
 if __name__ == "__main__":