firedancer-io · mjain-jump · Dec 11, 2024 · Dec 6, 2024 · Dec 6, 2024 · Dec 10, 2024
diff --git a/commands.md b/commands.md
@@ -230,7 +230,8 @@ $ solana-test-suite run-tests [OPTIONS]
 * `-r, --randomize-output-buffer`: Randomizes bytes in output buffer before shared library execution
 * `-c, --chunk-size INTEGER`: Number of test results per file  [default: 10000]
 * `-v, --verbose`: Verbose output: log failed test cases
-* `-c, --consensus-mode`: Only fail on consensus failures. One such effect is to normalize error codes when comparing results
+* `-c, --consensus-mode`: Only fail on consensus failures. One such effect is to normalize error codes when comparing results. Note: Cannot be used with --core-bpf-mode.
+* `-cb, --core-bpf-mode`: Deliberately skip known mismatches between BPF programs and builtins, only failing on genuine mismatches. For example, builtin programs may throw errors on readonly account state violations sooner than BPF programs, compute unit usage will be different, etc. This feature is primarily used to test a BPF program against a builtin. Note: Cannot be used with --consensus-mode.
 * `-f, --failures-only`: Only log failed test cases
 * `-sf, --save-failures`: Saves failed test cases to results directory
 * `-ss, --save-successes`: Saves successful test cases to results directory

diff --git a/src/test_suite/fuzz_context.py b/src/test_suite/fuzz_context.py
@@ -25,6 +25,7 @@
     context_human_decode_fn=instr_codec.decode_input,
     effects_human_encode_fn=instr_codec.encode_output,
     consensus_diff_effect_fn=instr_diff.consensus_instr_diff_effects,
+    core_bpf_diff_effect_fn=instr_diff.core_bpf_instr_diff_effects,
 )
 
 SyscallHarness = HarnessCtx(

diff --git a/src/test_suite/fuzz_interface.py b/src/test_suite/fuzz_interface.py
@@ -88,6 +88,9 @@ class HarnessCtx:
     consensus_diff_effect_fn: Callable[[EffectsType, EffectsType], bool] = (
         generic_effects_diff
     )
+    core_bpf_diff_effect_fn: Callable[[EffectsType, EffectsType], bool] = (
+        generic_effects_diff
+    )
     prune_effects_fn: Callable[
         [ContextType | None, dict[str, str | None]], dict[str, str | None] | None
     ] = generic_effects_prune

diff --git a/src/test_suite/instr/diff_utils.py b/src/test_suite/instr/diff_utils.py
@@ -17,3 +17,29 @@ def consensus_instr_diff_effects(a: invoke_pb.InstrEffects, b: invoke_pb.InstrEf
     b_san.cu_avail = 0
 
     return a_san == b_san
+
+
+def core_bpf_instr_diff_effects(a: invoke_pb.InstrEffects, b: invoke_pb.InstrEffects):
+    a_san = invoke_pb.InstrEffects()
+    a_san.CopyFrom(a)
+    b_san = invoke_pb.InstrEffects()
+    b_san.CopyFrom(b)
+
+    # On error (result != 0), drop every modified account from that copy so they are ignored by the comparison.
+    if a_san.result != 0:
+        while len(a_san.modified_accounts) > 0:
+            a_san.modified_accounts.pop()
+    if b_san.result != 0:
+        while len(b_san.modified_accounts) > 0:
+            b_san.modified_accounts.pop()
+
+    # Zero result, custom_err and cu_avail on both copies so only the remaining fields decide equality.
+    a_san.result = 0
+    a_san.custom_err = 0
+    a_san.cu_avail = 0
+
+    b_san.result = 0
+    b_san.custom_err = 0
+    b_san.cu_avail = 0
+
+    return a_san == b_san
diff --git a/src/test_suite/multiprocessing_utils.py b/src/test_suite/multiprocessing_utils.py
@@ -316,6 +316,8 @@ def build_test_results(
 
             if globals.consensus_mode:
                 harness_ctx.diff_effect_fn = harness_ctx.consensus_diff_effect_fn
+            if globals.core_bpf_mode:
+                harness_ctx.diff_effect_fn = harness_ctx.core_bpf_diff_effect_fn
 
             # Note: diff_effect_fn may modify effects in-place
             all_passed &= harness_ctx.diff_effect_fn(ref_effects, effects)

diff --git a/src/test_suite/test_suite.py b/src/test_suite/test_suite.py
@@ -353,7 +353,17 @@ def run_tests(
         False,
         "--consensus-mode",
         "-c",
-        help="Only fail on consensus failures. One such effect is to normalize error codes when comparing results",
+        help="Only fail on consensus failures. One such effect is to normalize error codes when comparing results. \
+Note: Cannot be used with --core-bpf-mode.",
+    ),
+    core_bpf_mode: bool = typer.Option(
+        False,
+        "--core-bpf-mode",
+        "-cb",
+        help="Deliberately skip known mismatches between BPF programs and builtins, only failing on genuine mismatches. \
+For example, builtin programs may throw errors on readonly account state violations sooner than BPF programs, \
+compute unit usage will be different, etc. This feature is primarily used to test a BPF program against a builtin. \
+Note: Cannot be used with --consensus-mode.",
     ),
     failures_only: bool = typer.Option(
         False,
@@ -388,8 +398,17 @@ def run_tests(
     globals.reference_shared_library = reference_shared_library
     globals.default_harness_ctx = HARNESS_MAP[default_harness_ctx]
 
+    # Reject mutually exclusive diff modes before recording them in globals
+    if consensus_mode and core_bpf_mode:
+        typer.echo(
+            "Error: --consensus-mode and --core-bpf-mode cannot be used together.",
+            err=True,
+        )
+        raise typer.Exit(code=1)
     # Set diff mode to consensus if specified
     globals.consensus_mode = consensus_mode
+    # Set diff mode to core_bpf if specified
+    globals.core_bpf_mode = core_bpf_mode
 
     # Create the output directory, if necessary
     if globals.output_dir.exists():
@@ -703,6 +722,7 @@ def debug_mismatches(
         log_chunk_size=10000,
         verbose=True,
         consensus_mode=False,
+        core_bpf_mode=False,
         failures_only=False,
         save_failures=True,
         save_successes=True,