diff --git a/commands.md b/commands.md index c572e31..4ef8125 100644 --- a/commands.md +++ b/commands.md @@ -230,7 +230,8 @@ $ solana-test-suite run-tests [OPTIONS] * `-r, --randomize-output-buffer`: Randomizes bytes in output buffer before shared library execution * `-c, --chunk-size INTEGER`: Number of test results per file [default: 10000] * `-v, --verbose`: Verbose output: log failed test cases -* `-c, --consensus-mode`: Only fail on consensus failures. One such effect is to normalize error codes when comparing results +* `-c, --consensus-mode`: Only fail on consensus failures. One such effect is to normalize error codes when comparing results. Note: Cannot be used with --core-bpf-mode. +* `-cb, --core-bpf-mode`: Deliberately skip known mismatches between BPF programs and builtins, only failing on genuine mismatches. For example, builtin programs may throw errors on readonly account state violations sooner than BPF programs, compute unit usage will be different, etc. This feature is primarily used to test a BPF program against a builtin. Note: Cannot be used with --consensus-mode. 
* `-f, --failures-only`: Only log failed test cases * `-sf, --save-failures`: Saves failed test cases to results directory * `-ss, --save-successes`: Saves successful test cases to results directory diff --git a/src/test_suite/fuzz_context.py b/src/test_suite/fuzz_context.py index b5af90e..941dbf7 100644 --- a/src/test_suite/fuzz_context.py +++ b/src/test_suite/fuzz_context.py @@ -25,6 +25,7 @@ context_human_decode_fn=instr_codec.decode_input, effects_human_encode_fn=instr_codec.encode_output, consensus_diff_effect_fn=instr_diff.consensus_instr_diff_effects, + core_bpf_diff_effect_fn=instr_diff.core_bpf_instr_diff_effects, ) SyscallHarness = HarnessCtx( diff --git a/src/test_suite/fuzz_interface.py b/src/test_suite/fuzz_interface.py index 20902a6..b80d121 100644 --- a/src/test_suite/fuzz_interface.py +++ b/src/test_suite/fuzz_interface.py @@ -88,6 +88,9 @@ class HarnessCtx: consensus_diff_effect_fn: Callable[[EffectsType, EffectsType], bool] = ( generic_effects_diff ) + core_bpf_diff_effect_fn: Callable[[EffectsType, EffectsType], bool] = ( + generic_effects_diff + ) prune_effects_fn: Callable[ [ContextType | None, dict[str, str | None]], dict[str, str | None] | None ] = generic_effects_prune diff --git a/src/test_suite/instr/diff_utils.py b/src/test_suite/instr/diff_utils.py index 0516e5e..a1d039d 100644 --- a/src/test_suite/instr/diff_utils.py +++ b/src/test_suite/instr/diff_utils.py @@ -17,3 +17,29 @@ def consensus_instr_diff_effects(a: invoke_pb.InstrEffects, b: invoke_pb.InstrEf b_san.cu_avail = 0 return a_san == b_san + + +def core_bpf_instr_diff_effects(a: invoke_pb.InstrEffects, b: invoke_pb.InstrEffects): + a_san = invoke_pb.InstrEffects() + a_san.CopyFrom(a) + b_san = invoke_pb.InstrEffects() + b_san.CopyFrom(b) + + # If the result is an error (not 0), don't return modified accounts. 
+ if a_san.result != 0: + while len(a_san.modified_accounts) > 0: + a_san.modified_accounts.pop() + if b_san.result != 0: + while len(b_san.modified_accounts) > 0: + b_san.modified_accounts.pop() + + # Normalize error codes and cus + a_san.result = 0 + a_san.custom_err = 0 + a_san.cu_avail = 0 + + b_san.result = 0 + b_san.custom_err = 0 + b_san.cu_avail = 0 + + return a_san == b_san diff --git a/src/test_suite/multiprocessing_utils.py b/src/test_suite/multiprocessing_utils.py index f618196..361e6f8 100644 --- a/src/test_suite/multiprocessing_utils.py +++ b/src/test_suite/multiprocessing_utils.py @@ -316,6 +316,8 @@ def build_test_results( if globals.consensus_mode: harness_ctx.diff_effect_fn = harness_ctx.consensus_diff_effect_fn + if globals.core_bpf_mode: + harness_ctx.diff_effect_fn = harness_ctx.core_bpf_diff_effect_fn # Note: diff_effect_fn may modify effects in-place all_passed &= harness_ctx.diff_effect_fn(ref_effects, effects) diff --git a/src/test_suite/test_suite.py b/src/test_suite/test_suite.py index 3ebb75c..582a506 100644 --- a/src/test_suite/test_suite.py +++ b/src/test_suite/test_suite.py @@ -353,7 +353,17 @@ def run_tests( False, "--consensus-mode", "-c", - help="Only fail on consensus failures. One such effect is to normalize error codes when comparing results", + help="Only fail on consensus failures. One such effect is to normalize error codes when comparing results. \ +Note: Cannot be used with --core-bpf-mode.", + ), + core_bpf_mode: bool = typer.Option( + False, + "--core-bpf-mode", + "-cb", + help="Deliberately skip known mismatches between BPF programs and builtins, only failing on genuine mismatches. \ +For example, builtin programs may throw errors on readonly account state violations sooner than BPF programs, \ +compute unit usage will be different, etc. This feature is primarily used to test a BPF program against a builtin. 
\ +Note: Cannot be used with --consensus-mode.", ), failures_only: bool = typer.Option( False, @@ -388,8 +398,17 @@ def run_tests( globals.reference_shared_library = reference_shared_library globals.default_harness_ctx = HARNESS_MAP[default_harness_ctx] + # Set diff mode if specified + if consensus_mode and core_bpf_mode: + typer.echo( + "Error: --consensus-mode and --core-bpf-mode cannot be used together.", + err=True, + ) + raise typer.Exit(code=1) # Set diff mode to consensus if specified globals.consensus_mode = consensus_mode + # Set diff mode to core_bpf if specified + globals.core_bpf_mode = core_bpf_mode # Create the output directory, if necessary if globals.output_dir.exists(): @@ -703,6 +722,7 @@ def debug_mismatches( log_chunk_size=10000, verbose=True, consensus_mode=False, + core_bpf_mode=False, failures_only=False, save_failures=True, save_successes=True,