From ee5e878ad794dd38e778d37099373f3eda762d73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= <kamil.sliwak@codepoets.it>
Date: Wed, 9 Feb 2022 18:05:38 +0100
Subject: [PATCH] benchmark_diff: A Python script for diffing summarized
 benchmarks from external tests

---
 scripts/externalTests/benchmark_diff.py       | 212 ++++++++++++
 .../summarized-benchmarks-branch.json         | 100 ++++++
 .../summarized-benchmarks-develop.json        |  99 ++++++
 .../test_externalTests_benchmark_diff.py      | 309 ++++++++++++++++++
 4 files changed, 720 insertions(+)
 create mode 100755 scripts/externalTests/benchmark_diff.py
 create mode 100644 test/scripts/fixtures/summarized-benchmarks-branch.json
 create mode 100644 test/scripts/fixtures/summarized-benchmarks-develop.json
 create mode 100644 test/scripts/test_externalTests_benchmark_diff.py

diff --git a/scripts/externalTests/benchmark_diff.py b/scripts/externalTests/benchmark_diff.py
new file mode 100755
index 000000000000..0669e62ebad1
--- /dev/null
+++ b/scripts/externalTests/benchmark_diff.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+
+from argparse import ArgumentParser
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+from typing import Any, Optional, Union
+import json
+import sys
+
+
+class DifferenceStyle(Enum):  # Presentation of numeric differences; the values double as --style choices.
+    ABSOLUTE = 'absolute'    # after - before
+    RELATIVE = 'relative'    # (after - before) / abs(before), optionally rounded
+    HUMANIZED = 'humanized'  # relative difference rendered as a signed percentage string
+
+
+DEFAULT_RELATIVE_PRECISION = 4  # Default for --precision (number of digits passed to round()).
+DEFAULT_DIFFERENCE_STYLE = DifferenceStyle.ABSOLUTE  # Used when --style is not given.
+
+
+class ValidationError(Exception):
+    """Base class of the script's own exception hierarchy (see CommandLineError)."""
+
+
+class CommandLineError(ValidationError):
+    """Error that main() reports on stderr as "ERROR: ..." and turns into exit code 1."""
+
+
+class BenchmarkDiffer:
+    """Recursively diffs two summarized benchmark reports (nested dicts with numeric leaves).
+
+    Anything that cannot be expressed as a number is reported as a marker string: '!V' (the `version`
+    fields differ), '!B' / '!A' (value missing or null before / after), '!T' (not a pair of numbers).
+    """
+
+    difference_style: DifferenceStyle
+    relative_precision: Optional[int]
+
+    def __init__(self, difference_style: DifferenceStyle, relative_precision: Optional[int]):
+        self.difference_style = difference_style
+        self.relative_precision = relative_precision
+
+    def run(self, before: Any, after: Any) -> Optional[Union[dict, str, int, float]]:
+        """Return the diff of `before` vs `after`; values that are not both dicts are diffed as scalars."""
+        if not isinstance(before, dict) or not isinstance(after, dict):
+            return self._diff_scalars(before, after)
+
+        # A differing 'version' replaces the whole dict with '!V'. Absent on both sides counts as equal.
+        if before.get('version') != after.get('version'):
+            return self._humanize_diff('!V')
+
+        diff = {}
+        for key in (set(before) | set(after)) - {'version'}:
+            value_diff = self.run(before.get(key), after.get(key))
+            # Drop keys with nothing to report (null/missing on both sides) but keep numeric zeros.
+            if value_diff not in [None, {}]:
+                diff[key] = value_diff
+
+        return diff
+
+    def _diff_scalars(self, before: Any, after: Any) -> Optional[Union[str, int, float, dict]]:
+        """Diff two values of which at most one is a dict. Returns {} when both are None."""
+        assert not isinstance(before, dict) or not isinstance(after, dict)
+
+        if before is None and after is None:
+            return {}
+        if before is None:
+            return self._humanize_diff('!B')
+        if after is None:
+            return self._humanize_diff('!A')
+        # bool is a subclass of int but JSON true/false is not a measurement. Never do arithmetic on it.
+        if not all(isinstance(v, (int, float)) and not isinstance(v, bool) for v in (before, after)):
+            return self._humanize_diff('!T')
+
+        number_diff = self._diff_numbers(before, after)
+        if self.difference_style != DifferenceStyle.HUMANIZED:
+            return number_diff
+
+        return self._humanize_diff(number_diff)
+
+    def _diff_numbers(self, value_before: Union[int, float], value_after: Union[int, float]) -> Union[str, int, float]:
+        """Return `value_after - value_before`, divided by `abs(value_before)` unless the style is absolute."""
+        diff: Union[str, int, float]
+
+        if self.difference_style == DifferenceStyle.ABSOLUTE:
+            diff = value_after - value_before
+            if isinstance(diff, float) and diff.is_integer():
+                diff = int(diff)
+
+            return diff
+
+        if value_before == 0:
+            # Division is impossible. Only the direction of the change can be reported.
+            if value_after > 0:
+                return '+INF'
+            return '-INF' if value_after < 0 else 0
+
+        diff = (value_after - value_before) / abs(value_before)
+        if self.relative_precision is not None:
+            rounded_diff = round(diff, self.relative_precision)
+            # Keep the sign of a change that is too small to survive rounding.
+            if rounded_diff == 0 and diff != 0:
+                diff = '-0' if diff < 0 else '+0'
+            else:
+                diff = rounded_diff
+
+        if isinstance(diff, float) and diff.is_integer():
+            diff = int(diff)
+
+        return diff
+
+    def _humanize_diff(self, diff: Union[str, int, float]) -> str:
+        """Format a relative difference as a percentage string. '!' markers are returned unchanged."""
+        if isinstance(diff, str) and diff.startswith('!'):
+            return diff
+        if not isinstance(diff, (int, float)):
+            # '+INF', '-INF', '+0' and '-0' already carry their sign.
+            return f"{diff}%"
+
+        value: Union[int, float] = diff * 100
+        if isinstance(value, float) and self.relative_precision is not None:
+            # The multiplication can result in new significant digits appearing. We need to reround.
+            # NOTE: round() works fine with negative precision.
+            value = round(value, self.relative_precision - 2)
+            if isinstance(value, float) and value.is_integer():
+                value = int(value)
+
+        # Negative numbers already render with '-'. Only positive changes need an explicit sign.
+        return f"{'+' if diff > 0 else ''}{value}%"
+
+
+@dataclass(frozen=True)
+class CommandLineOptions:  # Immutable container for the parsed command-line options.
+    report_before: Path  # JSON file with the original benchmark results
+    report_after: Path  # JSON file with the new benchmark results
+    difference_style: DifferenceStyle
+    relative_precision: int  # passed to round(); may be negative
+
+
+def process_commandline() -> CommandLineOptions:
+    """Parse `sys.argv` into `CommandLineOptions`.
+
+    Invalid arguments are reported by `ArgumentParser` itself, which terminates the script.
+    The report paths are only wrapped in `Path`; their existence is not checked here.
+    """
+    script_description = (
+        "Compares summarized benchmark reports and outputs JSON with the same structure but listing only differences."
+    )
+
+    parser = ArgumentParser(description=script_description)
+    parser.add_argument(dest='report_before', help="Path to a JSON file containing original benchmark results.")
+    parser.add_argument(dest='report_after', help="Path to a JSON file containing new benchmark results.")
+    parser.add_argument(
+        '--style',
+        dest='difference_style',
+        choices=[s.value for s in DifferenceStyle],
+        default=DEFAULT_DIFFERENCE_STYLE.value,
+        help=(
+            "How to present numeric differences: "
+            f"'{DifferenceStyle.ABSOLUTE.value}' subtracts original from new; "
+            f"'{DifferenceStyle.RELATIVE.value}' also divides by the original; "
+            f"'{DifferenceStyle.HUMANIZED.value}' is like relative but value is a percentage and "
+            "positive/negative changes are emphasized. "
+            f"(default: '{DEFAULT_DIFFERENCE_STYLE.value}')."
+        )
+    )
+    # NOTE: Negative values are valid for precision. round() handles them in a sensible way.
+    parser.add_argument(
+        '--precision',
+        dest='relative_precision',
+        type=int,
+        default=DEFAULT_RELATIVE_PRECISION,
+        help=(
+            "Number of decimal places to round relative differences to. "
+            f"Note that with --style={DifferenceStyle.HUMANIZED.value} the rounding is applied "
+            "**before** converting the value to a percentage so you need to add 2. "
+            f"Has no effect when used together with --style={DifferenceStyle.ABSOLUTE.value}. "
+            f"(default: {DEFAULT_RELATIVE_PRECISION})"
+        )
+    )
+
+    options = parser.parse_args()
+
+    # The parser restricts --style to the enum's values so the conversion cannot fail.
+    return CommandLineOptions(
+        report_before=Path(options.report_before),
+        report_after=Path(options.report_after),
+        difference_style=DifferenceStyle(options.difference_style),
+        relative_precision=options.relative_precision,
+    )
+
+
+def main():
+    """Print the diff of the two reports given on the command line as JSON. Returns the exit code."""
+    try:
+        options = process_commandline()
+        report_before = json.loads(options.report_before.read_text('utf-8'))
+        report_after = json.loads(options.report_after.read_text('utf-8'))
+
+        differ = BenchmarkDiffer(options.difference_style, options.relative_precision)
+        diff = differ.run(report_before, report_after)
+        print(json.dumps(diff, indent=4, sort_keys=True))
+    # NOTE: Errors from reading or parsing the reports are not caught here and propagate.
+    except CommandLineError as exception:
+        print(f"ERROR: {exception}", file=sys.stderr)
+        return 1
+
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())  # main() returns the process exit code
diff --git a/test/scripts/fixtures/summarized-benchmarks-branch.json b/test/scripts/fixtures/summarized-benchmarks-branch.json
new file mode 100644
index 000000000000..0ba46b87d682
--- /dev/null
+++ b/test/scripts/fixtures/summarized-benchmarks-branch.json
@@ -0,0 +1,100 @@
+{
+    "bleeps": {
+        "ir-optimize-evm+yul": {
+            "bytecode_size": 132868,
+            "deployment_gas": 0,
+            "method_gas": 39289198,
+            "version": "bb90cd0"
+        },
+        "legacy-optimize-evm+yul": {
+            "bytecode_size": 137869,
+            "deployment_gas": 0,
+            "method_gas": 38863224,
+            "version": "bb90cd0"
+        }
+    },
+    "colony": {
+        "legacy-no-optimize": {
+            "bytecode_size": 664190,
+            "deployment_gas": null,
+            "method_gas": null,
+            "version": "573399b"
+        },
+        "legacy-optimize-evm+yul": {
+            "bytecode_size": 363606,
+            "deployment_gas": null,
+            "method_gas": null,
+            "version": "573399b"
+        }
+    },
+    "elementfi": {
+        "legacy-no-optimize": {
+            "bytecode_size": null,
+            "deployment_gas": 69200158,
+            "method_gas": null,
+            "version": "87f8b5e"
+        },
+        "legacy-optimize-evm+yul": {
+            "deployment_gas": 40951128,
+            "version": "87f8b5e"
+        },
+        "ir-optimize-evm-only": {},
+        "ir-no-optimize": {
+            "deployment_gas": null,
+            "method_gas": 2777867251,
+            "version": "87f8b5e"
+        }
+    },
+    "euler": {
+        "ir-no-optimize": {
+            "bytecode_size": 328540,
+            "deployment_gas": 61591870,
+            "method_gas": 3537419168,
+            "version": "2ef99fc"
+        },
+        "legacy-no-optimize": {
+            "bytecode_size": 328540,
+            "deployment_gas": 62590688,
+            "method_gas": 3537419168,
+            "version": "2ef99fc"
+        },
+        "legacy-optimize-evm+yul": {
+            "bytecode_size": 182190,
+            "deployment_gas": 35236828,
+            "method_gas": 2777867251,
+            "version": "2ef99fc"
+        },
+        "legacy-optimize-evm-only": {
+            "bytecode_size": 205211,
+            "deployment_gas": 39459629,
+            "method_gas": 2978467272,
+            "version": "2ef99fc"
+        },
+        "ir-optimize-evm-only": {
+            "bytecode_size": 205211,
+            "deployment_gas": 39459629,
+            "method_gas": 2978467272,
+            "version": "2ef99fc"
+        },
+        "ir-optimize-evm+yul": {
+            "bytecode_size": 205211,
+            "deployment_gas": 39459629,
+            "method_gas": 2777867251
+        }
+    },
+    "gnosis": {
+        "ir-optimize-evm+yul": {
+            "bytecode_size": 56069,
+            "deployment_gas": null,
+            "method_gas": null,
+            "version": "ea09294"
+        }
+    },
+    "zeppelin": {
+        "legacy-optimize-evm+yul": {
+            "bytecode_size": 510428,
+            "deployment_gas": 94501114,
+            "version": "af7ec04"
+        }
+    }
+}
diff --git a/test/scripts/fixtures/summarized-benchmarks-develop.json b/test/scripts/fixtures/summarized-benchmarks-develop.json
new file mode 100644
index 000000000000..1961870f3f45
--- /dev/null
+++ b/test/scripts/fixtures/summarized-benchmarks-develop.json
@@ -0,0 +1,99 @@
+{
+    "bleeps": {
+        "ir-optimize-evm+yul": {
+            "bytecode_size": 132165,
+            "deployment_gas": 0,
+            "method_gas": 39289935,
+            "version": "bb90cd0"
+        },
+        "legacy-optimize-evm+yul": {
+            "bytecode_size": 137869,
+            "deployment_gas": 0,
+            "method_gas": 38863224,
+            "version": "bb90cd0"
+        }
+    },
+    "colony": {
+        "ir-optimize-evm+yul": {
+            "bytecode_size": 363606,
+            "deployment_gas": null,
+            "method_gas": null,
+            "version": "573399b"
+        },
+        "legacy-optimize-evm+yul": {
+            "bytecode_size": 363606,
+            "deployment_gas": null,
+            "method_gas": null,
+            "version": "573399b"
+        }
+    },
+    "elementfi": {
+        "legacy-no-optimize": {
+            "bytecode_size": 890560,
+            "deployment_gas": null,
+            "method_gas": null,
+            "version": "87f8b5e"
+        },
+        "legacy-optimize-evm+yul": {
+            "bytecode_size": 536668,
+            "version": "87f8b5e"
+        },
+        "legacy-optimize-evm-only": {},
+        "ir-no-optimize": {
+            "bytecode_size": null,
+            "method_gas": 2777867251,
+            "version": "87f8b5e"
+        }
+    },
+    "euler": {
+        "ir-no-optimize": {
+            "bytecode_size": 323909,
+            "deployment_gas": 61591870,
+            "method_gas": 3452105184,
+            "version": "2ef99fc"
+        },
+        "legacy-no-optimize": {
+            "bytecode_size": 323909,
+            "deployment_gas": 61591870,
+            "method_gas": 3452105184,
+            "version": "c23e8bd"
+        },
+        "legacy-optimize-evm+yul": {
+            "bytecode_size": 182190,
+            "deployment_gas": 35236828,
+            "method_gas": 2777867251,
+            "version": "c23e8bd"
+        },
+        "legacy-optimize-evm-only": {
+            "bytecode_size": 202106,
+            "deployment_gas": 38790600,
+            "method_gas": 2907368790,
+            "version": "v1.2.3"
+        },
+        "ir-optimize-evm-only": {
+            "bytecode_size": 182190,
+            "deployment_gas": 35236828,
+            "method_gas": 2777867251
+        },
+        "ir-optimize-evm+yul": {
+            "bytecode_size": 182190,
+            "deployment_gas": 35236828,
+            "method_gas": 2777867251
+        }
+    },
+    "ens": {
+        "legacy-optimize-evm+yul": {
+            "bytecode_size": 156937,
+            "deployment_gas": 30073789,
+            "method_gas": 105365362,
+            "version": "v0.0.8"
+        }
+    },
+    "zeppelin": {
+        "legacy-optimize-evm+yul": {
+            "bytecode_size": 510428,
+            "deployment_gas": 94501114,
+            "version": "af7ec04"
+        }
+    }
+}
diff --git a/test/scripts/test_externalTests_benchmark_diff.py b/test/scripts/test_externalTests_benchmark_diff.py
new file mode 100644
index 000000000000..b4ccf07450d5
--- /dev/null
+++ b/test/scripts/test_externalTests_benchmark_diff.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+
+import json
+import unittest
+
+from unittest_helpers import FIXTURE_DIR, load_fixture
+
+# NOTE: This test file only works with scripts/ added to PYTHONPATH so pylint can't find the imports
+# pragma pylint: disable=import-error
+from externalTests.benchmark_diff import BenchmarkDiffer, DifferenceStyle
+# pragma pylint: enable=import-error
+
+SUMMARIZED_BENCHMARKS_DEVELOP_JSON_PATH = FIXTURE_DIR / 'summarized-benchmarks-develop.json'  # "before" report
+SUMMARIZED_BENCHMARKS_BRANCH_JSON_PATH = FIXTURE_DIR / 'summarized-benchmarks-branch.json'  # "after" report
+
+
+class TestBenchmarkDiff(unittest.TestCase):  # Diffs the two JSON fixtures in absolute style.
+    def setUp(self):
+        self.maxDiff = 10000  # unittest truncates assertion diffs longer than this
+
+    def test_benchmark_diff(self):
+        report_before = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_DEVELOP_JSON_PATH))
+        report_after = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_BRANCH_JSON_PATH))
+        expected_diff = {
+            "bleeps": {
+                "ir-optimize-evm+yul": {
+                    # Numerical difference -> negative/positive/zero.
+                    # Zeros are not skipped to differentiate them from missing values.
+                    "bytecode_size": 132868 - 132165,
+                    "deployment_gas": 0,
+                    "method_gas": 39289198 - 39289935,
+                },
+                "legacy-optimize-evm+yul": {
+                    # No differences within preset -> zeros still present.
+                    "bytecode_size": 0,
+                    "deployment_gas": 0,
+                    "method_gas": 0,
+                },
+            },
+            "colony": {
+                # Preset missing on one side -> replace dict with string
+                "ir-optimize-evm+yul": "!A",
+                "legacy-no-optimize": "!B",
+                "legacy-optimize-evm+yul": {
+                    "bytecode_size": 0,
+                    # Attribute null on both sides -> skip
+                    #"deployment_gas":
+                    #"method_gas":
+                },
+            },
+            "elementfi": {
+                "legacy-no-optimize": {
+                    # Attributes null on one side -> replace value with string
+                    "bytecode_size": "!A",
+                    "deployment_gas": "!B",
+                    # Attribute null on both sides -> skip
+                    #"method_gas":
+                },
+                "legacy-optimize-evm+yul": {
+                    # Attributes missing on one side -> replace value with string
+                    "bytecode_size": "!A",
+                    "deployment_gas": "!B",
+                    # Attribute missing on both sides -> skip
+                    #"method_gas":
+                },
+                "ir-no-optimize": {
+                    # Attributes missing on one side, null on the other -> skip
+                    #"bytecode_size":
+                    #"deployment_gas":
+                    "method_gas": 0,
+                },
+                # Empty preset missing on one side -> replace dict with string
+                "legacy-optimize-evm-only": "!A",
+                "ir-optimize-evm-only": "!B",
+            },
+            "euler": {
+                # Matching versions -> show attributes, skip version
+                "ir-no-optimize": {
+                    "bytecode_size": 328540 - 323909,
+                    "deployment_gas": 0,
+                    "method_gas": 3537419168 - 3452105184,
+                },
+                # Different versions, different values -> replace whole preset with string
+                "legacy-no-optimize": "!V",
+                # Different versions, same values -> replace whole preset with string
+                "legacy-optimize-evm+yul": "!V",
+                # Different versions (not a commit hash), different values -> replace whole preset with string
+                "legacy-optimize-evm-only": "!V",
+                # Version missing on one side -> replace whole preset with string
+                "ir-optimize-evm-only": "!V",
+                # Version missing on both sides -> assume same version
+                "ir-optimize-evm+yul": {
+                    "bytecode_size": 205211 - 182190,
+                    "deployment_gas": 39459629 - 35236828,
+                    "method_gas": 0,
+                },
+            },
+            "zeppelin": {
+                "legacy-optimize-evm+yul": {
+                    # Whole project identical -> attributes still present, with zeros
+                    "bytecode_size": 0,
+                    "deployment_gas": 0,
+                    # Field missing on both sides -> skip
+                    #"method_gas":
+                }
+            },
+            # Whole project missing on one side -> replace its dict with a string
+            "gnosis": "!B",
+            "ens": "!A",
+        }
+        differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, None)
+        self.assertEqual(differ.run(report_before, report_after), expected_diff)
+
+
+class TestBenchmarkDiffer(unittest.TestCase):  # Runs BenchmarkDiffer.run() on small hand-written inputs.
+    def setUp(self):
+        self.maxDiff = 10000  # unittest truncates assertion diffs longer than this
+
+    @staticmethod
+    def _nest(value, levels):  # wraps value in one single-key dict per level; levels[0] ends up innermost
+        nested_value = value
+        for level in levels:
+            nested_value = {level: nested_value}
+
+        return nested_value
+
+    def _assert_single_value_diff_matches(self, differ, cases, nest_result=True, nestings=None):
+        if nestings is None:  # by default every case is checked bare and nested 1-3 levels deep
+            nestings = [[], ['p'], ['p', 's'], ['p', 's', 'a']]
+
+        for levels in nestings:
+            for (before, after, expected_diff) in cases:
+                self.assertEqual(
+                    differ.run(self._nest(before, levels), self._nest(after, levels)),
+                    # With nest_result=False the same un-nested diff is expected at every depth.
+                    self._nest(expected_diff, levels) if nest_result else expected_diff,
+                    f'Wrong diff for {self._nest(before, levels)} vs {self._nest(after, levels)}'
+                )
+
+    def test_empty(self):
+        for style in DifferenceStyle:
+            differ = BenchmarkDiffer(style, None)
+            self._assert_single_value_diff_matches(differ, [({}, {}, {})], nest_result=False)
+
+    def test_null(self):
+        for style in DifferenceStyle:
+            differ = BenchmarkDiffer(style, None)
+            self._assert_single_value_diff_matches(differ, [(None, None, {})], nest_result=False)
+
+    def test_number_diff_absolute_json(self):
+        self._assert_single_value_diff_matches(
+            BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4),
+            [
+                (2,   2,    0),
+                (2,   5,    3),
+                (5,   2,   -3),
+                (2.0, 2.0,  0),
+                (2,   2.0,  0),
+                (2.0, 2,    0),
+                (2,   2.5,  2.5 - 2),
+                (2.5, 2,    2 - 2.5),
+
+                (0,   0,    0),
+                (0,   2,    2),
+                (0,   -2,  -2),
+
+                (-3, -1,    2),
+                (-1, -3,   -2),
+                (2,   0,   -2),
+                (-2,  0,    2),
+
+                (1.00006, 1,  1 - 1.00006),
+                (1, 1.00006,  1.00006 - 1),
+                (1.00004, 1, 1 - 1.00004),
+                (1, 1.00004, 1.00004 - 1),
+            ],
+        )
+
+    def test_number_diff_json(self):  # covers DifferenceStyle.RELATIVE
+        self._assert_single_value_diff_matches(
+            BenchmarkDiffer(DifferenceStyle.RELATIVE, 4),
+            [
+                (2,   2,   0),
+                (2,   5,   (5 - 2) / 2),
+                (5,   2,   (2 - 5) / 5),
+                (2.0, 2.0, 0),
+                (2,   2.0, 0),
+                (2.0, 2,   0),
+                (2,   2.5, (2.5 - 2) / 2),
+                (2.5, 2,   (2 - 2.5) / 2.5),
+
+                (0,   0,   0),
+                (0,   2,   '+INF'),
+                (0,   -2,  '-INF'),
+
+                (-3, -1,   0.6667),
+                (-1, -3,  -2),
+                (2,   0,  -1),
+                (-2,  0,   1),
+
+                (1.00006, 1,   -0.0001),
+                (1, 1.00006,    0.0001),
+                (1.000004, 1, '-0'),  # rounds to zero at precision 4 but the sign is kept
+                (1, 1.000004, '+0'),
+            ],
+        )
+
+    def test_number_diff_humanized_json(self):
+        self._assert_single_value_diff_matches(
+            BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4),
+            [
+                (2,   2,      '0%'),
+                (2,   5,   '+150%'),
+                (5,   2,    '-60%'),
+                (2.0, 2.0,    '0%'),
+                (2,   2.0,    '0%'),
+                (2.0, 2,      '0%'),
+                (2,   2.5,  '+25%'),
+                (2.5, 2,    '-20%'),
+
+                (0,   0,      '0%'),
+                (0,   2,   '+INF%'),
+                (0,   -2,  '-INF%'),
+
+                (-3, -1, '+66.67%'),
+                (-1, -3,   '-200%'),
+                (2,   0,   '-100%'),
+                (-2,  0,   '+100%'),
+
+                (1.00006, 1,  '-0.01%'),
+                (1, 1.00006,  '+0.01%'),
+                (1.000004, 1,    '-0%'),
+                (1, 1.000004,    '+0%'),
+            ],
+        )
+
+    def test_type_mismatch(self):
+        for style in DifferenceStyle:
+            self._assert_single_value_diff_matches(
+                BenchmarkDiffer(style, 4),
+                [
+                    (1, {}, '!T'),
+                    ({}, 1, '!T'),
+                    (1.5, {}, '!T'),
+                    ({}, 1.5, '!T'),
+                    ('1', {}, '!T'),
+                    ({}, '1', '!T'),
+                    (1, '1', '!T'),
+                    ('1', 1, '!T'),
+                    (1.5, '1', '!T'),
+                    ('1', 1.5, '!T'),
+                    ('1', '1', '!T'),  # equal strings are still reported; only numbers can be diffed
+                ],
+            )
+
+    def test_version_mismatch(self):
+        for style in DifferenceStyle:
+            self._assert_single_value_diff_matches(
+                BenchmarkDiffer(style, 4),
+                [
+                    ({'a': 123, 'version': 1}, {'a': 123, 'version': 2}, '!V'),
+                    ({'a': 123, 'version': 2}, {'a': 123, 'version': 1}, '!V'),
+                    ({'a': 123, 'version': 'a'}, {'a': 123, 'version': 'b'}, '!V'),
+                    ({'a': 123, 'version': 'a'}, {'a': 123, 'version': 1}, '!V'),
+
+                    ({'a': 'a', 'version': 1}, {'a': 'a', 'version': 2}, '!V'),
+                    ({'a': {}, 'version': 1}, {'a': {}, 'version': 2}, '!V'),
+                    ({'s': {'a': 1}, 'version': 1}, {'s': {'a': 1}, 'version': 2}, '!V'),
+
+                    ({'a': 123, 'version': 1}, {'a': 456, 'version': 2}, '!V'),
+                    ({'a': 'a', 'version': 1}, {'a': 'b', 'version': 2}, '!V'),
+                    ({'s': {'a': 1}, 'version': 1}, {'s': {'a': 2}, 'version': 2}, '!V'),
+                ],
+            )
+
+    def test_missing(self):
+        for style in DifferenceStyle:
+            self._assert_single_value_diff_matches(
+                BenchmarkDiffer(style, None),
+                [
+                    (1, None, '!A'),
+                    (None, 1, '!B'),
+                    ('1', None, '!A'),
+                    (None, '1', '!B'),
+                    ({}, None, '!A'),
+                    (None, {}, '!B'),
+
+                    ({'x': 1}, {}, {'x': '!A'}),
+                    ({}, {'x': 1}, {'x': '!B'}),
+                    ({'x': 1}, {'x': None}, {'x': '!A'}),
+                    ({'x': None}, {'x': 1}, {'x': '!B'}),
+                    ({'x': 1}, {'y': 1}, {'x': '!A', 'y': '!B'}),
+
+                    ({'x': {}}, {}, {'x': '!A'}),
+                    ({}, {'x': {}}, {'x': '!B'}),
+                    ({'p': {'x': {}}}, {}, {'p': '!A'}),
+                    ({}, {'p': {'x': {}}}, {'p': '!B'}),
+                ],
+            )
+
+    def test_missing_vs_null(self):  # a key that is null on one side and absent on the other is not a difference
+        for style in DifferenceStyle:
+            self._assert_single_value_diff_matches(
+                BenchmarkDiffer(style, None),
+                [
+                    ({'a': None}, {}, {}),
+                    ({}, {'a': None}, {}),
+                ],
+                nest_result=False,
+            )