diff --git a/scripts/externalTests/benchmark_diff.py b/scripts/externalTests/benchmark_diff.py index 0669e62ebad1..234f85ac0ea9 100755 --- a/scripts/externalTests/benchmark_diff.py +++ b/scripts/externalTests/benchmark_diff.py @@ -4,19 +4,44 @@ from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import Any, Optional, Union +from textwrap import dedent +from typing import Any, Mapping, Optional, Set, Sequence, Union import json import sys +class DiffMode(Enum): + IN_PLACE = 'inplace' + TABLE = 'table' + + class DifferenceStyle(Enum): ABSOLUTE = 'absolute' RELATIVE = 'relative' HUMANIZED = 'humanized' +class OutputFormat(Enum): + JSON = 'json' + CONSOLE = 'console' + MARKDOWN = 'markdown' + + DEFAULT_RELATIVE_PRECISION = 4 -DEFAULT_DIFFERENCE_STYLE = DifferenceStyle.ABSOLUTE + +DEFAULT_DIFFERENCE_STYLE = { + DiffMode.IN_PLACE: DifferenceStyle.ABSOLUTE, + DiffMode.TABLE: DifferenceStyle.HUMANIZED, +} +assert all(t in DiffMode for t in DEFAULT_DIFFERENCE_STYLE) +assert all(d in DifferenceStyle for d in DEFAULT_DIFFERENCE_STYLE.values()) + +DEFAULT_OUTPUT_FORMAT = { + DiffMode.IN_PLACE: OutputFormat.JSON, + DiffMode.TABLE: OutputFormat.CONSOLE, +} +assert all(m in DiffMode for m in DEFAULT_OUTPUT_FORMAT) +assert all(o in OutputFormat for o in DEFAULT_OUTPUT_FORMAT.values()) class ValidationError(Exception): @@ -30,14 +55,17 @@ class CommandLineError(ValidationError): class BenchmarkDiffer: difference_style: DifferenceStyle relative_precision: Optional[int] + output_format: OutputFormat def __init__( self, difference_style: DifferenceStyle, relative_precision: Optional[int], + output_format: OutputFormat, ): self.difference_style = difference_style self.relative_precision = relative_precision + self.output_format = output_format def run(self, before: Any, after: Any) -> Optional[Union[dict, str, int, float]]: if not isinstance(before, dict) or not isinstance(after, dict): @@ -106,8 +134,13 @@ def _diff_numbers(self, value_before: 
@dataclass(frozen=True)
class DiffTable:
    """Immutable container mapping column names to sequences of cell values."""

    columns: Mapping[str, Sequence[Union[int, float, str]]]


class DiffTableSet:
    """Rearranges a nested ``project -> preset -> attribute`` diff into one table per preset.

    Every table has the same rows (all projects found in the diff) and the same
    columns (all attribute names found under any preset of any project).
    A cell holds the diff value, an empty string when the diff has no entry for that
    combination, or the scalar marker found at project/preset level when the
    diff stops short of the attribute level.
    """

    table_headers: Sequence[str]
    row_headers: Sequence[str]
    column_headers: Sequence[str]

    # Cells is a nested dict rather than a 3D array so that conversion to JSON is straightforward
    cells: Mapping[str, Mapping[str, Mapping[str, Union[int, float, str]]]]  # preset -> project -> attribute

    def __init__(self, diff: dict):
        """Build the table set from ``diff``, a dict keyed by project name."""
        self.table_headers = sorted(self._find_all_preset_names(diff))
        self.column_headers = sorted(self._find_all_attribute_names(diff))
        self.row_headers = sorted(diff)

        # All dimensions must have unique values
        assert len(self.table_headers) == len(set(self.table_headers))
        assert len(self.column_headers) == len(set(self.column_headers))
        assert len(self.row_headers) == len(set(self.row_headers))

        self.cells = {
            preset: {
                project: {
                    attribute: self._cell_content(diff, project, preset, attribute)
                    for attribute in self.column_headers
                }
                for project in self.row_headers
            }
            for preset in self.table_headers
        }

    def calculate_row_column_width(self, title: str = '') -> int:
        """Return the width needed by the column that lists the row headers.

        ``title`` is the text shown in that column's header cell. Passing it makes
        the result wide enough for the title even if every row header is shorter.
        Returns ``len(title)`` (0 by default) when there are no rows at all.
        """
        return max(len(title), max((len(header) for header in self.row_headers), default=0))

    def calculate_column_widths(self, table_header: str) -> Sequence[int]:
        """Return, for each column of the given table, the width of its widest cell or header."""
        assert table_header in self.table_headers

        rows = self.cells[table_header]
        return [
            max(
                len(column_header),
                # default=0 keeps this well-defined for a table without rows.
                max((len(str(row[column_header])) for row in rows.values()), default=0),
            )
            for column_header in self.column_headers
        ]

    @classmethod
    def _find_all_preset_names(cls, diff: dict) -> Set[str]:
        """Collect the second-level keys of every project whose diff is a dict."""
        return {
            preset
            for project_diff in diff.values()
            if isinstance(project_diff, dict)
            for preset in project_diff
        }

    @classmethod
    def _find_all_attribute_names(cls, diff: dict) -> Set[str]:
        """Collect the third-level keys of every preset whose diff is a dict."""
        return {
            attribute
            for project_diff in diff.values()
            if isinstance(project_diff, dict)
            for preset_diff in project_diff.values()
            if isinstance(preset_diff, dict)
            for attribute in preset_diff
        }

    @classmethod
    def _cell_content(cls, diff: dict, project: str, preset: str, attribute: str) -> Union[int, float, str]:
        """Return the value to display for one project/preset/attribute combination.

        A non-dict value at project or preset level (e.g. a ``'!V'`` marker) applies to
        every cell below it. Missing presets or attributes yield an empty string.
        """
        assert project in diff

        project_diff = diff[project]
        if not isinstance(project_diff, dict):
            # Scalar at project level: repeat it in every cell of the row.
            return project_diff
        if preset not in project_diff:
            return ''

        preset_diff = project_diff[preset]
        if not isinstance(preset_diff, dict):
            # Scalar at preset level: repeat it in every column of this table's row.
            return preset_diff

        return preset_diff.get(attribute, '')


class DiffTableFormatter:
    """Renders a ``DiffTableSet`` as JSON, as console tables, or as markdown tables."""

    # Title of the first column, which lists the row headers.
    ROW_HEADER_TITLE = 'project'

    LEGEND = dedent("""
        `!V` = version mismatch
        `!B` = no value in the "before" version
        `!A` = no value in the "after" version
        `!T` = one or both values were not numeric and could not be compared
        `-0` = very small negative value rounded to zero
        `+0` = very small positive value rounded to zero
    """)

    @classmethod
    def run(cls, diff_table_set: DiffTableSet, output_format: 'OutputFormat') -> str:
        """Format all tables of ``diff_table_set`` in the requested output format.

        JSON output is a dump of the nested cell dict. Console output frames each table
        with separator rows. Markdown output uses a heading per table, a single
        alignment row under the column headers and appends the legend at the end.
        """
        if output_format == OutputFormat.JSON:
            return json.dumps(diff_table_set.cells, indent=4, sort_keys=True)

        assert output_format in {OutputFormat.CONSOLE, OutputFormat.MARKDOWN}
        markdown = (output_format == OutputFormat.MARKDOWN)

        lines = []
        for table_header in diff_table_set.table_headers:
            column_widths = [
                # Include the title so the header cell never overflows its column.
                diff_table_set.calculate_row_column_width(cls.ROW_HEADER_TITLE),
                *diff_table_set.calculate_column_widths(table_header),
            ]
            separator = cls._format_separator_row(column_widths, output_format)
            table_cells = diff_table_set.cells[table_header]

            lines.append('')
            lines.append(f'### `{table_header}`' if markdown else table_header.upper())

            if not markdown:
                lines.append(separator)
            lines.append(
                cls._format_data_row([cls.ROW_HEADER_TITLE, *diff_table_set.column_headers], column_widths)
            )
            lines.append(separator)

            for row_header in diff_table_set.row_headers:
                row = [
                    table_cells[row_header][column_header]
                    for column_header in diff_table_set.column_headers
                ]
                lines.append(cls._format_data_row([row_header, *row], column_widths))

            if not markdown:
                lines.append(separator)

        output = ''.join(f'{line}\n' for line in lines)
        if markdown:
            output += f'\n{cls.LEGEND}\n'
        return output

    @classmethod
    def _format_separator_row(cls, widths: Sequence[int], output_format: 'OutputFormat') -> str:
        """Return a horizontal rule row matching ``widths``.

        In markdown the row doubles as the alignment row: the first column is
        centered (``:---:``) and the remaining ones are right-aligned (``---:``).
        """
        assert output_format in {OutputFormat.CONSOLE, OutputFormat.MARKDOWN}

        dashes = ['-' * width for width in widths]
        if output_format == OutputFormat.MARKDOWN:
            return '|:' + ':|-'.join(dashes) + ':|'
        return '|-' + '-|-'.join(dashes) + '-|'

    @classmethod
    def _format_data_row(cls, cells: Sequence[Union[int, float, str]], widths: Sequence[int]) -> str:
        """Return one table row with every cell right-justified to its column width."""
        assert len(cells) == len(widths)

        return '| ' + ' | '.join(str(cell).rjust(width) for cell, width in zip(cells, widths)) + ' |'
relative_precision: int + output_format: OutputFormat def process_commandline() -> CommandLineOptions: script_description = ( - "Compares summarized benchmark reports and outputs JSON with the same structure but listing only differences." + "Compares summarized benchmark reports and outputs JSON with the same structure but listing only differences. " + "Can also print the output as markdown table and format the values to make differences stand out more." ) parser = ArgumentParser(description=script_description) + parser.add_argument( + dest='diff_mode', + choices=[m.value for m in DiffMode], + help=( + "Diff mode: " + f"'{DiffMode.IN_PLACE.value}' preserves input JSON structure and replace values with differences; " + f"'{DiffMode.TABLE.value}' creates a table assuming 3-level project/preset/attribute structure." + ) + ) parser.add_argument(dest='report_before', help="Path to a JSON file containing original benchmark results.") parser.add_argument(dest='report_after', help="Path to a JSON file containing new benchmark results.") parser.add_argument( @@ -156,7 +366,8 @@ def process_commandline() -> CommandLineOptions: f"'{DifferenceStyle.RELATIVE.value}' also divides by the original; " f"'{DifferenceStyle.HUMANIZED.value}' is like relative but value is a percentage and " "positive/negative changes are emphasized. " - f"(default: '{DEFAULT_DIFFERENCE_STYLE}')." + f"(default: '{DEFAULT_DIFFERENCE_STYLE[DiffMode.IN_PLACE]}' in '{DiffMode.IN_PLACE.value}' mode, " + f"'{DEFAULT_DIFFERENCE_STYLE[DiffMode.TABLE]}' in '{DiffMode.TABLE.value}' mode)" ) ) # NOTE: Negative values are valid for precision. round() handles them in a sensible way. 
@@ -173,21 +384,47 @@ def process_commandline() -> CommandLineOptions: f"(default: {DEFAULT_RELATIVE_PRECISION})" ) ) + parser.add_argument( + '--output-format', + dest='output_format', + choices=[o.value for o in OutputFormat], + help=( + "The format to use for the diff: " + f"'{OutputFormat.JSON.value}' is raw JSON; " + f"'{OutputFormat.CONSOLE.value}' is a table with human-readable values that will look good in the console output. " + f"'{OutputFormat.MARKDOWN.value}' is similar '{OutputFormat.CONSOLE.value}' but adjusted to " + "render as proper markdown and with extra elements (legend, emoji to make non-zero values stand out more, etc)." + f"(default: '{DEFAULT_OUTPUT_FORMAT[DiffMode.IN_PLACE]}' in '{DiffMode.IN_PLACE.value}' mode, " + f"'{DEFAULT_OUTPUT_FORMAT[DiffMode.TABLE]}' in '{DiffMode.TABLE.value}' mode)" + ) + ) options = parser.parse_args() if options.difference_style is not None: difference_style = DifferenceStyle(options.difference_style) else: - difference_style = DEFAULT_DIFFERENCE_STYLE + difference_style = DEFAULT_DIFFERENCE_STYLE[DiffMode(options.diff_mode)] + + if options.output_format is not None: + output_format = OutputFormat(options.output_format) + else: + output_format = DEFAULT_OUTPUT_FORMAT[DiffMode(options.diff_mode)] processed_options = CommandLineOptions( + diff_mode=DiffMode(options.diff_mode), report_before=Path(options.report_before), report_after=Path(options.report_after), difference_style=difference_style, relative_precision=options.relative_precision, + output_format=output_format, ) + if processed_options.diff_mode == DiffMode.IN_PLACE and processed_options.output_format != OutputFormat.JSON: + raise CommandLineError( + f"Only the '{OutputFormat.JSON.value}' output format is supported in the '{DiffMode.IN_PLACE.value}' mode." 
+ ) + return processed_options @@ -195,13 +432,17 @@ def main(): try: options = process_commandline() - differ = BenchmarkDiffer(options.difference_style, options.relative_precision) + differ = BenchmarkDiffer(options.difference_style, options.relative_precision, options.output_format) diff = differ.run( json.loads(options.report_before.read_text('utf-8')), json.loads(options.report_after.read_text('utf-8')), ) - print(json.dumps(diff, indent=4, sort_keys=True)) + if options.diff_mode == DiffMode.IN_PLACE: + print(json.dumps(diff, indent=4, sort_keys=True)) + else: + assert options.diff_mode == DiffMode.TABLE + print(DiffTableFormatter.run(DiffTableSet(diff), options.output_format)) return 0 except CommandLineError as exception: diff --git a/test/scripts/fixtures/summarized-benchmark-diff-develop-branch-humanized.md b/test/scripts/fixtures/summarized-benchmark-diff-develop-branch-humanized.md new file mode 100644 index 000000000000..ce351a5b709c --- /dev/null +++ b/test/scripts/fixtures/summarized-benchmark-diff-develop-branch-humanized.md @@ -0,0 +1,75 @@ + +### `ir-no-optimize` +| project | bytecode_size | deployment_gas | method_gas | +|:---------:|---------------:|---------------:|---------------:| +| bleeps | | | | +| colony | | | | +| elementfi | | | `0%` | +| ens | `!A` | `!A` | `!A` | +| euler | **`+1.43% ❌`** | `0%` | **`+2.47% ❌`** | +| gnosis | `!B` | `!B` | `!B` | +| zeppelin | | | | + +### `ir-optimize-evm+yul` +| project | bytecode_size | deployment_gas | method_gas | +|:---------:|----------------:|----------------:|-----------:| +| bleeps | **`+0.53% ❌`** | `0%` | `-0%` | +| colony | `!A` | `!A` | `!A` | +| elementfi | | | | +| ens | `!A` | `!A` | `!A` | +| euler | **`+12.64% ❌`** | **`+11.98% ❌`** | `0%` | +| gnosis | `!B` | `!B` | `!B` | +| zeppelin | | | | + +### `ir-optimize-evm-only` +| project | bytecode_size | deployment_gas | method_gas | +|:---------:|--------------:|---------------:|-----------:| +| bleeps | | | | +| colony | | | | +| 
elementfi | `!B` | `!B` | `!B` | +| ens | `!A` | `!A` | `!A` | +| euler | `!V` | `!V` | `!V` | +| gnosis | `!B` | `!B` | `!B` | +| zeppelin | | | | + +### `legacy-no-optimize` +| project | bytecode_size | deployment_gas | method_gas | +|:---------:|--------------:|---------------:|-----------:| +| bleeps | | | | +| colony | `!B` | `!B` | `!B` | +| elementfi | `!A` | `!B` | | +| ens | `!A` | `!A` | `!A` | +| euler | `!V` | `!V` | `!V` | +| gnosis | `!B` | `!B` | `!B` | +| zeppelin | | | | + +### `legacy-optimize-evm+yul` +| project | bytecode_size | deployment_gas | method_gas | +|:---------:|--------------:|---------------:|-----------:| +| bleeps | `0%` | `0%` | `0%` | +| colony | `0%` | | | +| elementfi | `!A` | `!B` | | +| ens | `!A` | `!A` | `!A` | +| euler | `!V` | `!V` | `!V` | +| gnosis | `!B` | `!B` | `!B` | +| zeppelin | `0%` | `0%` | | + +### `legacy-optimize-evm-only` +| project | bytecode_size | deployment_gas | method_gas | +|:---------:|--------------:|---------------:|-----------:| +| bleeps | | | | +| colony | | | | +| elementfi | `!A` | `!A` | `!A` | +| ens | `!A` | `!A` | `!A` | +| euler | `!V` | `!V` | `!V` | +| gnosis | `!B` | `!B` | `!B` | +| zeppelin | | | | + + +`!V` = version mismatch +`!B` = no value in the "before" version +`!A` = no value in the "after" version +`!T` = one or both values were not numeric and could not be compared +`-0` = very small negative value rounded to zero +`+0` = very small positive value rounded to zero + diff --git a/test/scripts/test_externalTests_benchmark_diff.py b/test/scripts/test_externalTests_benchmark_diff.py index b4ccf07450d5..b40db8e63384 100644 --- a/test/scripts/test_externalTests_benchmark_diff.py +++ b/test/scripts/test_externalTests_benchmark_diff.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +from textwrap import dedent import json import unittest @@ -7,12 +8,15 @@ # NOTE: This test file file only works with scripts/ added to PYTHONPATH so pylint can't find the imports # pragma pylint: 
disable=import-error -from externalTests.benchmark_diff import BenchmarkDiffer, DifferenceStyle +from externalTests.benchmark_diff import BenchmarkDiffer, DifferenceStyle, DiffTableSet, DiffTableFormatter, OutputFormat # pragma pylint: enable=import-error SUMMARIZED_BENCHMARKS_DEVELOP_JSON_PATH = FIXTURE_DIR / 'summarized-benchmarks-develop.json' SUMMARIZED_BENCHMARKS_BRANCH_JSON_PATH = FIXTURE_DIR / 'summarized-benchmarks-branch.json' +SUMMARIZED_DIFF_HUMANIZED_MD_PATH = FIXTURE_DIR / 'summarized-benchmark-diff-develop-branch-humanized.md' +SUMMARIZED_DIFF_HUMANIZED_MD = load_fixture(SUMMARIZED_DIFF_HUMANIZED_MD_PATH) + class TestBenchmarkDiff(unittest.TestCase): def setUp(self): @@ -108,7 +112,7 @@ def test_benchmark_diff(self): "gnosis": "!B", "ens": "!A", } - differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, None) + differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, None, OutputFormat.JSON) self.assertEqual(differ.run(report_before, report_after), expected_diff) @@ -138,105 +142,137 @@ def _assert_single_value_diff_matches(self, differ, cases, nest_result=True, nes def test_empty(self): for style in DifferenceStyle: - differ = BenchmarkDiffer(style, None) + differ = BenchmarkDiffer(style, None, OutputFormat.JSON) self._assert_single_value_diff_matches(differ, [({}, {}, {})], nest_result=False) def test_null(self): for style in DifferenceStyle: - differ = BenchmarkDiffer(style, None) + differ = BenchmarkDiffer(style, None, OutputFormat.JSON) self._assert_single_value_diff_matches(differ, [(None, None, {})], nest_result=False) def test_number_diff_absolute_json(self): - self._assert_single_value_diff_matches( - BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4), - [ - (2, 2, 0), - (2, 5, 3), - (5, 2, -3), - (2.0, 2.0, 0), - (2, 2.0, 0), - (2.0, 2, 0), - (2, 2.5, 2.5 - 2), - (2.5, 2, 2 - 2.5), - - (0, 0, 0), - (0, 2, 2), - (0, -2, -2), - - (-3, -1, 2), - (-1, -3, -2), - (2, 0, -2), - (-2, 0, 2), - - (1.00006, 1, 1 - 1.00006), - (1, 1.00006, 1.00006 - 1), - (1.00004, 1, 
1 - 1.00004), - (1, 1.00004, 1.00004 - 1), - ], - ) + for output_format in OutputFormat: + self._assert_single_value_diff_matches( + BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4, output_format), + [ + (2, 2, 0), + (2, 5, 3), + (5, 2, -3), + (2.0, 2.0, 0), + (2, 2.0, 0), + (2.0, 2, 0), + (2, 2.5, 2.5 - 2), + (2.5, 2, 2 - 2.5), + + (0, 0, 0), + (0, 2, 2), + (0, -2, -2), + + (-3, -1, 2), + (-1, -3, -2), + (2, 0, -2), + (-2, 0, 2), + + (1.00006, 1, 1 - 1.00006), + (1, 1.00006, 1.00006 - 1), + (1.00004, 1, 1 - 1.00004), + (1, 1.00004, 1.00004 - 1), + ], + ) def test_number_diff_json(self): - self._assert_single_value_diff_matches( - BenchmarkDiffer(DifferenceStyle.RELATIVE, 4), - [ - (2, 2, 0), - (2, 5, (5 - 2) / 2), - (5, 2, (2 - 5) / 5), - (2.0, 2.0, 0), - (2, 2.0, 0), - (2.0, 2, 0), - (2, 2.5, (2.5 - 2) / 2), - (2.5, 2, (2 - 2.5) / 2.5), - - (0, 0, 0), - (0, 2, '+INF'), - (0, -2, '-INF'), - - (-3, -1, 0.6667), - (-1, -3, -2), - (2, 0, -1), - (-2, 0, 1), - - (1.00006, 1, -0.0001), - (1, 1.00006, 0.0001), - (1.000004, 1, '-0'), - (1, 1.000004, '+0'), - ], - ) + for output_format in OutputFormat: + self._assert_single_value_diff_matches( + BenchmarkDiffer(DifferenceStyle.RELATIVE, 4, output_format), + [ + (2, 2, 0), + (2, 5, (5 - 2) / 2), + (5, 2, (2 - 5) / 5), + (2.0, 2.0, 0), + (2, 2.0, 0), + (2.0, 2, 0), + (2, 2.5, (2.5 - 2) / 2), + (2.5, 2, (2 - 2.5) / 2.5), + + (0, 0, 0), + (0, 2, '+INF'), + (0, -2, '-INF'), + + (-3, -1, 0.6667), + (-1, -3, -2), + (2, 0, -1), + (-2, 0, 1), + + (1.00006, 1, -0.0001), + (1, 1.00006, 0.0001), + (1.000004, 1, '-0'), + (1, 1.000004, '+0'), + ], + ) + + def test_number_diff_humanized_json_and_console(self): + for output_format in [OutputFormat.JSON, OutputFormat.CONSOLE]: + self._assert_single_value_diff_matches( + BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, output_format), + [ + (2, 2, '0%'), + (2, 5, '+150%'), + (5, 2, '-60%'), + (2.0, 2.0, '0%'), + (2, 2.0, '0%'), + (2.0, 2, '0%'), + (2, 2.5, '+25%'), + (2.5, 2, '-20%'), + + (0, 0, 
'0%'), + (0, 2, '+INF%'), + (0, -2, '-INF%'), + + (-3, -1, '+66.67%'), + (-1, -3, '-200%'), + (2, 0, '-100%'), + (-2, 0, '+100%'), + + (1.00006, 1, '-0.01%'), + (1, 1.00006, '+0.01%'), + (1.000004, 1, '-0%'), + (1, 1.000004, '+0%'), + ], + ) - def test_number_diff_humanized_json(self): + def test_number_diff_humanized_markdown(self): self._assert_single_value_diff_matches( - BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4), + BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN), [ - (2, 2, '0%'), - (2, 5, '+150%'), - (5, 2, '-60%'), - (2.0, 2.0, '0%'), - (2, 2.0, '0%'), - (2.0, 2, '0%'), - (2, 2.5, '+25%'), - (2.5, 2, '-20%'), - - (0, 0, '0%'), - (0, 2, '+INF%'), - (0, -2, '-INF%'), - - (-3, -1, '+66.67%'), - (-1, -3, '-200%'), - (2, 0, '-100%'), - (-2, 0, '+100%'), - - (1.00006, 1, '-0.01%'), - (1, 1.00006, '+0.01%'), - (1.000004, 1, '-0%'), - (1, 1.000004, '+0%'), + (2, 2, '`0%`'), + (2, 5, '**`+150% ❌`**'), + (5, 2, '**`-60% ✅`**'), + (2.0, 2.0, '`0%`'), + (2, 2.0, '`0%`'), + (2.0, 2, '`0%`'), + (2, 2.5, '**`+25% ❌`**'), + (2.5, 2, '**`-20% ✅`**'), + + (0, 0, '`0%`'), + (0, 2, '`+INF%`'), + (0, -2, '`-INF%`'), + + (-3, -1, '**`+66.67% ❌`**'), + (-1, -3, '**`-200% ✅`**'), + (2, 0, '**`-100% ✅`**'), + (-2, 0, '**`+100% ❌`**'), + + (1.00006, 1, '**`-0.01% ✅`**'), + (1, 1.00006, '**`+0.01% ❌`**'), + (1.000004, 1, '`-0%`'), + (1, 1.000004, '`+0%`'), ], ) def test_type_mismatch(self): for style in DifferenceStyle: self._assert_single_value_diff_matches( - BenchmarkDiffer(style, 4), + BenchmarkDiffer(style, 4, OutputFormat.JSON), [ (1, {}, '!T'), ({}, 1, '!T'), @@ -255,7 +291,7 @@ def test_type_mismatch(self): def test_version_mismatch(self): for style in DifferenceStyle: self._assert_single_value_diff_matches( - BenchmarkDiffer(style, 4), + BenchmarkDiffer(style, 4, OutputFormat.JSON), [ ({'a': 123, 'version': 1}, {'a': 123, 'version': 2}, '!V'), ({'a': 123, 'version': 2}, {'a': 123, 'version': 1}, '!V'), @@ -275,7 +311,7 @@ def 
test_version_mismatch(self): def test_missing(self): for style in DifferenceStyle: self._assert_single_value_diff_matches( - BenchmarkDiffer(style, None), + BenchmarkDiffer(style, None, OutputFormat.JSON), [ (1, None, '!A'), (None, 1, '!B'), @@ -300,10 +336,173 @@ def test_missing(self): def test_missing_vs_null(self): for style in DifferenceStyle: self._assert_single_value_diff_matches( - BenchmarkDiffer(style, None), + BenchmarkDiffer(style, None, OutputFormat.JSON), [ ({'a': None}, {}, {}), ({}, {'a': None}, {}), ], nest_result=False, ) + + +class TestDiffTableFormatter(unittest.TestCase): + def setUp(self): + self.maxDiff = 10000 + + self.report_before = { + 'project A': { + 'preset X': {'A1': 99, 'A2': 50, 'version': 1}, + 'preset Y': {'A1': 0, 'A2': 50, 'version': 1}, + }, + 'project B': { + 'preset X': { 'A2': 50}, + 'preset Y': {'A1': 0}, + }, + 'project C': { + 'preset X': {'A1': 0, 'A2': 50, 'version': 1}, + }, + 'project D': { + 'preset X': {'A1': 999}, + }, + } + self.report_after = { + 'project A': { + 'preset X': {'A1': 100, 'A2': 50, 'version': 1}, + 'preset Y': {'A1': 500, 'A2': 500, 'version': 2}, + }, + 'project B': { + 'preset X': {'A1': 0}, + 'preset Y': { 'A2': 50}, + }, + 'project C': { + 'preset Y': {'A1': 0, 'A2': 50, 'version': 1}, + }, + 'project E': { + 'preset Y': { 'A2': 999}, + }, + } + + def test_diff_table_formatter(self): + report_before = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_DEVELOP_JSON_PATH)) + report_after = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_BRANCH_JSON_PATH)) + differ = BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN) + diff = differ.run(report_before, report_after) + + self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.MARKDOWN), SUMMARIZED_DIFF_HUMANIZED_MD) + + def test_diff_table_formatter_json_absolute(self): + differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4, OutputFormat.JSON) + diff = differ.run(self.report_before, self.report_after) + + 
expected_formatted_table = dedent("""\ + { + "preset X": { + "project A": { + "A1": 1, + "A2": 0 + }, + "project B": { + "A1": "!B", + "A2": "!A" + }, + "project C": { + "A1": "!A", + "A2": "!A" + }, + "project D": { + "A1": "!A", + "A2": "!A" + }, + "project E": { + "A1": "!B", + "A2": "!B" + } + }, + "preset Y": { + "project A": { + "A1": "!V", + "A2": "!V" + }, + "project B": { + "A1": "!A", + "A2": "!B" + }, + "project C": { + "A1": "!B", + "A2": "!B" + }, + "project D": { + "A1": "!A", + "A2": "!A" + }, + "project E": { + "A1": "!B", + "A2": "!B" + } + } + }""" + ) + self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.JSON), expected_formatted_table) + + def test_diff_table_formatter_console_relative(self): + differ = BenchmarkDiffer(DifferenceStyle.RELATIVE, 4, OutputFormat.CONSOLE) + diff = differ.run(self.report_before, self.report_after) + + expected_formatted_table = dedent(""" + PRESET X + |-----------|--------|----| + | project | A1 | A2 | + |-----------|--------|----| + | project A | 0.0101 | 0 | + | project B | !B | !A | + | project C | !A | !A | + | project D | !A | !A | + | project E | !B | !B | + |-----------|--------|----| + + PRESET Y + |-----------|----|----| + | project | A1 | A2 | + |-----------|----|----| + | project A | !V | !V | + | project B | !A | !B | + | project C | !B | !B | + | project D | !A | !A | + | project E | !B | !B | + |-----------|----|----| + """) + self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.CONSOLE), expected_formatted_table) + + def test_diff_table_formatter_markdown_humanized(self): + differ = BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN) + diff = differ.run(self.report_before, self.report_after) + + expected_formatted_table = dedent(""" + ### `preset X` + | project | A1 | A2 | + |:---------:|---------------:|-----:| + | project A | **`+1.01% ❌`** | `0%` | + | project B | `!B` | `!A` | + | project C | `!A` | `!A` | + | project D | `!A` | `!A` | + | 
project E | `!B` | `!B` | + + ### `preset Y` + | project | A1 | A2 | + |:---------:|-----:|-----:| + | project A | `!V` | `!V` | + | project B | `!A` | `!B` | + | project C | `!B` | `!B` | + | project D | `!A` | `!A` | + | project E | `!B` | `!B` | + + + `!V` = version mismatch + `!B` = no value in the "before" version + `!A` = no value in the "after" version + `!T` = one or both values were not numeric and could not be compared + `-0` = very small negative value rounded to zero + `+0` = very small positive value rounded to zero + + """) + self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.MARKDOWN), expected_formatted_table)