From 8c9856c52c464c598b55e75c7873a27778d9957d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= <kamil.sliwak@codepoets.it>
Date: Fri, 18 Mar 2022 14:51:11 +0100
Subject: [PATCH] benchmark_diff: Add table mode with support for json, console
 and markdown output

---
 scripts/externalTests/benchmark_diff.py       | 259 +++++++++++-
 ...benchmark-diff-develop-branch-humanized.md |  75 ++++
 .../test_externalTests_benchmark_diff.py      | 371 ++++++++++++++----
 3 files changed, 610 insertions(+), 95 deletions(-)
 create mode 100644 test/scripts/fixtures/summarized-benchmark-diff-develop-branch-humanized.md

diff --git a/scripts/externalTests/benchmark_diff.py b/scripts/externalTests/benchmark_diff.py
index 0669e62ebad1..234f85ac0ea9 100755
--- a/scripts/externalTests/benchmark_diff.py
+++ b/scripts/externalTests/benchmark_diff.py
@@ -4,19 +4,44 @@
 from dataclasses import dataclass
 from enum import Enum
 from pathlib import Path
-from typing import Any, Optional, Union
+from textwrap import dedent
+from typing import Any, Mapping, Optional, Set, Sequence, Union
 import json
 import sys
 
 
+class DiffMode(Enum):
+    IN_PLACE = 'inplace'
+    TABLE = 'table'
+
+
 class DifferenceStyle(Enum):
     ABSOLUTE = 'absolute'
     RELATIVE = 'relative'
     HUMANIZED = 'humanized'
 
 
+class OutputFormat(Enum):
+    JSON = 'json'
+    CONSOLE = 'console'
+    MARKDOWN = 'markdown'
+
+
 DEFAULT_RELATIVE_PRECISION = 4
-DEFAULT_DIFFERENCE_STYLE = DifferenceStyle.ABSOLUTE
+
+DEFAULT_DIFFERENCE_STYLE = {
+    DiffMode.IN_PLACE: DifferenceStyle.ABSOLUTE,
+    DiffMode.TABLE: DifferenceStyle.HUMANIZED,
+}
+assert all(t in DiffMode for t in DEFAULT_DIFFERENCE_STYLE)
+assert all(d in DifferenceStyle for d in DEFAULT_DIFFERENCE_STYLE.values())
+
+DEFAULT_OUTPUT_FORMAT = {
+    DiffMode.IN_PLACE: OutputFormat.JSON,
+    DiffMode.TABLE: OutputFormat.CONSOLE,
+}
+assert all(m in DiffMode for m in DEFAULT_OUTPUT_FORMAT)
+assert all(o in OutputFormat for o in DEFAULT_OUTPUT_FORMAT.values())
 
 
 class ValidationError(Exception):
@@ -30,14 +55,17 @@ class CommandLineError(ValidationError):
 class BenchmarkDiffer:
     difference_style: DifferenceStyle
     relative_precision: Optional[int]
+    output_format: OutputFormat
 
     def __init__(
         self,
         difference_style: DifferenceStyle,
         relative_precision: Optional[int],
+        output_format: OutputFormat,
     ):
         self.difference_style = difference_style
         self.relative_precision = relative_precision
+        self.output_format = output_format
 
     def run(self, before: Any, after: Any) -> Optional[Union[dict, str, int, float]]:
         if not isinstance(before, dict) or not isinstance(after, dict):
@@ -106,8 +134,13 @@ def _diff_numbers(self, value_before: Union[int, float], value_after: Union[int,
         return diff
 
     def _humanize_diff(self, diff: Union[str, int, float]) -> str:
+        def wrap(value: str, symbol: str):
+            return f"{symbol}{value}{symbol}"
+
+        markdown = (self.output_format == OutputFormat.MARKDOWN)
+
         if isinstance(diff, str) and diff.startswith('!'):
-            return diff
+            return wrap(diff, '`' if markdown else '')
 
         value: Union[str, int, float]
         if isinstance(diff, (int, float)):
@@ -118,32 +151,209 @@ def _humanize_diff(self, diff: Union[str, int, float]) -> str:
                 value = round(value, self.relative_precision - 2)
                 if isinstance(value, float) and value.is_integer():
                     value = int(value)
+            suffix = ''
             prefix = ''
             if diff < 0:
                 prefix = ''
+                if markdown:
+                    suffix += ' ✅'
             elif diff > 0:
                 prefix = '+'
+                if markdown:
+                    suffix += ' ❌'
+            important = (diff != 0)
         else:
             value = diff
+            important = False
             prefix = ''
+            suffix = ''
+
+        return wrap(
+            wrap(
+                f"{prefix}{value}%{suffix}",
+                '`' if markdown else ''
+            ),
+            '**' if important and markdown else ''
+        )
+
+
+@dataclass(frozen=True)
+class DiffTable:
+    columns: Mapping[str, Sequence[Union[int, float, str]]]
+
+
+class DiffTableSet:
+    table_headers: Sequence[str]
+    row_headers: Sequence[str]
+    column_headers: Sequence[str]
+
+    # Cells is a nested dict rather than a 3D array so that conversion to JSON is straightforward
+    cells: Mapping[str, Mapping[str, Mapping[str, Union[int, float, str]]]] # preset -> project -> attribute
+
+    def __init__(self, diff: dict):
+        self.table_headers = sorted(self._find_all_preset_names(diff))
+        self.column_headers = sorted(self._find_all_attribute_names(diff))
+        self.row_headers = sorted(project for project in diff)
+
+        # All dimensions must have unique values
+        assert len(self.table_headers) == len(set(self.table_headers))
+        assert len(self.column_headers) == len(set(self.column_headers))
+        assert len(self.row_headers) == len(set(self.row_headers))
+
+        self.cells = {
+            preset: {
+                project: {
+                    attribute: self._cell_content(diff, project, preset, attribute)
+                    for attribute in self.column_headers
+                }
+                for project in self.row_headers
+            }
+            for preset in self.table_headers
+        }
+
+    def calculate_row_column_width(self) -> int:
+        return max(len(h) for h in self.row_headers)
+
+    def calculate_column_widths(self, table_header: str) -> Sequence[int]:
+        assert table_header in self.table_headers
+
+        return [
+            max(
+                len(column_header),
+                max(
+                    len(str(self.cells[table_header][row_header][column_header]))
+                    for row_header in self.row_headers
+                )
+            )
+            for column_header in self.column_headers
+        ]
+
+    @classmethod
+    def _find_all_preset_names(cls, diff: dict) -> Set[str]:
+        return {
+            preset
+            for project, project_diff in diff.items()
+            if isinstance(project_diff, dict)
+            for preset in project_diff
+        }
+
+    @classmethod
+    def _find_all_attribute_names(cls, diff: dict) -> Set[str]:
+        return {
+            attribute
+            for project, project_diff in diff.items()
+            if isinstance(project_diff, dict)
+            for preset, preset_diff in project_diff.items()
+            if isinstance(preset_diff, dict)
+            for attribute in preset_diff
+        }
+
+    @classmethod
+    def _cell_content(cls, diff: dict, project: str, preset: str, attribute: str) -> str:
+        assert project in diff
+
+        if isinstance(diff[project], str):
+            return diff[project]
+        if preset not in diff[project]:
+            return ''
+        if isinstance(diff[project][preset], str):
+            return diff[project][preset]
+        if attribute not in diff[project][preset]:
+            return ''
+
+        return diff[project][preset][attribute]
+
+
+class DiffTableFormatter:
+    LEGEND = dedent("""
+        `!V` = version mismatch
+        `!B` = no value in the "before" version
+        `!A` = no value in the "after" version
+        `!T` = one or both values were not numeric and could not be compared
+        `-0` = very small negative value rounded to zero
+        `+0` = very small positive value rounded to zero
+    """)
+
+    @classmethod
+    def run(cls, diff_table_set: DiffTableSet, output_format: OutputFormat):
+        if output_format == OutputFormat.JSON:
+            return json.dumps(diff_table_set.cells, indent=4, sort_keys=True)
+        else:
+            assert output_format in {OutputFormat.CONSOLE, OutputFormat.MARKDOWN}
+
+            output = ''
+            for table_header in diff_table_set.table_headers:
+                column_widths = ([
+                    diff_table_set.calculate_row_column_width(),
+                    *diff_table_set.calculate_column_widths(table_header)
+                ])
+
+                if output_format == OutputFormat.MARKDOWN:
+                    output += f'\n### `{table_header}`\n'
+                else:
+                    output += f'\n{table_header.upper()}\n'
+
+                if output_format == OutputFormat.CONSOLE:
+                    output += cls._format_separator_row(column_widths, output_format) + '\n'
+                output += cls._format_data_row(['project', *diff_table_set.column_headers], column_widths) + '\n'
+                output += cls._format_separator_row(column_widths, output_format) + '\n'
+
+                for row_header in diff_table_set.row_headers:
+                    row = [
+                        diff_table_set.cells[table_header][row_header][column_header]
+                        for column_header in diff_table_set.column_headers
+                    ]
+                    output += cls._format_data_row([row_header, *row], column_widths) + '\n'
+
+                if output_format == OutputFormat.CONSOLE:
+                    output += cls._format_separator_row(column_widths, output_format) + '\n'
+
+            if output_format == OutputFormat.MARKDOWN:
+                output += f'\n{cls.LEGEND}\n'
+            return output
+
+    @classmethod
+    def _format_separator_row(cls, widths: Sequence[int], output_format: OutputFormat):
+        assert output_format in {OutputFormat.CONSOLE, OutputFormat.MARKDOWN}
+
+        if output_format == OutputFormat.MARKDOWN:
+            return '|:' + ':|-'.join('-' * width for width in widths) + ':|'
+        else:
+            return '|-' + '-|-'.join('-' * width for width in widths) + '-|'
 
-        return f"{prefix}{value}%"
+    @classmethod
+    def _format_data_row(cls, cells: Sequence[Union[int, float, str]], widths: Sequence[int]):
+        assert len(cells) == len(widths)
+
+        return '| ' + ' | '.join(str(cell).rjust(width) for cell, width in zip(cells, widths)) + ' |'
 
 
 @dataclass(frozen=True)
 class CommandLineOptions:
+    diff_mode: DiffMode
     report_before: Path
     report_after: Path
     difference_style: DifferenceStyle
     relative_precision: int
+    output_format: OutputFormat
 
 
 def process_commandline() -> CommandLineOptions:
     script_description = (
-        "Compares summarized benchmark reports and outputs JSON with the same structure but listing only differences."
+        "Compares summarized benchmark reports and outputs JSON with the same structure but listing only differences. "
+        "Can also print the output as a markdown table and format the values to make differences stand out more."
     )
 
     parser = ArgumentParser(description=script_description)
+    parser.add_argument(
+        dest='diff_mode',
+        choices=[m.value for m in DiffMode],
+        help=(
+            "Diff mode: "
+            f"'{DiffMode.IN_PLACE.value}' preserves input JSON structure and replaces values with differences; "
+            f"'{DiffMode.TABLE.value}' creates a table assuming 3-level project/preset/attribute structure."
+        )
+    )
     parser.add_argument(dest='report_before', help="Path to a JSON file containing original benchmark results.")
     parser.add_argument(dest='report_after', help="Path to a JSON file containing new benchmark results.")
     parser.add_argument(
@@ -156,7 +366,8 @@ def process_commandline() -> CommandLineOptions:
             f"'{DifferenceStyle.RELATIVE.value}' also divides by the original; "
             f"'{DifferenceStyle.HUMANIZED.value}' is like relative but value is a percentage and "
             "positive/negative changes are emphasized. "
-            f"(default: '{DEFAULT_DIFFERENCE_STYLE}')."
+            f"(default: '{DEFAULT_DIFFERENCE_STYLE[DiffMode.IN_PLACE].value}' in '{DiffMode.IN_PLACE.value}' mode, "
+            f"'{DEFAULT_DIFFERENCE_STYLE[DiffMode.TABLE].value}' in '{DiffMode.TABLE.value}' mode)"
         )
     )
     # NOTE: Negative values are valid for precision. round() handles them in a sensible way.
@@ -173,21 +384,47 @@ def process_commandline() -> CommandLineOptions:
             f"(default: {DEFAULT_RELATIVE_PRECISION})"
         )
     )
+    parser.add_argument(
+        '--output-format',
+        dest='output_format',
+        choices=[o.value for o in OutputFormat],
+        help=(
+            "The format to use for the diff: "
+            f"'{OutputFormat.JSON.value}' is raw JSON; "
+            f"'{OutputFormat.CONSOLE.value}' is a table with human-readable values that will look good in the console output. "
+            f"'{OutputFormat.MARKDOWN.value}' is similar to '{OutputFormat.CONSOLE.value}' but adjusted to "
+            "render as proper markdown and with extra elements (legend, emoji to make non-zero values stand out more, etc.). "
+            f"(default: '{DEFAULT_OUTPUT_FORMAT[DiffMode.IN_PLACE].value}' in '{DiffMode.IN_PLACE.value}' mode, "
+            f"'{DEFAULT_OUTPUT_FORMAT[DiffMode.TABLE].value}' in '{DiffMode.TABLE.value}' mode)"
+        )
+    )
 
     options = parser.parse_args()
 
     if options.difference_style is not None:
         difference_style = DifferenceStyle(options.difference_style)
     else:
-        difference_style = DEFAULT_DIFFERENCE_STYLE
+        difference_style = DEFAULT_DIFFERENCE_STYLE[DiffMode(options.diff_mode)]
+
+    if options.output_format is not None:
+        output_format = OutputFormat(options.output_format)
+    else:
+        output_format = DEFAULT_OUTPUT_FORMAT[DiffMode(options.diff_mode)]
 
     processed_options = CommandLineOptions(
+        diff_mode=DiffMode(options.diff_mode),
         report_before=Path(options.report_before),
         report_after=Path(options.report_after),
         difference_style=difference_style,
         relative_precision=options.relative_precision,
+        output_format=output_format,
     )
 
+    if processed_options.diff_mode == DiffMode.IN_PLACE and processed_options.output_format != OutputFormat.JSON:
+        raise CommandLineError(
+            f"Only the '{OutputFormat.JSON.value}' output format is supported in the '{DiffMode.IN_PLACE.value}' mode."
+        )
+
     return processed_options
 
 
@@ -195,13 +432,17 @@ def main():
     try:
         options = process_commandline()
 
-        differ = BenchmarkDiffer(options.difference_style, options.relative_precision)
+        differ = BenchmarkDiffer(options.difference_style, options.relative_precision, options.output_format)
         diff = differ.run(
             json.loads(options.report_before.read_text('utf-8')),
             json.loads(options.report_after.read_text('utf-8')),
         )
 
-        print(json.dumps(diff, indent=4, sort_keys=True))
+        if options.diff_mode == DiffMode.IN_PLACE:
+            print(json.dumps(diff, indent=4, sort_keys=True))
+        else:
+            assert options.diff_mode == DiffMode.TABLE
+            print(DiffTableFormatter.run(DiffTableSet(diff), options.output_format))
 
         return 0
     except CommandLineError as exception:
diff --git a/test/scripts/fixtures/summarized-benchmark-diff-develop-branch-humanized.md b/test/scripts/fixtures/summarized-benchmark-diff-develop-branch-humanized.md
new file mode 100644
index 000000000000..ce351a5b709c
--- /dev/null
+++ b/test/scripts/fixtures/summarized-benchmark-diff-develop-branch-humanized.md
@@ -0,0 +1,75 @@
+
+### `ir-no-optimize`
+|   project |  bytecode_size | deployment_gas |     method_gas |
+|:---------:|---------------:|---------------:|---------------:|
+|    bleeps |                |                |                |
+|    colony |                |                |                |
+| elementfi |                |                |           `0%` |
+|       ens |           `!A` |           `!A` |           `!A` |
+|     euler | **`+1.43% ❌`** |           `0%` | **`+2.47% ❌`** |
+|    gnosis |           `!B` |           `!B` |           `!B` |
+|  zeppelin |                |                |                |
+
+### `ir-optimize-evm+yul`
+|   project |   bytecode_size |  deployment_gas | method_gas |
+|:---------:|----------------:|----------------:|-----------:|
+|    bleeps |  **`+0.53% ❌`** |            `0%` |      `-0%` |
+|    colony |            `!A` |            `!A` |       `!A` |
+| elementfi |                 |                 |            |
+|       ens |            `!A` |            `!A` |       `!A` |
+|     euler | **`+12.64% ❌`** | **`+11.98% ❌`** |       `0%` |
+|    gnosis |            `!B` |            `!B` |       `!B` |
+|  zeppelin |                 |                 |            |
+
+### `ir-optimize-evm-only`
+|   project | bytecode_size | deployment_gas | method_gas |
+|:---------:|--------------:|---------------:|-----------:|
+|    bleeps |               |                |            |
+|    colony |               |                |            |
+| elementfi |          `!B` |           `!B` |       `!B` |
+|       ens |          `!A` |           `!A` |       `!A` |
+|     euler |          `!V` |           `!V` |       `!V` |
+|    gnosis |          `!B` |           `!B` |       `!B` |
+|  zeppelin |               |                |            |
+
+### `legacy-no-optimize`
+|   project | bytecode_size | deployment_gas | method_gas |
+|:---------:|--------------:|---------------:|-----------:|
+|    bleeps |               |                |            |
+|    colony |          `!B` |           `!B` |       `!B` |
+| elementfi |          `!A` |           `!B` |            |
+|       ens |          `!A` |           `!A` |       `!A` |
+|     euler |          `!V` |           `!V` |       `!V` |
+|    gnosis |          `!B` |           `!B` |       `!B` |
+|  zeppelin |               |                |            |
+
+### `legacy-optimize-evm+yul`
+|   project | bytecode_size | deployment_gas | method_gas |
+|:---------:|--------------:|---------------:|-----------:|
+|    bleeps |          `0%` |           `0%` |       `0%` |
+|    colony |          `0%` |                |            |
+| elementfi |          `!A` |           `!B` |            |
+|       ens |          `!A` |           `!A` |       `!A` |
+|     euler |          `!V` |           `!V` |       `!V` |
+|    gnosis |          `!B` |           `!B` |       `!B` |
+|  zeppelin |          `0%` |           `0%` |            |
+
+### `legacy-optimize-evm-only`
+|   project | bytecode_size | deployment_gas | method_gas |
+|:---------:|--------------:|---------------:|-----------:|
+|    bleeps |               |                |            |
+|    colony |               |                |            |
+| elementfi |          `!A` |           `!A` |       `!A` |
+|       ens |          `!A` |           `!A` |       `!A` |
+|     euler |          `!V` |           `!V` |       `!V` |
+|    gnosis |          `!B` |           `!B` |       `!B` |
+|  zeppelin |               |                |            |
+
+
+`!V` = version mismatch
+`!B` = no value in the "before" version
+`!A` = no value in the "after" version
+`!T` = one or both values were not numeric and could not be compared
+`-0` = very small negative value rounded to zero
+`+0` = very small positive value rounded to zero
+
diff --git a/test/scripts/test_externalTests_benchmark_diff.py b/test/scripts/test_externalTests_benchmark_diff.py
index b4ccf07450d5..b40db8e63384 100644
--- a/test/scripts/test_externalTests_benchmark_diff.py
+++ b/test/scripts/test_externalTests_benchmark_diff.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 
+from textwrap import dedent
 import json
 import unittest
 
@@ -7,12 +8,15 @@
 
 # NOTE: This test file file only works with scripts/ added to PYTHONPATH so pylint can't find the imports
 # pragma pylint: disable=import-error
-from externalTests.benchmark_diff import BenchmarkDiffer, DifferenceStyle
+from externalTests.benchmark_diff import BenchmarkDiffer, DifferenceStyle, DiffTableSet, DiffTableFormatter, OutputFormat
 # pragma pylint: enable=import-error
 
 SUMMARIZED_BENCHMARKS_DEVELOP_JSON_PATH = FIXTURE_DIR / 'summarized-benchmarks-develop.json'
 SUMMARIZED_BENCHMARKS_BRANCH_JSON_PATH = FIXTURE_DIR / 'summarized-benchmarks-branch.json'
 
+SUMMARIZED_DIFF_HUMANIZED_MD_PATH = FIXTURE_DIR / 'summarized-benchmark-diff-develop-branch-humanized.md'
+SUMMARIZED_DIFF_HUMANIZED_MD = load_fixture(SUMMARIZED_DIFF_HUMANIZED_MD_PATH)
+
 
 class TestBenchmarkDiff(unittest.TestCase):
     def setUp(self):
@@ -108,7 +112,7 @@ def test_benchmark_diff(self):
             "gnosis": "!B",
             "ens": "!A",
         }
-        differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, None)
+        differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, None, OutputFormat.JSON)
         self.assertEqual(differ.run(report_before, report_after), expected_diff)
 
 
@@ -138,105 +142,137 @@ def _assert_single_value_diff_matches(self, differ, cases, nest_result=True, nes
 
     def test_empty(self):
         for style in DifferenceStyle:
-            differ = BenchmarkDiffer(style, None)
+            differ = BenchmarkDiffer(style, None, OutputFormat.JSON)
             self._assert_single_value_diff_matches(differ, [({}, {}, {})], nest_result=False)
 
     def test_null(self):
         for style in DifferenceStyle:
-            differ = BenchmarkDiffer(style, None)
+            differ = BenchmarkDiffer(style, None, OutputFormat.JSON)
             self._assert_single_value_diff_matches(differ, [(None, None, {})], nest_result=False)
 
     def test_number_diff_absolute_json(self):
-        self._assert_single_value_diff_matches(
-            BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4),
-            [
-                (2,   2,    0),
-                (2,   5,    3),
-                (5,   2,   -3),
-                (2.0, 2.0,  0),
-                (2,   2.0,  0),
-                (2.0, 2,    0),
-                (2,   2.5,  2.5 - 2),
-                (2.5, 2,    2 - 2.5),
-
-                (0,   0,    0),
-                (0,   2,    2),
-                (0,   -2,  -2),
-
-                (-3, -1,    2),
-                (-1, -3,   -2),
-                (2,   0,   -2),
-                (-2,  0,    2),
-
-                (1.00006, 1,  1 - 1.00006),
-                (1, 1.00006,  1.00006 - 1),
-                (1.00004, 1, 1 - 1.00004),
-                (1, 1.00004, 1.00004 - 1),
-            ],
-        )
+        for output_format in OutputFormat:
+            self._assert_single_value_diff_matches(
+                BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4, output_format),
+                [
+                    (2,   2,    0),
+                    (2,   5,    3),
+                    (5,   2,   -3),
+                    (2.0, 2.0,  0),
+                    (2,   2.0,  0),
+                    (2.0, 2,    0),
+                    (2,   2.5,  2.5 - 2),
+                    (2.5, 2,    2 - 2.5),
+
+                    (0,   0,    0),
+                    (0,   2,    2),
+                    (0,   -2,  -2),
+
+                    (-3, -1,    2),
+                    (-1, -3,   -2),
+                    (2,   0,   -2),
+                    (-2,  0,    2),
+
+                    (1.00006, 1,  1 - 1.00006),
+                    (1, 1.00006,  1.00006 - 1),
+                    (1.00004, 1, 1 - 1.00004),
+                    (1, 1.00004, 1.00004 - 1),
+                ],
+            )
 
     def test_number_diff_json(self):
-        self._assert_single_value_diff_matches(
-            BenchmarkDiffer(DifferenceStyle.RELATIVE, 4),
-            [
-                (2,   2,   0),
-                (2,   5,   (5 - 2) / 2),
-                (5,   2,   (2 - 5) / 5),
-                (2.0, 2.0, 0),
-                (2,   2.0, 0),
-                (2.0, 2,   0),
-                (2,   2.5, (2.5 - 2) / 2),
-                (2.5, 2,   (2 - 2.5) / 2.5),
-
-                (0,   0,   0),
-                (0,   2,   '+INF'),
-                (0,   -2,  '-INF'),
-
-                (-3, -1,   0.6667),
-                (-1, -3,  -2),
-                (2,   0,  -1),
-                (-2,  0,   1),
-
-                (1.00006, 1,   -0.0001),
-                (1, 1.00006,    0.0001),
-                (1.000004, 1, '-0'),
-                (1, 1.000004, '+0'),
-            ],
-        )
+        for output_format in OutputFormat:
+            self._assert_single_value_diff_matches(
+                BenchmarkDiffer(DifferenceStyle.RELATIVE, 4, output_format),
+                [
+                    (2,   2,   0),
+                    (2,   5,   (5 - 2) / 2),
+                    (5,   2,   (2 - 5) / 5),
+                    (2.0, 2.0, 0),
+                    (2,   2.0, 0),
+                    (2.0, 2,   0),
+                    (2,   2.5, (2.5 - 2) / 2),
+                    (2.5, 2,   (2 - 2.5) / 2.5),
+
+                    (0,   0,   0),
+                    (0,   2,   '+INF'),
+                    (0,   -2,  '-INF'),
+
+                    (-3, -1,   0.6667),
+                    (-1, -3,  -2),
+                    (2,   0,  -1),
+                    (-2,  0,   1),
+
+                    (1.00006, 1,   -0.0001),
+                    (1, 1.00006,    0.0001),
+                    (1.000004, 1, '-0'),
+                    (1, 1.000004, '+0'),
+                ],
+            )
+
+    def test_number_diff_humanized_json_and_console(self):
+        for output_format in [OutputFormat.JSON, OutputFormat.CONSOLE]:
+            self._assert_single_value_diff_matches(
+                BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, output_format),
+                [
+                    (2,   2,      '0%'),
+                    (2,   5,   '+150%'),
+                    (5,   2,    '-60%'),
+                    (2.0, 2.0,    '0%'),
+                    (2,   2.0,    '0%'),
+                    (2.0, 2,      '0%'),
+                    (2,   2.5,  '+25%'),
+                    (2.5, 2,    '-20%'),
+
+                    (0,   0,      '0%'),
+                    (0,   2,   '+INF%'),
+                    (0,   -2,  '-INF%'),
+
+                    (-3, -1, '+66.67%'),
+                    (-1, -3,   '-200%'),
+                    (2,   0,   '-100%'),
+                    (-2,  0,   '+100%'),
+
+                    (1.00006, 1,  '-0.01%'),
+                    (1, 1.00006,  '+0.01%'),
+                    (1.000004, 1,    '-0%'),
+                    (1, 1.000004,    '+0%'),
+                ],
+            )
 
-    def test_number_diff_humanized_json(self):
+    def test_number_diff_humanized_markdown(self):
         self._assert_single_value_diff_matches(
-            BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4),
+            BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN),
             [
-                (2,   2,      '0%'),
-                (2,   5,   '+150%'),
-                (5,   2,    '-60%'),
-                (2.0, 2.0,    '0%'),
-                (2,   2.0,    '0%'),
-                (2.0, 2,      '0%'),
-                (2,   2.5,  '+25%'),
-                (2.5, 2,    '-20%'),
-
-                (0,   0,      '0%'),
-                (0,   2,   '+INF%'),
-                (0,   -2,  '-INF%'),
-
-                (-3, -1, '+66.67%'),
-                (-1, -3,   '-200%'),
-                (2,   0,   '-100%'),
-                (-2,  0,   '+100%'),
-
-                (1.00006, 1,  '-0.01%'),
-                (1, 1.00006,  '+0.01%'),
-                (1.000004, 1,    '-0%'),
-                (1, 1.000004,    '+0%'),
+                (2,   2,             '`0%`'),
+                (2,   5,   '**`+150% ❌`**'),
+                (5,   2,    '**`-60% ✅`**'),
+                (2.0, 2.0,           '`0%`'),
+                (2,   2.0,           '`0%`'),
+                (2.0, 2,             '`0%`'),
+                (2,   2.5,  '**`+25% ❌`**'),
+                (2.5, 2,    '**`-20% ✅`**'),
+
+                (0,   0,             '`0%`'),
+                (0,   2,          '`+INF%`'),
+                (0,   -2,         '`-INF%`'),
+
+                (-3, -1, '**`+66.67% ❌`**'),
+                (-1, -3,   '**`-200% ✅`**'),
+                (2,   0,   '**`-100% ✅`**'),
+                (-2,  0,   '**`+100% ❌`**'),
+
+                (1.00006, 1,  '**`-0.01% ✅`**'),
+                (1, 1.00006,  '**`+0.01% ❌`**'),
+                (1.000004, 1,           '`-0%`'),
+                (1, 1.000004,           '`+0%`'),
             ],
         )
 
     def test_type_mismatch(self):
         for style in DifferenceStyle:
             self._assert_single_value_diff_matches(
-                BenchmarkDiffer(style, 4),
+                BenchmarkDiffer(style, 4, OutputFormat.JSON),
                 [
                     (1, {}, '!T'),
                     ({}, 1, '!T'),
@@ -255,7 +291,7 @@ def test_type_mismatch(self):
     def test_version_mismatch(self):
         for style in DifferenceStyle:
             self._assert_single_value_diff_matches(
-                BenchmarkDiffer(style, 4),
+                BenchmarkDiffer(style, 4, OutputFormat.JSON),
                 [
                     ({'a': 123, 'version': 1}, {'a': 123, 'version': 2}, '!V'),
                     ({'a': 123, 'version': 2}, {'a': 123, 'version': 1}, '!V'),
@@ -275,7 +311,7 @@ def test_version_mismatch(self):
     def test_missing(self):
         for style in DifferenceStyle:
             self._assert_single_value_diff_matches(
-                BenchmarkDiffer(style, None),
+                BenchmarkDiffer(style, None, OutputFormat.JSON),
                 [
                     (1, None, '!A'),
                     (None, 1, '!B'),
@@ -300,10 +336,173 @@ def test_missing(self):
     def test_missing_vs_null(self):
         for style in DifferenceStyle:
             self._assert_single_value_diff_matches(
-                BenchmarkDiffer(style, None),
+                BenchmarkDiffer(style, None, OutputFormat.JSON),  # JSON format; in the cases below a key set to None vs. an absent key is expected to give an empty diff
                 [
                     ({'a': None}, {}, {}),
                     ({}, {'a': None}, {}),
                 ],
                 nest_result=False,
             )
+
+
+class TestDiffTableFormatter(unittest.TestCase):
+    def setUp(self):
+        self.maxDiff = 10000  # raise unittest's limit on diff output length so failures on the long expected tables are shown in full
+
+        self.report_before = {  # "before" report, nested as project -> preset -> values
+            'project A': {
+                'preset X': {'A1':  99, 'A2': 50, 'version': 1},
+                'preset Y': {'A1':   0, 'A2': 50, 'version': 1},
+            },
+            'project B': {
+                'preset X': {           'A2': 50},  # no A1 here; the "after" side of this preset has only A1
+                'preset Y': {'A1':   0},  # no A2 here; the "after" side of this preset has only A2
+            },
+            'project C': {
+                'preset X': {'A1':   0, 'A2': 50, 'version': 1},  # "after" has preset Y for this project instead of preset X
+            },
+            'project D': {
+                'preset X': {'A1': 999},  # project D does not appear in the "after" report at all
+            },
+        }
+        self.report_after = {  # "after" report, same project -> preset -> values nesting
+            'project A': {
+                'preset X': {'A1': 100, 'A2':  50, 'version': 1},
+                'preset Y': {'A1': 500, 'A2': 500, 'version': 2},  # version differs from the "before" side (1 vs 2)
+            },
+            'project B': {
+                'preset X': {'A1':   0},
+                'preset Y': {           'A2': 50},
+            },
+            'project C': {
+                'preset Y': {'A1':   0, 'A2': 50, 'version': 1},
+            },
+            'project E': {
+                'preset Y': {           'A2': 999},  # project E does not appear in the "before" report at all
+            },
+        }
+
+    def test_diff_table_formatter(self):  # fixture-based check of the humanized markdown table output
+        report_before = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_DEVELOP_JSON_PATH))  # "develop" fixture serves as the "before" report
+        report_after = json.loads(load_fixture(SUMMARIZED_BENCHMARKS_BRANCH_JSON_PATH))  # "branch" fixture serves as the "after" report
+        differ = BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN)
+        diff = differ.run(report_before, report_after)
+
+        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.MARKDOWN), SUMMARIZED_DIFF_HUMANIZED_MD)  # NOTE(review): constant presumably holds the contents of the humanized .md fixture - confirm at its definition
+
+    def test_diff_table_formatter_json_absolute(self):  # absolute differences of the setUp reports, rendered as JSON
+        differ = BenchmarkDiffer(DifferenceStyle.ABSOLUTE, 4, OutputFormat.JSON)
+        diff = differ.run(self.report_before, self.report_after)  # input reports are nested project -> preset; the expected output below is preset -> project
+
+        expected_formatted_table = dedent("""\
+            {
+                "preset X": {
+                    "project A": {
+                        "A1": 1,
+                        "A2": 0
+                    },
+                    "project B": {
+                        "A1": "!B",
+                        "A2": "!A"
+                    },
+                    "project C": {
+                        "A1": "!A",
+                        "A2": "!A"
+                    },
+                    "project D": {
+                        "A1": "!A",
+                        "A2": "!A"
+                    },
+                    "project E": {
+                        "A1": "!B",
+                        "A2": "!B"
+                    }
+                },
+                "preset Y": {
+                    "project A": {
+                        "A1": "!V",
+                        "A2": "!V"
+                    },
+                    "project B": {
+                        "A1": "!A",
+                        "A2": "!B"
+                    },
+                    "project C": {
+                        "A1": "!B",
+                        "A2": "!B"
+                    },
+                    "project D": {
+                        "A1": "!A",
+                        "A2": "!A"
+                    },
+                    "project E": {
+                        "A1": "!B",
+                        "A2": "!B"
+                    }
+                }
+            }"""
+        )  # backslash after the opening quotes and closing quotes right after "}": the expected text has no leading or trailing newline
+        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.JSON), expected_formatted_table)  # every preset is expected to list all five projects with both A1 and A2, using !A/!B/!V markers where no number can be given
+
+    def test_diff_table_formatter_console_relative(self):  # relative differences of the setUp reports, rendered as console tables
+        differ = BenchmarkDiffer(DifferenceStyle.RELATIVE, 4, OutputFormat.CONSOLE)  # NOTE(review): 4 is presumably the relative precision; (100 - 99) / 99 is expected as 0.0101 below
+        diff = differ.run(self.report_before, self.report_after)
+
+        expected_formatted_table = dedent("""
+            PRESET X
+            |-----------|--------|----|
+            |   project |     A1 | A2 |
+            |-----------|--------|----|
+            | project A | 0.0101 |  0 |
+            | project B |     !B | !A |
+            | project C |     !A | !A |
+            | project D |     !A | !A |
+            | project E |     !B | !B |
+            |-----------|--------|----|
+
+            PRESET Y
+            |-----------|----|----|
+            |   project | A1 | A2 |
+            |-----------|----|----|
+            | project A | !V | !V |
+            | project B | !A | !B |
+            | project C | !B | !B |
+            | project D | !A | !A |
+            | project E | !B | !B |
+            |-----------|----|----|
+        """)  # no backslash after the opening quotes: the expected text starts with a newline and ends with one after the last table
+        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.CONSOLE), expected_formatted_table)  # one table per preset, one row per project, one column per value name
+
+    def test_diff_table_formatter_markdown_humanized(self):  # humanized differences of the setUp reports, rendered as markdown tables
+        differ = BenchmarkDiffer(DifferenceStyle.HUMANIZED, 4, OutputFormat.MARKDOWN)
+        diff = differ.run(self.report_before, self.report_after)  # A1 of project A / preset X goes 99 -> 100, expected below as +1.01%
+
+        expected_formatted_table = dedent("""
+            ### `preset X`
+            |   project |             A1 |   A2 |
+            |:---------:|---------------:|-----:|
+            | project A | **`+1.01% ❌`** | `0%` |
+            | project B |           `!B` | `!A` |
+            | project C |           `!A` | `!A` |
+            | project D |           `!A` | `!A` |
+            | project E |           `!B` | `!B` |
+
+            ### `preset Y`
+            |   project |   A1 |   A2 |
+            |:---------:|-----:|-----:|
+            | project A | `!V` | `!V` |
+            | project B | `!A` | `!B` |
+            | project C | `!B` | `!B` |
+            | project D | `!A` | `!A` |
+            | project E | `!B` | `!B` |
+
+
+            `!V` = version mismatch
+            `!B` = no value in the "before" version
+            `!A` = no value in the "after" version
+            `!T` = one or both values were not numeric and could not be compared
+            `-0` = very small negative value rounded to zero
+            `+0` = very small positive value rounded to zero
+
+        """)  # expected text starts with a newline and ends with the marker legend followed by a blank line
+        self.assertEqual(DiffTableFormatter.run(DiffTableSet(diff), OutputFormat.MARKDOWN), expected_formatted_table)  # the legend is expected in full even though !T, -0 and +0 do not occur in these tables