From 9bc0c5ea1887ddfd3dac170b343241e5ce3a81d6 Mon Sep 17 00:00:00 2001
From: Santiago Palladino <santiago@aztecprotocol.com>
Date: Wed, 18 Oct 2023 11:11:45 -0300
Subject: [PATCH] chore: Less noisy benchmark reports

Hides benchmark reports in PR comments behind a collapsible, and only
shows a list of the metrics with a significant change. Increases the
warning threshold for measurements with an absolute value of less than
100ms.
---
 yarn-project/scripts/benchmark.json           | 103 ------------------
 .../scripts/src/benchmarks/markdown.ts        |  98 ++++++++++++++---
 2 files changed, 80 insertions(+), 121 deletions(-)
 delete mode 100644 yarn-project/scripts/benchmark.json

diff --git a/yarn-project/scripts/benchmark.json b/yarn-project/scripts/benchmark.json
deleted file mode 100644
index 8443a67824f..00000000000
--- a/yarn-project/scripts/benchmark.json
+++ /dev/null
@@ -1,103 +0,0 @@
-{
-  "circuit_simulation_time_in_ms": {
-    "private-kernel-init": 54.9340490797546,
-    "private-kernel-ordering": 30.192484662576685,
-    "base-rollup": 871,
-    "root-rollup": 37.926829268292686,
-    "private-kernel-inner": 51.861111111111114,
-    "public-kernel-private-input": 51.72453703703704,
-    "public-kernel-non-first-iteration": 31.63425925925926,
-    "merge-rollup": 1
-  },
-  "circuit_input_size_in_bytes": {
-    "private-kernel-init": 56577,
-    "private-kernel-ordering": 20137,
-    "base-rollup": 631604,
-    "root-rollup": 4072,
-    "private-kernel-inner": 72288,
-    "public-kernel-private-input": 37359,
-    "public-kernel-non-first-iteration": 37401,
-    "merge-rollup": 2592
-  },
-  "circuit_output_size_in_bytes": {
-    "private-kernel-init": 14745,
-    "private-kernel-ordering": 8089,
-    "base-rollup": 810,
-    "root-rollup": 1097,
-    "private-kernel-inner": 14745,
-    "public-kernel-private-input": 14745,
-    "public-kernel-non-first-iteration": 14745,
-    "merge-rollup": 873
-  },
-  "node_history_sync_time_in_ms": {
-    "10": 30823,
-    "20": 75516,
-    "30": 136231
-  },
-  "node_database_size_in_bytes": {
-    "10": 1194179,
-    "20": 1900681,
-    "30": 2754125
-  },
-  "note_history_successful_decrypting_time_in_ms": {
-    "10": 4653,
-    "20": 12961,
-    "30": 20148
-  },
-  "pxe_database_size_in_bytes": {
-    "10": 54187,
-    "20": 108338,
-    "30": 162578
-  },
-  "note_history_trial_decrypting_time_in_ms": {
-    "10": 147,
-    "20": 208,
-    "30": 254
-  },
-  "l2_block_building_time_in_ms": {
-    "8": 9114,
-    "32": 36117,
-    "128": 152315
-  },
-  "l2_block_rollup_simulation_time_in_ms": {
-    "8": 6771,
-    "32": 26781,
-    "128": 107164
-  },
-  "l2_block_public_tx_process_time_in_ms": {
-    "8": 2300,
-    "32": 9209,
-    "128": 44431
-  },
-  "l1_rollup_calldata_gas": {
-    "8": 222984,
-    "32": 867956,
-    "128": 3449696
-  },
-  "l1_rollup_calldata_size_in_bytes": {
-    "8": 45444,
-    "32": 179588,
-    "128": 716132
-  },
-  "l1_rollup_execution_gas": {
-    "8": 842071,
-    "32": 3595064,
-    "128": 22205065
-  },
-  "l2_block_processing_time_in_ms": {
-    "8": 1060,
-    "32": 3981,
-    "128": 15688
-  },
-  "note_successful_decrypting_time_in_ms": {
-    "8": 332,
-    "32": 1019,
-    "128": 3780
-  },
-  "note_trial_decrypting_time_in_ms": {
-    "8": 34,
-    "32": 108,
-    "128": 138
-  },
-  "timestamp": "2023-10-10T17:51:38.017Z"
-}
diff --git a/yarn-project/scripts/src/benchmarks/markdown.ts b/yarn-project/scripts/src/benchmarks/markdown.ts
index 0f76a604481..f62627fd774 100644
--- a/yarn-project/scripts/src/benchmarks/markdown.ts
+++ b/yarn-project/scripts/src/benchmarks/markdown.ts
@@ -15,8 +15,68 @@ const baseFile = BaseBenchFile;
 const COMMENT_MARK = '<!-- AUTOGENERATED BENCHMARK COMMENT -->';
 const S3_URL = 'https://aztec-ci-artifacts.s3.us-east-2.amazonaws.com';
 
+// What % diff should be considered as a warning
+const WARNING_DIFF_THRESHOLD = 15;
+// When a measurement in ms should be considered "small"
+const SMALL_MS_THRESHOLD = 100;
+// What % diff should be considered as a warning for "small" ms measurements
+const WARNING_DIFF_THRESHOLD_SMALL_MS = 30;
+
 const log = createConsoleLogger();
 
+/** Returns whether the value should be a warning, based on the % difference and absolute value. */
+function isWarning(row: string, col: string, value: number, base: number | undefined) {
+  if (base === undefined) return false;
+  const absPercentDiff = Math.abs(Math.round(((value - base) / base) * 100));
+  if ((row.endsWith('_ms') || col.endsWith('_ms')) && value < SMALL_MS_THRESHOLD) {
+    return absPercentDiff >= WARNING_DIFF_THRESHOLD_SMALL_MS;
+  } else {
+    return absPercentDiff > WARNING_DIFF_THRESHOLD;
+  }
+}
+
+/** Returns summary text for warnings */
+function getWarningsSummary(
+  data: Record<string, Record<string, number>>,
+  base: Record<string, Record<string, number>> | undefined,
+) {
+  const warnings = getWarnings(data, base);
+  if (!base) {
+    return 'No base data found for comparison.';
+  } else if (warnings.length) {
+    return `Metrics with a significant change: \n${warnings.join('\n')}`;
+  } else {
+    return `No metrics with a significant change found.`;
+  }
+}
+
+/** Returns a string with the % diff between value and base. */
+function formatDiff(value: number, baseValue: number) {
+  const percentDiff = Math.round(((value - baseValue) / baseValue) * 100);
+  const percentSign = percentDiff > 0 ? '+' : '';
+  return `<span title="${formatValue(baseValue)}">${percentSign}${percentDiff}%</span>`;
+}
+
+/** Gets a list of warnings. */
+function getWarnings(
+  data: Record<string, Record<string, number>>,
+  base: Record<string, Record<string, number>> | undefined,
+) {
+  if (!base) return [];
+  const warnings: string[] = [];
+  for (const row in data) {
+    for (const col in data[row]) {
+      const value = data[row][col];
+      const baseValue = (base[row] ?? {})[col];
+      if (baseValue && isWarning(row, col, value, baseValue)) {
+        const diffText = formatDiff(value, baseValue);
+        warnings.push(`- **${withDesc(row)}** (${withDesc(col)}): ${formatValue(value)} (${diffText})`);
+      }
+    }
+  }
+  return warnings;
+}
+
 /** Returns a cell content formatted as string */
 function getCell(
   data: Record<string, Record<string, number>>,
@@ -25,27 +85,21 @@ function getCell(
   col: string,
 ) {
   const value = data[row][col];
+  const formattedValue = formatValue(value);
   const baseValue = base ? (base[row] ?? {})[col] : undefined;
   const percentDiff = baseValue ? Math.round(((value - baseValue) / baseValue) * 100) : undefined;
-  const formattedValue = formatValue(value);
-  const highlight = percentDiff && Math.abs(percentDiff) > 10 ? '**' : '';
-  const warning = percentDiff && Math.abs(percentDiff) > 10 ? ':warning:' : '';
-  const percentSign = percentDiff && percentDiff > 0 ? '+' : '';
-  return percentDiff && Math.abs(percentDiff) >= 1
-    ? `${warning} ${formattedValue} (${highlight}<span title="${formatValue(
-        baseValue!,
-      )}">${percentSign}${percentDiff}%</span>${highlight})`
-    : formattedValue;
-}
-
-/** Returns the description of a metric name, if found. */
-function tryGetDescription(name: string) {
-  return Metrics.find(m => m.name === name)?.description;
+  if (!percentDiff || Math.abs(percentDiff) < 1) {
+    return formattedValue;
+  }
+  if (!isWarning(row, col, value, baseValue)) {
+    return `${formattedValue} (${formatDiff(value, baseValue!)})`;
+  }
+  return `:warning: ${formattedValue} (**${formatDiff(value, baseValue!)}**)`;
 }
 
 /** Wraps the metric name in a span with a title with the description, if found. */
-function withDescriptionTitle(name: string) {
-  const description = tryGetDescription(name);
+function withDesc(name: string) {
+  const description = Metrics.find(m => m.name === name)?.description;
   if (!description) return name;
   return `<span title="${description}">${name}</span>`;
 }
@@ -87,11 +141,11 @@ function getTableContent(
 ) {
   const rowKeys = Object.keys(data);
   const groups = [...new Set(rowKeys.flatMap(key => Object.keys(data[key])))];
-  const makeHeader = (colTitle: string) => `${withDescriptionTitle(colTitle)} ${groupUnit}`;
+  const makeHeader = (colTitle: string) => `${withDesc(colTitle)} ${groupUnit}`;
   const header = `| ${col1Title} | ${groups.map(makeHeader).join(' | ')} |`;
   const separator = `| - | ${groups.map(() => '-').join(' | ')} |`;
   const makeCell = (row: string, col: string) => getCell(data, baseBenchmark, row, col);
-  const rows = rowKeys.map(key => `${withDescriptionTitle(key)} | ${groups.map(g => makeCell(key, g)).join(' | ')} |`);
+  const rows = rowKeys.map(key => `${withDesc(key)} | ${groups.map(g => makeCell(key, g)).join(' | ')} |`);
 
   return `
 ${header}
@@ -104,6 +158,7 @@ ${rows.join('\n')}
 export function getMarkdown() {
   const benchmark = JSON.parse(fs.readFileSync(inputFile, 'utf-8'));
   const baseBenchmark = getBaseBenchmark();
+
   const metricsByBlockSize = Metrics.filter(m => m.groupBy === 'block-size').map(m => m.name);
   const metricsByChainLength = Metrics.filter(m => m.groupBy === 'chain-length').map(m => m.name);
   const metricsByCircuitName = Metrics.filter(m => m.groupBy === 'circuit-name').map(m => m.name);
@@ -124,6 +179,12 @@ export function getMarkdown() {
   return `
 ## Benchmark results
 
+${getWarningsSummary(benchmark, baseBenchmark)}
+
+<details>
+
+<summary>Detailed results</summary>
+
 All benchmarks are run on txs on the \`Benchmarking\` contract on the repository. Each tx consists of a batch call  to \`create_note\` and \`increment_balance\`, which guarantees that each tx has a private call, a nested private call, a public call, and a nested public call, as well as an emitted private note, an unencrypted log, and public storage read and write. 
 ${prSourceDataText}
 ${baseCommitText}
@@ -148,6 +209,7 @@ ${getTableContent(transpose(pick(benchmark, metricsByCircuitName)), transpose(ba
 Transaction sizes based on how many contracts are deployed in the tx.
 ${getTableContent(pick(benchmark, metricsByContractCount), baseBenchmark, 'deployed contracts')}
 
+</details>
 ${COMMENT_MARK}
 `;
 }