chore: Less noisy benchmark reports

Hides benchmark reports in PR comments behind a collapsible, and only shows a list of the metrics with a siginificant change. Increases the warning threshold for measurements with an absolute value of less than 100ms.
AztecProtocol · Oct 18, 2023 · d9aeb3c · d9aeb3c
1 parent ba2f671
commit d9aeb3c
Show file tree

Hide file tree

Showing 2 changed files with 80 additions and 121 deletions.
diff --git a/yarn-project/scripts/benchmark.json b/yarn-project/scripts/benchmark.json
diff --git a/yarn-project/scripts/src/benchmarks/markdown.ts b/yarn-project/scripts/src/benchmarks/markdown.ts
@@ -15,8 +15,68 @@ const baseFile = BaseBenchFile;
 const COMMENT_MARK = '<!-- AUTOGENERATED BENCHMARK COMMENT -->';
 const S3_URL = 'https://aztec-ci-artifacts.s3.us-east-2.amazonaws.com';
 
+// What % diff should be considered as a warning
+const WARNING_DIFF_THRESHOLD = 15;
+// When a measurement in ms should be considered "small"
+const SMALL_MS_THRESHOLD = 100;
+// What % diff should be considered as a warning for "small" ms measurements
+const WARNING_DIFF_THRESHOLD_SMALL_MS = 30;
+
 const log = createConsoleLogger();
 
+/** Returns whether the value should be a warning, based on the % difference and absolute value. */
+function isWarning(row: string, col: string, value: number, base: number | undefined) {
+  if (base === undefined) return false;
+  const absPercentDiff = Math.abs(Math.round(((value - base) / base) * 100));
+  if ((row.endsWith('_ms') || col.endsWith('_ms')) && value < SMALL_MS_THRESHOLD) {
+    return absPercentDiff >= WARNING_DIFF_THRESHOLD_SMALL_MS;
+  } else {
+    return absPercentDiff > WARNING_DIFF_THRESHOLD;
+  }
+}
+
+/** Returns summary text for warnings */
+function getWarningsSummary(
+  data: Record<string, Record<string, number>>,
+  base: Record<string, Record<string, number>> | undefined,
+) {
+  const warnings = getWarnings(data, base);
+  if (!base) {
+    return 'No base data found for comparison.';
+  } else if (warnings.length) {
+    return `Metrics with a significant change: \n${warnings.join('\n')}`;
+  } else {
+    return `No metrics with a significant change found.`;
+  }
+}
+
+/** Returns a string with the % diff between value and base. */
+function formatDiff(value: number, baseValue: number) {
+  const percentDiff = Math.round(((value - baseValue) / baseValue) * 100);
+  const percentSign = percentDiff > 0 ? '+' : '';
+  return `<span title="${formatValue(baseValue)}">${percentSign}${percentDiff}%</span>`;
+}
+
+/** Gets a list of warnings. */
+function getWarnings(
+  data: Record<string, Record<string, number>>,
+  base: Record<string, Record<string, number>> | undefined,
+) {
+  if (!base) return [];
+  const warnings: string[] = [];
+  for (const row in data) {
+    for (const col in data[row]) {
+      const value = data[row][col];
+      const baseValue = (base[row] ?? {})[col];
+      if (baseValue && isWarning(row, col, value, baseValue)) {
+        const diffText = formatDiff(value, baseValue);
+        warnings.push(`- **${withDesc(row)}** (${withDesc(col)}): ${formatValue(value)} (${diffText})`);
+      }
+    }
+  }
+  return warnings;
+}
+
 /** Returns a cell content formatted as string */
 function getCell(
   data: Record<string, Record<string, number>>,
@@ -25,27 +85,21 @@ function getCell(
   col: string,
 ) {
   const value = data[row][col];
+  const formattedValue = formatValue(value);
   const baseValue = base ? (base[row] ?? {})[col] : undefined;
   const percentDiff = baseValue ? Math.round(((value - baseValue) / baseValue) * 100) : undefined;
-  const formattedValue = formatValue(value);
-  const highlight = percentDiff && Math.abs(percentDiff) > 10 ? '**' : '';
-  const warning = percentDiff && Math.abs(percentDiff) > 10 ? ':warning:' : '';
-  const percentSign = percentDiff && percentDiff > 0 ? '+' : '';
-  return percentDiff && Math.abs(percentDiff) >= 1
-    ? `${warning} ${formattedValue} (${highlight}<span title="${formatValue(
-        baseValue!,
-      )}">${percentSign}${percentDiff}%</span>${highlight})`
-    : formattedValue;
-}
-
-/** Returns the description of a metric name, if found. */
-function tryGetDescription(name: string) {
-  return Metrics.find(m => m.name === name)?.description;
+  if (!percentDiff || Math.abs(percentDiff) < 1) {
+    return formattedValue;
+  }
+  if (!isWarning(row, col, value, baseValue)) {
+    return `${formattedValue} (${formatDiff(value, baseValue!)})`;
+  }
+  return `:warning: ${formattedValue} (**${formatDiff(value, baseValue!)}**)`;
 }
 
 /** Wraps the metric name in a span with a title with the description, if found. */
-function withDescriptionTitle(name: string) {
-  const description = tryGetDescription(name);
+function withDesc(name: string) {
+  const description = Metrics.find(m => m.name === name)?.description;
   if (!description) return name;
   return `<span title="${description}">${name}</span>`;
 }
@@ -87,11 +141,11 @@ function getTableContent(
 ) {
   const rowKeys = Object.keys(data);
   const groups = [...new Set(rowKeys.flatMap(key => Object.keys(data[key])))];
-  const makeHeader = (colTitle: string) => `${withDescriptionTitle(colTitle)} ${groupUnit}`;
+  const makeHeader = (colTitle: string) => `${withDesc(colTitle)} ${groupUnit}`;
   const header = `| ${col1Title} | ${groups.map(makeHeader).join(' | ')} |`;
   const separator = `| - | ${groups.map(() => '-').join(' | ')} |`;
   const makeCell = (row: string, col: string) => getCell(data, baseBenchmark, row, col);
-  const rows = rowKeys.map(key => `${withDescriptionTitle(key)} | ${groups.map(g => makeCell(key, g)).join(' | ')} |`);
+  const rows = rowKeys.map(key => `${withDesc(key)} | ${groups.map(g => makeCell(key, g)).join(' | ')} |`);
 
   return `
 ${header}
@@ -104,6 +158,7 @@ ${rows.join('\n')}
 export function getMarkdown() {
   const benchmark = JSON.parse(fs.readFileSync(inputFile, 'utf-8'));
   const baseBenchmark = getBaseBenchmark();
+
   const metricsByBlockSize = Metrics.filter(m => m.groupBy === 'block-size').map(m => m.name);
   const metricsByChainLength = Metrics.filter(m => m.groupBy === 'chain-length').map(m => m.name);
   const metricsByCircuitName = Metrics.filter(m => m.groupBy === 'circuit-name').map(m => m.name);
@@ -124,6 +179,12 @@ export function getMarkdown() {
   return `
 ## Benchmark results
 
+${getWarningsSummary(benchmark, baseBenchmark)}
+
+<details>
+
+<summary>Detailed results</summary>
+
 All benchmarks are run on txs on the \`Benchmarking\` contract on the repository. Each tx consists of a batch call  to \`create_note\` and \`increment_balance\`, which guarantees that each tx has a private call, a nested private call, a public call, and a nested public call, as well as an emitted private note, an unencrypted log, and public storage read and write. 
 ${prSourceDataText}
 ${baseCommitText}
@@ -148,6 +209,7 @@ ${getTableContent(transpose(pick(benchmark, metricsByCircuitName)), transpose(ba
 Transaction sizes based on how many contracts are deployed in the tx.
 ${getTableContent(pick(benchmark, metricsByContractCount), baseBenchmark, 'deployed contracts')}
 
+</details>
 ${COMMENT_MARK}
 `;
 }