From 9bc0c5ea1887ddfd3dac170b343241e5ce3a81d6 Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Wed, 18 Oct 2023 11:11:45 -0300 Subject: [PATCH] chore: Less noisy benchmark reports Hides benchmark reports in PR comments behind a collapsible, and only shows a list of the metrics with a siginificant change. Increases the warning threshold for measurements with an absolute value of less than 100ms. --- yarn-project/scripts/benchmark.json | 103 ------------------ .../scripts/src/benchmarks/markdown.ts | 98 ++++++++++++++--- 2 files changed, 80 insertions(+), 121 deletions(-) delete mode 100644 yarn-project/scripts/benchmark.json diff --git a/yarn-project/scripts/benchmark.json b/yarn-project/scripts/benchmark.json deleted file mode 100644 index 8443a67824f..00000000000 --- a/yarn-project/scripts/benchmark.json +++ /dev/null @@ -1,103 +0,0 @@ -{ - "circuit_simulation_time_in_ms": { - "private-kernel-init": 54.9340490797546, - "private-kernel-ordering": 30.192484662576685, - "base-rollup": 871, - "root-rollup": 37.926829268292686, - "private-kernel-inner": 51.861111111111114, - "public-kernel-private-input": 51.72453703703704, - "public-kernel-non-first-iteration": 31.63425925925926, - "merge-rollup": 1 - }, - "circuit_input_size_in_bytes": { - "private-kernel-init": 56577, - "private-kernel-ordering": 20137, - "base-rollup": 631604, - "root-rollup": 4072, - "private-kernel-inner": 72288, - "public-kernel-private-input": 37359, - "public-kernel-non-first-iteration": 37401, - "merge-rollup": 2592 - }, - "circuit_output_size_in_bytes": { - "private-kernel-init": 14745, - "private-kernel-ordering": 8089, - "base-rollup": 810, - "root-rollup": 1097, - "private-kernel-inner": 14745, - "public-kernel-private-input": 14745, - "public-kernel-non-first-iteration": 14745, - "merge-rollup": 873 - }, - "node_history_sync_time_in_ms": { - "10": 30823, - "20": 75516, - "30": 136231 - }, - "node_database_size_in_bytes": { - "10": 1194179, - "20": 1900681, - "30": 2754125 - }, - "note_history_successful_decrypting_time_in_ms": { - "10": 4653, - "20": 12961, - "30": 20148 - }, - "pxe_database_size_in_bytes": { - "10": 54187, - "20": 108338, - "30": 162578 - }, - "note_history_trial_decrypting_time_in_ms": { - "10": 147, - "20": 208, - "30": 254 - }, - "l2_block_building_time_in_ms": { - "8": 9114, - "32": 36117, - "128": 152315 - }, - "l2_block_rollup_simulation_time_in_ms": { - "8": 6771, - "32": 26781, - "128": 107164 - }, - "l2_block_public_tx_process_time_in_ms": { - "8": 2300, - "32": 9209, - "128": 44431 - }, - "l1_rollup_calldata_gas": { - "8": 222984, - "32": 867956, - "128": 3449696 - }, - "l1_rollup_calldata_size_in_bytes": { - "8": 45444, - "32": 179588, - "128": 716132 - }, - "l1_rollup_execution_gas": { - "8": 842071, - "32": 3595064, - "128": 22205065 - }, - "l2_block_processing_time_in_ms": { - "8": 1060, - "32": 3981, - "128": 15688 - }, - "note_successful_decrypting_time_in_ms": { - "8": 332, - "32": 1019, - "128": 3780 - }, - "note_trial_decrypting_time_in_ms": { - "8": 34, - "32": 108, - "128": 138 - }, - "timestamp": "2023-10-10T17:51:38.017Z" -} diff --git a/yarn-project/scripts/src/benchmarks/markdown.ts b/yarn-project/scripts/src/benchmarks/markdown.ts index 0f76a604481..f62627fd774 100644 --- a/yarn-project/scripts/src/benchmarks/markdown.ts +++ b/yarn-project/scripts/src/benchmarks/markdown.ts @@ -15,8 +15,68 @@ const baseFile = BaseBenchFile; const COMMENT_MARK = ''; const S3_URL = 'https://aztec-ci-artifacts.s3.us-east-2.amazonaws.com'; +// What % diff should be considered as a warning +const WARNING_DIFF_THRESHOLD = 15; +// When a measurement in ms should be considered "small" +const SMALL_MS_THRESHOLD = 100; +// What % diff should be considered as a warning for "small" ms measurements +const WARNING_DIFF_THRESHOLD_SMALL_MS = 30; + const log = createConsoleLogger(); +/** Returns whether the value should be a warning, based on the % difference and absolute value. */ +function isWarning(row: string, col: string, value: number, base: number | undefined) { + if (base === undefined) return false; + const absPercentDiff = Math.abs(Math.round(((value - base) / base) * 100)); + if ((row.endsWith('_ms') || col.endsWith('_ms')) && value < SMALL_MS_THRESHOLD) { + return absPercentDiff >= WARNING_DIFF_THRESHOLD_SMALL_MS; + } else { + return absPercentDiff > WARNING_DIFF_THRESHOLD; + } +} + +/** Returns summary text for warnings */ +function getWarningsSummary( + data: Record>, + base: Record> | undefined, +) { + const warnings = getWarnings(data, base); + if (!base) { + return 'No base data found for comparison.'; + } else if (warnings.length) { + return `Metrics with a significant change: \n${warnings.join('\n')}`; + } else { + return `No metrics with a significant change found.`; + } +} + +/** Returns a string with the % diff between value and base. */ +function formatDiff(value: number, baseValue: number) { + const percentDiff = Math.round(((value - baseValue) / baseValue) * 100); + const percentSign = percentDiff > 0 ? '+' : ''; + return `${percentSign}${percentDiff}%`; +} + +/** Gets a list of warnings. */ +function getWarnings( + data: Record>, + base: Record> | undefined, +) { + if (!base) return []; + const warnings: string[] = []; + for (const row in data) { + for (const col in data[row]) { + const value = data[row][col]; + const baseValue = (base[row] ?? {})[col]; + if (baseValue && isWarning(row, col, value, baseValue)) { + const diffText = formatDiff(value, baseValue); + warnings.push(`- **${withDesc(row)}** (${withDesc(col)}): ${formatValue(value)} (${diffText})`); + } + } + } + return warnings; +} + /** Returns a cell content formatted as string */ function getCell( data: Record>, @@ -25,27 +85,21 @@ function getCell( col: string, ) { const value = data[row][col]; + const formattedValue = formatValue(value); const baseValue = base ? (base[row] ?? {})[col] : undefined; const percentDiff = baseValue ? Math.round(((value - baseValue) / baseValue) * 100) : undefined; - const formattedValue = formatValue(value); - const highlight = percentDiff && Math.abs(percentDiff) > 10 ? '**' : ''; - const warning = percentDiff && Math.abs(percentDiff) > 10 ? ':warning:' : ''; - const percentSign = percentDiff && percentDiff > 0 ? '+' : ''; - return percentDiff && Math.abs(percentDiff) >= 1 - ? `${warning} ${formattedValue} (${highlight}${percentSign}${percentDiff}%${highlight})` - : formattedValue; -} - -/** Returns the description of a metric name, if found. */ -function tryGetDescription(name: string) { - return Metrics.find(m => m.name === name)?.description; + if (!percentDiff || Math.abs(percentDiff) < 1) { + return formattedValue; + } + if (!isWarning(row, col, value, baseValue)) { + return `${formattedValue} (${formatDiff(value, baseValue!)})`; + } + return `:warning: ${formattedValue} (**${formatDiff(value, baseValue!)}**)`; } /** Wraps the metric name in a span with a title with the description, if found. */ -function withDescriptionTitle(name: string) { - const description = tryGetDescription(name); +function withDesc(name: string) { + const description = Metrics.find(m => m.name === name)?.description; if (!description) return name; return `${name}`; } @@ -87,11 +141,11 @@ function getTableContent( ) { const rowKeys = Object.keys(data); const groups = [...new Set(rowKeys.flatMap(key => Object.keys(data[key])))]; - const makeHeader = (colTitle: string) => `${withDescriptionTitle(colTitle)} ${groupUnit}`; + const makeHeader = (colTitle: string) => `${withDesc(colTitle)} ${groupUnit}`; const header = `| ${col1Title} | ${groups.map(makeHeader).join(' | ')} |`; const separator = `| - | ${groups.map(() => '-').join(' | ')} |`; const makeCell = (row: string, col: string) => getCell(data, baseBenchmark, row, col); - const rows = rowKeys.map(key => `${withDescriptionTitle(key)} | ${groups.map(g => makeCell(key, g)).join(' | ')} |`); + const rows = rowKeys.map(key => `${withDesc(key)} | ${groups.map(g => makeCell(key, g)).join(' | ')} |`); return ` ${header} @@ -104,6 +158,7 @@ ${rows.join('\n')} export function getMarkdown() { const benchmark = JSON.parse(fs.readFileSync(inputFile, 'utf-8')); const baseBenchmark = getBaseBenchmark(); + const metricsByBlockSize = Metrics.filter(m => m.groupBy === 'block-size').map(m => m.name); const metricsByChainLength = Metrics.filter(m => m.groupBy === 'chain-length').map(m => m.name); const metricsByCircuitName = Metrics.filter(m => m.groupBy === 'circuit-name').map(m => m.name); @@ -124,6 +179,12 @@ export function getMarkdown() { return ` ## Benchmark results +${getWarningsSummary(benchmark, baseBenchmark)} + +
+ +Detailed results + All benchmarks are run on txs on the \`Benchmarking\` contract on the repository. Each tx consists of a batch call to \`create_note\` and \`increment_balance\`, which guarantees that each tx has a private call, a nested private call, a public call, and a nested public call, as well as an emitted private note, an unencrypted log, and public storage read and write. ${prSourceDataText} ${baseCommitText} @@ -148,6 +209,7 @@ ${getTableContent(transpose(pick(benchmark, metricsByCircuitName)), transpose(ba Transaction sizes based on how many contracts are deployed in the tx. ${getTableContent(pick(benchmark, metricsByContractCount), baseBenchmark, 'deployed contracts')} +
${COMMENT_MARK} `; }