From 8efa3741ca7503cd38a7de75d5768f1b4d1be287 Mon Sep 17 00:00:00 2001
From: Santiago Palladino <santiago@aztecprotocol.com>
Date: Wed, 11 Oct 2023 12:41:38 -0300
Subject: [PATCH] chore: Rewrite benchmark scripts in ts (#2765)

Adds types for all tracked metrics and stats, so the compiler will warn
if we break the data we're logging on the app and expecting on the
processing scripts. Introduces a new package `scripts` in the
yarn-project workspace for typescript-based CI scripts. Also adds
comparison with base benchmark on master, and shortens the bench
processing job until we get better times out of it.
---
 scripts/ci/aggregate_e2e_benchmark.js         | 199 --------------
 scripts/ci/assemble_e2e_benchmark.sh          |  50 +++-
 scripts/ci/benchmark_shared.js                |  50 ----
 scripts/ci/comment_e2e_benchmark.js           | 243 ------------------
 .../end-to-end/src/benchmarks/README.md       |  11 +
 .../benchmarks/bench_process_history.test.ts  |  11 +-
 .../benchmarks/bench_publish_rollup.test.ts   |   5 +-
 yarn-project/package.json                     |   1 +
 .../pxe/src/kernel_prover/proof_creator.ts    |   7 +-
 .../pxe/src/note_processor/note_processor.ts  |  15 +-
 .../pxe/src/synchronizer/synchronizer.ts      |   3 +-
 yarn-project/scripts/.eslintrc.cjs            |   1 +
 yarn-project/scripts/README.md                |   3 +
 yarn-project/scripts/benchmark.json           | 103 ++++++++
 yarn-project/scripts/package.json             |  60 +++++
 yarn-project/scripts/run_script.sh            |  14 +
 .../scripts/src/benchmarks/aggregate.ts       | 194 ++++++++++++++
 .../scripts/src/benchmarks/comment.ts         | 103 ++++++++
 .../scripts/src/benchmarks/markdown.ts        | 152 +++++++++++
 yarn-project/scripts/src/benchmarks/paths.ts  |  13 +
 .../scripts/src/bin/bench-aggregate.ts        |   7 +
 yarn-project/scripts/src/bin/bench-comment.ts |   7 +
 .../scripts/src/bin/bench-markdown.ts         |   9 +
 yarn-project/scripts/src/index.ts             |   0
 yarn-project/scripts/tsconfig.json            |  17 ++
 .../sequencer-client/src/publisher/index.ts   |  28 --
 .../src/publisher/l1-publisher.ts             |   2 +-
 .../src/sequencer/sequencer.ts                |   3 +-
 .../src/simulator/public_kernel.ts            |   5 +-
 .../sequencer-client/src/simulator/rollup.ts  |   7 +-
 yarn-project/tsconfig.json                    |   3 +-
 yarn-project/types/package.json               |   5 +-
 yarn-project/types/src/stats/benchmarks.ts    |  12 +
 yarn-project/types/src/stats/index.ts         |  18 ++
 yarn-project/types/src/stats/metrics.ts       | 130 ++++++++++
 yarn-project/types/src/stats/stats.ts         | 131 ++++++++++
 .../server_world_state_synchronizer.ts        |   3 +-
 yarn-project/yarn.lock                        |  25 ++
 38 files changed, 1088 insertions(+), 562 deletions(-)
 delete mode 100644 scripts/ci/aggregate_e2e_benchmark.js
 delete mode 100644 scripts/ci/benchmark_shared.js
 delete mode 100644 scripts/ci/comment_e2e_benchmark.js
 create mode 100644 yarn-project/end-to-end/src/benchmarks/README.md
 create mode 100644 yarn-project/scripts/.eslintrc.cjs
 create mode 100644 yarn-project/scripts/README.md
 create mode 100644 yarn-project/scripts/benchmark.json
 create mode 100644 yarn-project/scripts/package.json
 create mode 100755 yarn-project/scripts/run_script.sh
 create mode 100644 yarn-project/scripts/src/benchmarks/aggregate.ts
 create mode 100644 yarn-project/scripts/src/benchmarks/comment.ts
 create mode 100644 yarn-project/scripts/src/benchmarks/markdown.ts
 create mode 100644 yarn-project/scripts/src/benchmarks/paths.ts
 create mode 100644 yarn-project/scripts/src/bin/bench-aggregate.ts
 create mode 100644 yarn-project/scripts/src/bin/bench-comment.ts
 create mode 100644 yarn-project/scripts/src/bin/bench-markdown.ts
 create mode 100644 yarn-project/scripts/src/index.ts
 create mode 100644 yarn-project/scripts/tsconfig.json
 create mode 100644 yarn-project/types/src/stats/benchmarks.ts
 create mode 100644 yarn-project/types/src/stats/index.ts
 create mode 100644 yarn-project/types/src/stats/metrics.ts
 create mode 100644 yarn-project/types/src/stats/stats.ts

diff --git a/scripts/ci/aggregate_e2e_benchmark.js b/scripts/ci/aggregate_e2e_benchmark.js
deleted file mode 100644
index 53eb747cf6c..00000000000
--- a/scripts/ci/aggregate_e2e_benchmark.js
+++ /dev/null
@@ -1,199 +0,0 @@
-// Given a local folder with the e2e benchmark files, generates a single file
-// output with the grouped metrics to be published. This script can probably
-// be replaced by a single call to jq, but I found this easier to write,
-// and pretty much every CI comes with a working version of node.
-//
-// To test this locally, first run the benchmark tests from the yarn-project/end-to-end folder
-// BENCHMARK=1 ROLLUP_SIZES=8 yarn test bench
-//
-// And then run this script from the root of the project:
-// LOGS_DIR=./yarn-project/end-to-end/log/ node ./scripts/ci/aggregate_e2e_benchmark.js 
-
-const fs = require("fs");
-const path = require("path");
-const readline = require("readline");
-
-const {
-  L1_ROLLUP_CALLDATA_SIZE_IN_BYTES,
-  L1_ROLLUP_CALLDATA_GAS,
-  L1_ROLLUP_EXECUTION_GAS,
-  L2_BLOCK_PROCESSING_TIME,
-  L2_BLOCK_SYNCED,
-  L2_BLOCK_PUBLISHED_TO_L1,
-  CIRCUIT_SIMULATION_TIME,
-  CIRCUIT_OUTPUT_SIZE,
-  CIRCUIT_INPUT_SIZE,
-  CIRCUIT_SIMULATED,
-  NOTE_SUCCESSFUL_DECRYPTING_TIME,
-  NOTE_TRIAL_DECRYPTING_TIME,
-  NOTE_PROCESSOR_CAUGHT_UP,
-  L2_BLOCK_BUILT,
-  L2_BLOCK_BUILD_TIME,
-  L2_BLOCK_ROLLUP_SIMULATION_TIME,
-  L2_BLOCK_PUBLIC_TX_PROCESS_TIME,
-  NODE_HISTORY_SYNC_TIME,
-  NODE_SYNCED_CHAIN,
-  NOTE_HISTORY_TRIAL_DECRYPTING_TIME,
-  NOTE_HISTORY_SUCCESSFUL_DECRYPTING_TIME,
-  PXE_DB_SIZE,
-  ROLLUP_SIZES,
-  CHAIN_LENGTHS,
-  BENCHMARK_FILE_JSON,
-  BLOCK_SIZE,
-  NODE_DB_SIZE,
-} = require("./benchmark_shared.js");
-
-// Folder where to load logs from
-const logsDir = process.env.LOGS_DIR ?? `log`;
-
-// Appends a data point to the final results for the given metric in the given bucket
-function append(results, metric, bucket, value) {
-  if (value === undefined) {
-    console.error(`Undefined value for ${metric} in bucket ${bucket}`);
-    return;
-  }
-  const numeric = Number(value);
-  if (Number.isNaN(numeric)) {
-    console.error(`Value ${value} for ${metric} in ${bucket} is not a number`);
-    return;
-  }
-  if (!results[metric]) results[metric] = {};
-  if (!results[metric][bucket]) results[metric][bucket] = [];
-  results[metric][bucket].push(numeric);
-}
-
-// Processes an entry with event name 'rollup-published-to-l1' and updates results
-function processRollupPublished(entry, results) {
-  const bucket = entry.txCount;
-  if (!ROLLUP_SIZES.includes(bucket)) return;
-  append(results, L1_ROLLUP_CALLDATA_GAS, bucket, entry.calldataGas);
-  append(results, L1_ROLLUP_CALLDATA_SIZE_IN_BYTES, bucket, entry.calldataSize);
-  append(results, L1_ROLLUP_EXECUTION_GAS, bucket, entry.gasUsed);
-}
-
-// Processes an entry with event name 'l2-block-handled' and updates results
-// Skips instances where the block was emitted by the same node where the processing is skipped
-function processRollupBlockSynced(entry, results) {
-  const bucket = entry.txCount;
-  if (!ROLLUP_SIZES.includes(bucket)) return;
-  if (entry.isBlockOurs) return;
-  append(results, L2_BLOCK_PROCESSING_TIME, bucket, entry.duration);
-}
-
-// Processes an entry with event name 'circuit-simulated' and updates results
-// Buckets are circuit names
-function processCircuitSimulation(entry, results) {
-  const bucket = entry.circuitName;
-  if (!bucket) return;
-  append(results, CIRCUIT_SIMULATION_TIME, bucket, entry.duration);
-  append(results, CIRCUIT_INPUT_SIZE, bucket, entry.inputSize);
-  append(results, CIRCUIT_OUTPUT_SIZE, bucket, entry.outputSize);
-}
-
-// Processes an entry with event name 'note-processor-caught-up' and updates results
-// Buckets are rollup sizes for NOTE_DECRYPTING_TIME, or chain sizes for NOTE_HISTORY_DECRYPTING_TIME
-function processNoteProcessorCaughtUp(entry, results) {
-  const { seen, decrypted, blocks, duration, dbSize } = entry;
-  if (ROLLUP_SIZES.includes(decrypted))
-    append(results, NOTE_SUCCESSFUL_DECRYPTING_TIME, decrypted, duration);
-  if (ROLLUP_SIZES.includes(seen) && decrypted === 0)
-    append(results, NOTE_TRIAL_DECRYPTING_TIME, seen, duration);
-  if (CHAIN_LENGTHS.includes(blocks) && decrypted > 0) {
-    append(results, NOTE_HISTORY_SUCCESSFUL_DECRYPTING_TIME, blocks, duration);
-    append(results, PXE_DB_SIZE, blocks, dbSize);
-  }
-  if (CHAIN_LENGTHS.includes(blocks) && decrypted === 0)
-    append(results, NOTE_HISTORY_TRIAL_DECRYPTING_TIME, blocks, duration);
-}
-
-// Processes an entry with event name 'l2-block-built' and updates results
-// Buckets are rollup sizes
-function processL2BlockBuilt(entry, results) {
-  const bucket = entry.txCount;
-  if (!ROLLUP_SIZES.includes(bucket)) return;
-  append(results, L2_BLOCK_BUILD_TIME, bucket, entry.duration);
-  append(
-    results,
-    L2_BLOCK_ROLLUP_SIMULATION_TIME,
-    bucket,
-    entry.rollupCircuitsDuration
-  );
-  append(
-    results,
-    L2_BLOCK_PUBLIC_TX_PROCESS_TIME,
-    bucket,
-    entry.publicProcessDuration
-  );
-}
-
-// Processes entries with event name node-synced-chain-history emitted by benchmark tests
-// Buckets are chain lengths
-function processNodeSyncedChain(entry, results) {
-  const bucket = entry.blockCount;
-  if (!CHAIN_LENGTHS.includes(bucket)) return;
-  if (entry.txsPerBlock !== BLOCK_SIZE) return;
-  append(results, NODE_HISTORY_SYNC_TIME, bucket, entry.duration);
-  append(results, NODE_DB_SIZE, bucket, entry.dbSize);
-}
-
-// Processes a parsed entry from a logfile and updates results
-function processEntry(entry, results) {
-  switch (entry.eventName) {
-    case L2_BLOCK_PUBLISHED_TO_L1:
-      return processRollupPublished(entry, results);
-    case L2_BLOCK_SYNCED:
-      return processRollupBlockSynced(entry, results);
-    case CIRCUIT_SIMULATED:
-      return processCircuitSimulation(entry, results);
-    case NOTE_PROCESSOR_CAUGHT_UP:
-      return processNoteProcessorCaughtUp(entry, results);
-    case L2_BLOCK_BUILT:
-      return processL2BlockBuilt(entry, results);
-    case NODE_SYNCED_CHAIN:
-      return processNodeSyncedChain(entry, results);
-    default:
-      return;
-  }
-}
-
-// Parses all jsonl files downloaded and aggregates them into a single results object
-async function main() {
-  const results = {};
-
-  // Get all jsonl files in the logs dir
-  const files = fs.readdirSync(logsDir).filter((f) => f.endsWith(".jsonl"));
-
-  // Iterate over each .jsonl file
-  for (const file of files) {
-    const filePath = path.join(logsDir, file);
-    const fileStream = fs.createReadStream(filePath);
-    const rl = readline.createInterface({ input: fileStream });
-
-    for await (const line of rl) {
-      const entry = JSON.parse(line);
-      processEntry(entry, results);
-    }
-  }
-
-  console.log(`Collected entries:`, JSON.stringify(results, null, 2));
-
-  // For each bucket of each metric compute the average all collected datapoints
-  for (const metricName in results) {
-    const metric = results[metricName];
-    for (const bucketName in metric) {
-      const bucket = metric[bucketName];
-      let avg = bucket.reduce((acc, val) => acc + val, 0) / bucket.length;
-      if (avg > 100) avg = Math.floor(avg);
-      metric[bucketName] = avg;
-    }
-  }
-
-  // Throw in a timestamp
-  results.timestamp = new Date().toISOString();
-
-  // Write results to disk
-  console.log(`Aggregated results:`, JSON.stringify(results, null, 2));
-  fs.writeFileSync(BENCHMARK_FILE_JSON, JSON.stringify(results, null, 2));
-}
-
-main();
diff --git a/scripts/ci/assemble_e2e_benchmark.sh b/scripts/ci/assemble_e2e_benchmark.sh
index aed3ed71975..7456d4e1cbc 100755
--- a/scripts/ci/assemble_e2e_benchmark.sh
+++ b/scripts/ci/assemble_e2e_benchmark.sh
@@ -8,8 +8,11 @@ set -eu
 
 BUCKET_NAME="aztec-ci-artifacts"
 LOG_FOLDER="${LOG_FOLDER:-log}"
+BENCH_FOLDER="${BENCH_FOLDER:-bench}"
 COMMIT_HASH="${COMMIT_HASH:-$(git rev-parse HEAD)}"
-BENCHMARK_FILE_JSON="benchmark.json"
+BASE_COMMIT_HASH=""
+BENCHMARK_FILE_JSON="${BENCH_FOLDER}/benchmark.json"
+BASE_BENCHMARK_FILE_JSON="${BENCH_FOLDER}/base-benchmark.json"
 
 # Adapted from yarn-project/end-to-end/scripts/upload_logs_to_s3.sh
 if [ "${CIRCLE_BRANCH:-}" = "master" ]; then
@@ -37,15 +40,26 @@ aws s3 cp "s3://${BUCKET_NAME}/${LOG_SOURCE_FOLDER}/" $LOG_FOLDER --exclude '*'
 # this skips the whole aggregation. For now, that's fine because all benchmark files have the
 # same rebuild pattern rules. But if that changes, then we'd need to go up in the commit history
 # to find the latest log files for the unchanged benchmarks.
-EXPECTED_BENCHMARK_COUNT=$(find yarn-project/end-to-end/src -type f -name "bench*.test.ts" | wc -l)
-DOWNLOADED_BENCHMARK_COUNT=$(find $LOG_FOLDER -type f -name "*.jsonl" | wc -l)
-if [ "$DOWNLOADED_BENCHMARK_COUNT" -lt "$EXPECTED_BENCHMARK_COUNT" ]; then
-  echo Found $DOWNLOADED_BENCHMARK_COUNT out of $EXPECTED_BENCHMARK_COUNT benchmark log files in s3://${BUCKET_NAME}/${LOG_SOURCE_FOLDER}/. Exiting.
+EXPECTED_LOGS_COUNT=$(find yarn-project/end-to-end/src -type f -name "bench*.test.ts" | wc -l)
+DOWNLOADED_LOGS_COUNT=$(find $LOG_FOLDER -type f -name "*.jsonl" | wc -l)
+if [ "$DOWNLOADED_LOGS_COUNT" -lt "$EXPECTED_LOGS_COUNT" ]; then
+  echo Found $DOWNLOADED_LOGS_COUNT out of $EXPECTED_LOGS_COUNT benchmark log files in s3://${BUCKET_NAME}/${LOG_SOURCE_FOLDER}/. Exiting.
   exit 0
 fi
 
 # Generate the aggregated benchmark file
-node scripts/ci/aggregate_e2e_benchmark.js
+mkdir -p $BENCH_FOLDER
+CONTAINER_BENCH_FOLDER="/usr/src/yarn-project/bench"
+CONTAINER_LOG_FOLDER="/usr/src/yarn-project/log"
+export DOCKER_RUN_OPTS="\
+ -v $(realpath $BENCH_FOLDER):${CONTAINER_BENCH_FOLDER}:rw \
+ -e BENCH_FOLDER=${CONTAINER_BENCH_FOLDER} \
+ -v $(realpath $LOG_FOLDER):${CONTAINER_LOG_FOLDER}:rw \
+ -e LOG_FOLDER=${CONTAINER_LOG_FOLDER} \
+ -e BASE_COMMIT_HASH \
+ -e AZTEC_BOT_COMMENTER_GITHUB_TOKEN \
+ -e CIRCLE_PULL_REQUEST"
+yarn-project/scripts/run_script.sh workspace @aztec/scripts bench-aggregate
 echo "generated: $BENCHMARK_FILE_JSON"
 
 # Upload it to master or pulls
@@ -56,9 +70,29 @@ if [ -n "${BENCHMARK_LATEST_FILE:-}" ]; then
   aws s3 cp $BENCHMARK_FILE_JSON "s3://${BUCKET_NAME}/${BENCHMARK_LATEST_FILE}"
 fi
 
-# If on a pull request, comment on it
+# If on a pull request, get the data from the most recent commit on master where it's available, 
+# generate a markdown comment, and post it on the pull request
 if [ -n "${CIRCLE_PULL_REQUEST:-}" ]; then
-  (node scripts/ci/comment_e2e_benchmark.js && echo "commented on pr $CIRCLE_PULL_REQUEST") || echo "failed commenting on pr"
+  MASTER_COMMIT_HASH=$(curl -s "https://api.github.com/repos/AztecProtocol/aztec-packages/pulls/${CIRCLE_PULL_REQUEST##*/}" | jq -r '.base.sha')
+  MASTER_COMMIT_HASHES=($(git log $MASTER_COMMIT_HASH --format="%H" -n 50))
+
+  set +e
+  echo "Searching for base benchmark data starting from commit $MASTER_COMMIT_HASH"
+  for commit_hash in "${MASTER_COMMIT_HASHES[@]}"; do
+    aws s3 cp "s3://${BUCKET_NAME}/benchmarks-v1/master/$commit_hash.json" $BASE_BENCHMARK_FILE_JSON
+    if [ $? -eq 0 ]; then
+      echo "Downloaded base data from commit $commit_hash"
+      export BASE_COMMIT_HASH=$commit_hash
+      break;
+    fi
+  done
+  set -e
+
+  if [ -z "${BASE_COMMIT_HASH:-}" ]; then 
+    echo "No base commit data found"
+  fi
+
+  (yarn-project/scripts/run_script.sh workspace @aztec/scripts bench-comment && echo "commented on pr $CIRCLE_PULL_REQUEST") || echo "failed commenting on pr"
 fi
 
 
diff --git a/scripts/ci/benchmark_shared.js b/scripts/ci/benchmark_shared.js
deleted file mode 100644
index fe3735b4035..00000000000
--- a/scripts/ci/benchmark_shared.js
+++ /dev/null
@@ -1,50 +0,0 @@
-// Block sizes to track (duplicated from yarn-project/end-to-end/src/benchmarks/bench_publish_rollup.test.ts)
-const ROLLUP_SIZES = process.env.ROLLUP_SIZES
-  ? process.env.ROLLUP_SIZES.split(",").map(Number)
-  : [8, 32, 128];
-
-// Block size to use for building chains of multiple length (duplicated from yarn-project/end-to-end/src/benchmarks/bench_process_history.test.ts)
-const BLOCK_SIZE = process.env.BLOCK_SIZE ? +process.env.BLOCK_SIZE : 16;
-
-// Chain lengths to test (duplicated from yarn-project/end-to-end/src/benchmarks/bench_process_history.test.ts)
-const CHAIN_LENGTHS = process.env.CHAIN_LENGTHS
-  ? process.env.CHAIN_LENGTHS.split(",").map(Number)
-  : [10, 20, 30];
-
-// Output files
-const BENCHMARK_FILE_JSON = process.env.BENCHMARK_FILE_JSON ?? "benchmark.json";
-
-module.exports = {
-  // Metrics to capture
-  L1_ROLLUP_CALLDATA_SIZE_IN_BYTES: "l1_rollup_calldata_size_in_bytes",
-  L1_ROLLUP_CALLDATA_GAS: "l1_rollup_calldata_gas",
-  L1_ROLLUP_EXECUTION_GAS: "l1_rollup_execution_gas",
-  L2_BLOCK_PROCESSING_TIME: "l2_block_processing_time_in_ms",
-  CIRCUIT_SIMULATION_TIME: "circuit_simulation_time_in_ms",
-  CIRCUIT_INPUT_SIZE: "circuit_input_size_in_bytes",
-  CIRCUIT_OUTPUT_SIZE: "circuit_output_size_in_bytes",
-  NOTE_SUCCESSFUL_DECRYPTING_TIME: "note_successful_decrypting_time_in_ms",
-  NOTE_TRIAL_DECRYPTING_TIME: "note_trial_decrypting_time_in_ms",
-  L2_BLOCK_BUILD_TIME: "l2_block_building_time_in_ms",
-  L2_BLOCK_ROLLUP_SIMULATION_TIME: "l2_block_rollup_simulation_time_in_ms",
-  L2_BLOCK_PUBLIC_TX_PROCESS_TIME: "l2_block_public_tx_process_time_in_ms",
-  NODE_HISTORY_SYNC_TIME: "node_history_sync_time_in_ms",
-  NOTE_HISTORY_SUCCESSFUL_DECRYPTING_TIME:
-    "note_history_successful_decrypting_time_in_ms",
-  NOTE_HISTORY_TRIAL_DECRYPTING_TIME:
-    "note_history_trial_decrypting_time_in_ms",
-  NODE_DB_SIZE: "node_database_size_in_bytes",
-  PXE_DB_SIZE: "pxe_database_size_in_bytes",
-  // Events to track
-  L2_BLOCK_PUBLISHED_TO_L1: "rollup-published-to-l1",
-  L2_BLOCK_SYNCED: "l2-block-handled",
-  L2_BLOCK_BUILT: "l2-block-built",
-  CIRCUIT_SIMULATED: "circuit-simulation",
-  NOTE_PROCESSOR_CAUGHT_UP: "note-processor-caught-up",
-  NODE_SYNCED_CHAIN: "node-synced-chain-history",
-  // Other
-  ROLLUP_SIZES,
-  BLOCK_SIZE,
-  CHAIN_LENGTHS,
-  BENCHMARK_FILE_JSON,
-};
diff --git a/scripts/ci/comment_e2e_benchmark.js b/scripts/ci/comment_e2e_benchmark.js
deleted file mode 100644
index 38c8aea4c06..00000000000
--- a/scripts/ci/comment_e2e_benchmark.js
+++ /dev/null
@@ -1,243 +0,0 @@
-// Given a local benchmark json aggregated file, reformats it in markdown
-// and comments on the PR that prompted it. If the CI is rerun, the comment
-// is updated.
-
-const https = require("https");
-const fs = require("fs");
-
-const GITHUB_TOKEN = process.env.AZTEC_BOT_COMMENTER_GITHUB_TOKEN;
-const OWNER = "AztecProtocol";
-const REPO = "aztec3-packages";
-const COMMENT_MARK = "<!-- AUTOGENERATED BENCHMARK COMMENT -->";
-
-const {
-  ROLLUP_SIZES,
-  BLOCK_SIZE,
-  BENCHMARK_FILE_JSON,
-  L1_ROLLUP_CALLDATA_SIZE_IN_BYTES,
-  L1_ROLLUP_CALLDATA_GAS,
-  L1_ROLLUP_EXECUTION_GAS,
-  L2_BLOCK_PROCESSING_TIME,
-  CIRCUIT_SIMULATION_TIME,
-  CIRCUIT_INPUT_SIZE,
-  CIRCUIT_OUTPUT_SIZE,
-  NOTE_SUCCESSFUL_DECRYPTING_TIME,
-  NOTE_TRIAL_DECRYPTING_TIME,
-  L2_BLOCK_BUILD_TIME,
-  L2_BLOCK_ROLLUP_SIMULATION_TIME,
-  L2_BLOCK_PUBLIC_TX_PROCESS_TIME,
-  NODE_HISTORY_SYNC_TIME,
-  NOTE_HISTORY_SUCCESSFUL_DECRYPTING_TIME,
-  NOTE_HISTORY_TRIAL_DECRYPTING_TIME,
-  NODE_DB_SIZE,
-  PXE_DB_SIZE,
-} = require("./benchmark_shared.js");
-
-const METRICS_GROUPED_BY_ROLLUP_SIZE = [
-  L1_ROLLUP_CALLDATA_SIZE_IN_BYTES,
-  L1_ROLLUP_CALLDATA_GAS,
-  L1_ROLLUP_EXECUTION_GAS,
-  L2_BLOCK_PROCESSING_TIME,
-  NOTE_SUCCESSFUL_DECRYPTING_TIME,
-  NOTE_TRIAL_DECRYPTING_TIME,
-  L2_BLOCK_BUILD_TIME,
-  L2_BLOCK_ROLLUP_SIMULATION_TIME,
-  L2_BLOCK_PUBLIC_TX_PROCESS_TIME,
-];
-
-const METRICS_GROUPED_BY_CHAIN_LENGTH = [
-  NODE_HISTORY_SYNC_TIME,
-  NOTE_HISTORY_SUCCESSFUL_DECRYPTING_TIME,
-  NOTE_HISTORY_TRIAL_DECRYPTING_TIME,
-  NODE_DB_SIZE,
-  PXE_DB_SIZE,
-];
-
-const METRICS_GROUPED_BY_CIRCUIT_NAME = [
-  CIRCUIT_SIMULATION_TIME,
-  CIRCUIT_INPUT_SIZE,
-  CIRCUIT_OUTPUT_SIZE,
-];
-
-function formatValue(value) {
-  return value;
-}
-
-function transpose(obj) {
-  const transposed = {};
-  for (const outerKey in obj) {
-    const innerObj = obj[outerKey];
-    for (const innerKey in innerObj) {
-      if (!transposed[innerKey]) transposed[innerKey] = {};
-      transposed[innerKey][outerKey] = innerObj[innerKey];
-    }
-  }
-  return transposed;
-}
-
-function pick(benchmark, keys) {
-  const result = {};
-  for (const key of keys) {
-    result[key] = benchmark[key];
-  }
-  return result;
-}
-
-function getTableContent(benchmark, groupUnit = "", col1Title = "Metric") {
-  const rowKeys = Object.keys(benchmark);
-  const groups = [
-    ...new Set(rowKeys.flatMap((key) => Object.keys(benchmark[key]))),
-  ];
-  console.log(groups);
-  const header = `| ${col1Title} | ${groups
-    .map((i) => `${i} ${groupUnit}`)
-    .join(" | ")} |`;
-  const separator = `| - | ${groups.map(() => "-").join(" | ")} |`;
-  const rows = rowKeys.map((key) => {
-    const metric = benchmark[key];
-    return `${key} | ${groups
-      .map((i) => formatValue(metric[i]))
-      .join(" | ")} |`;
-  });
-
-  return `
-${header}
-${separator}
-${rows.join("\n")}
-  `;
-}
-
-// Returns the md content to post
-function getPostContent() {
-  const benchmark = JSON.parse(fs.readFileSync(BENCHMARK_FILE_JSON, "utf-8"));
-  delete benchmark.timestamp;
-
-  return `
-## Benchmark results
-
-All benchmarks are run on txs on the \`Benchmarking\` contract on the repository. Each tx consists of a batch call to \`create_note\` and \`increment_balance\`, which guarantees that each tx has a private call, a nested private call, a public call, and a nested public call, as well as an emitted private note, an unencrypted log, and public storage read and write.
-
-### L2 block published to L1
-
-Each column represents the number of txs on an L2 block published to L1.
-${getTableContent(pick(benchmark, METRICS_GROUPED_BY_ROLLUP_SIZE), "txs")}
-
-### L2 chain processing
-
-Each column represents the number of blocks on the L2 chain where each block has ${BLOCK_SIZE} txs.
-${getTableContent(pick(benchmark, METRICS_GROUPED_BY_CHAIN_LENGTH), "blocks")}
-
-### Circuits stats
-
-Stats on running time and I/O sizes collected for every circuit run across all benchmarks.
-${getTableContent(
-  transpose(pick(benchmark, METRICS_GROUPED_BY_CIRCUIT_NAME)),
-  "",
-  "Circuit"
-)}
-
-${COMMENT_MARK}
-`;
-}
-
-// Returns the number of the current PR
-function getPrNumber() {
-  if (!process.env.CIRCLE_PULL_REQUEST) throw new Error(`Not in Circle PR`);
-  const fragments = process.env.CIRCLE_PULL_REQUEST.split("/");
-  return fragments[fragments.length - 1];
-}
-
-// Function to check if a bench comment already exists
-async function getExistingComment() {
-  try {
-    const response = await sendGitHubRequest(
-      `/repos/${OWNER}/${REPO}/issues/${getPrNumber()}/comments`
-    );
-    const comments = JSON.parse(response);
-    return comments.find((comment) => comment.body.includes(COMMENT_MARK));
-  } catch (error) {
-    throw new Error("Error checking for existing comments: " + error.message);
-  }
-}
-
-// Function to create or update a comment
-async function upsertComment(existingCommentId) {
-  try {
-    const commentContent = getPostContent();
-    const commentData = { body: commentContent };
-
-    const requestMethod = existingCommentId ? "PATCH" : "POST";
-    const requestUrl = existingCommentId
-      ? `/repos/${OWNER}/${REPO}/issues/comments/${existingCommentId}`
-      : `/repos/${OWNER}/${REPO}/issues/${getPrNumber()}/comments`;
-
-    await sendGitHubRequest(requestUrl, requestMethod, commentData);
-    console.log("Comment added or updated successfully.");
-  } catch (error) {
-    throw new Error("Error adding or updating comment: " + error.message);
-  }
-}
-
-// Function to send a request to the GitHub API
-async function sendGitHubRequest(url, method = "GET", data = null) {
-  const apiUrl = url.startsWith("http") ? url : `https://api.github.com${url}`;
-  const headers = {
-    Authorization: `Bearer ${GITHUB_TOKEN}`,
-    Accept: "application/vnd.github+json",
-    "X-GitHub-Api-Version": "2022-11-28",
-    "User-Agent": OWNER,
-  };
-  if (data) headers["Content-Type"] = "application/json";
-  const requestOptions = { method, headers };
-
-  return new Promise((resolve, reject) => {
-    const req = https.request(apiUrl, requestOptions, (res) => {
-      if (
-        res.statusCode === 301 ||
-        res.statusCode === 302 ||
-        res.statusCode === 307
-      ) {
-        sendGitHubRequest(res.headers.location, method, data)
-          .then(resolve)
-          .catch(reject);
-        return;
-      } else {
-        let data = "";
-        res.on("data", (chunk) => {
-          data += chunk;
-        });
-
-        res.on("end", () => {
-          if (res.statusCode >= 200 && res.statusCode < 300) {
-            resolve(data);
-          } else {
-            reject(
-              new Error(
-                `GitHub API request failed with ${res.statusCode}: ${data}`
-              )
-            );
-          }
-        });
-      }
-    });
-
-    req.on("error", (error) => {
-      reject(error);
-    });
-
-    if (data) req.write(JSON.stringify(data));
-    req.end();
-  });
-}
-
-async function main() {
-  try {
-    const existingComment = await getExistingComment();
-    await upsertComment(existingComment?.id);
-  } catch (err) {
-    console.error(`error while commenting on pull request:`, err);
-    process.exit(1);
-  }
-}
-
-main();
diff --git a/yarn-project/end-to-end/src/benchmarks/README.md b/yarn-project/end-to-end/src/benchmarks/README.md
new file mode 100644
index 00000000000..23885b690cb
--- /dev/null
+++ b/yarn-project/end-to-end/src/benchmarks/README.md
@@ -0,0 +1,11 @@
+# Benchmarks
+
+Tests in this folder are meant to be used for benchmarking. Stats are collected by instrumenting relevant sections of the code and emitting stats via structured logging. All stats are strongly-typed and defined in `yarn-project/types/src/stats/stats.ts`.
+
+These stats are emitted to jsonl files named after the test being run if the `BENCHMARK` flag is enabled or if running on `CI`. This setup happens when calling the `setup` helper of e2e tests in `yarn-project/end-to-end/src/fixtures/logging.ts`. Note that by default stats from all e2e tests are collected on the CI, and are uploaded to S3 using the `upload_logs_to_s3.sh` script called at the end of `run_tests_local`. All jsonl files are uploaded to the `aztec-ci-artifacts` bucket under the `logs` folder. Tests run in master are uploaded to `logs/master/COMMIT_HASH`, while tests from a PR are uploaded to `logs/pulls/PULL_REQUEST_NUMBER`.
+
+After all benchmark tests are executed, a `bench-summary` CI job takes care of aggregating them, using the scripts in `yarn-project/scripts/src/benchmarks` orchestrated by `scripts/ci/assemble_e2e_benchmark.sh`. This script downloads all jsonl files, extracts metrics grouped by block size or chain length, and outputs an aggregated benchmark json file which is uploaded to S3. This file is uploaded to the same `aztec-ci-artifacts` bucket but under the `benchmarks` folder.
+
+Metrics are strongly typed as well and defined in `yarn-project/types/src/stats/metrics.ts`, while the `yarn-project/scripts/src/benchmarks/aggregate.ts` script takes care of generating them out of the collected stats from the jsonl files.
+
+Once the summary is generated, if the benchmark run is on a PR, then the summary job will also download the latest benchmark from master, compare it against the current run, generate a markdown summary, and post it to the pull request on github. This uses the `AZTEC_BOT_COMMENTER_GITHUB_TOKEN`, which is a fine-grained personal access token from the `AztecBot` github user with rw permissions on issues and pull requests.
diff --git a/yarn-project/end-to-end/src/benchmarks/bench_process_history.test.ts b/yarn-project/end-to-end/src/benchmarks/bench_process_history.test.ts
index ce78602059a..73a9532d940 100644
--- a/yarn-project/end-to-end/src/benchmarks/bench_process_history.test.ts
+++ b/yarn-project/end-to-end/src/benchmarks/bench_process_history.test.ts
@@ -5,6 +5,11 @@ import { elapsed } from '@aztec/foundation/timer';
 import { BenchmarkingContract } from '@aztec/noir-contracts/types';
 import { SequencerClient } from '@aztec/sequencer-client';
 import { INITIAL_L2_BLOCK_NUM } from '@aztec/types';
+import {
+  BENCHMARK_HISTORY_BLOCK_SIZE,
+  BENCHMARK_HISTORY_CHAIN_LENGTHS,
+  NodeSyncedChainHistoryStats,
+} from '@aztec/types/stats';
 
 import { EndToEndContext } from '../fixtures/utils.js';
 import {
@@ -16,8 +21,8 @@ import {
   waitRegisteredAccountSynced,
 } from './utils.js';
 
-const BLOCK_SIZE = process.env.BLOCK_SIZE ? +process.env.BLOCK_SIZE : 16;
-const CHAIN_LENGTHS = process.env.CHAIN_LENGTHS ? process.env.CHAIN_LENGTHS.split(',').map(Number) : [10, 20, 30];
+const BLOCK_SIZE = BENCHMARK_HISTORY_BLOCK_SIZE;
+const CHAIN_LENGTHS = BENCHMARK_HISTORY_CHAIN_LENGTHS;
 const MAX_CHAIN_LENGTH = CHAIN_LENGTHS[CHAIN_LENGTHS.length - 1];
 const SETUP_BLOCK_COUNT = 2; // deploy account + deploy contract
 
@@ -65,7 +70,7 @@ describe('benchmarks/process_history', () => {
           blockNumber,
           blockCount: chainLength,
           dbSize: getFolderSize(dataDirectory),
-        });
+        } satisfies NodeSyncedChainHistoryStats);
 
         // Create a new pxe and measure how much time it takes it to sync with failed and successful decryption
         // Skip the first two blocks used for setup (create account contract and deploy benchmarking contract)
diff --git a/yarn-project/end-to-end/src/benchmarks/bench_publish_rollup.test.ts b/yarn-project/end-to-end/src/benchmarks/bench_publish_rollup.test.ts
index 16ef88b7377..888a4916a92 100644
--- a/yarn-project/end-to-end/src/benchmarks/bench_publish_rollup.test.ts
+++ b/yarn-project/end-to-end/src/benchmarks/bench_publish_rollup.test.ts
@@ -2,12 +2,11 @@ import { AztecNodeService } from '@aztec/aztec-node';
 import { Fr, GrumpkinScalar } from '@aztec/circuits.js';
 import { BenchmarkingContract } from '@aztec/noir-contracts/types';
 import { SequencerClient } from '@aztec/sequencer-client';
+import { BENCHMARK_BLOCK_SIZES } from '@aztec/types/stats';
 
 import { EndToEndContext } from '../fixtures/utils.js';
 import { benchmarkSetup, sendTxs, waitNewPXESynced, waitRegisteredAccountSynced } from './utils.js';
 
-const ROLLUP_SIZES = process.env.ROLLUP_SIZES ? process.env.ROLLUP_SIZES.split(',').map(Number) : [8, 32, 128];
-
 describe('benchmarks/publish_rollup', () => {
   let context: EndToEndContext;
   let contract: BenchmarkingContract;
@@ -17,7 +16,7 @@ describe('benchmarks/publish_rollup', () => {
     ({ context, contract, sequencer } = await benchmarkSetup({ maxTxsPerBlock: 1024 }));
   }, 60_000);
 
-  it.each(ROLLUP_SIZES)(
+  it.each(BENCHMARK_BLOCK_SIZES)(
     `publishes a rollup with %d txs`,
     async (txCount: number) => {
       await sequencer.stop();
diff --git a/yarn-project/package.json b/yarn-project/package.json
index 8e3d25271bb..a24f74be2cc 100644
--- a/yarn-project/package.json
+++ b/yarn-project/package.json
@@ -42,6 +42,7 @@
     "prover-client",
     "rollup-provider",
     "sequencer-client",
+    "scripts",
     "types",
     "world-state",
     "yarn-project-base"
diff --git a/yarn-project/pxe/src/kernel_prover/proof_creator.ts b/yarn-project/pxe/src/kernel_prover/proof_creator.ts
index ba8c2b037d7..40d24e15759 100644
--- a/yarn-project/pxe/src/kernel_prover/proof_creator.ts
+++ b/yarn-project/pxe/src/kernel_prover/proof_creator.ts
@@ -17,6 +17,7 @@ import { siloCommitment } from '@aztec/circuits.js/abis';
 import { Fr } from '@aztec/foundation/fields';
 import { createDebugLogger } from '@aztec/foundation/log';
 import { elapsed } from '@aztec/foundation/timer';
+import { CircuitSimulationStats } from '@aztec/types/stats';
 
 /**
  * Represents the output of the proof creation process for init and inner private kernel circuit.
@@ -119,7 +120,7 @@ export class KernelProofCreator implements ProofCreator {
       duration,
       inputSize: privateInputs.toBuffer().length,
       outputSize: result.toBuffer().length,
-    });
+    } satisfies CircuitSimulationStats);
     this.log('Skipping private kernel init proving...');
     const proof = makeEmptyProof();
 
@@ -141,7 +142,7 @@ export class KernelProofCreator implements ProofCreator {
       duration,
       inputSize: privateInputs.toBuffer().length,
       outputSize: result.toBuffer().length,
-    });
+    } satisfies CircuitSimulationStats);
     this.log('Skipping private kernel inner proving...');
     const proof = makeEmptyProof();
 
@@ -164,7 +165,7 @@ export class KernelProofCreator implements ProofCreator {
       duration,
       inputSize: privateInputs.toBuffer().length,
       outputSize: result.toBuffer().length,
-    });
+    } satisfies CircuitSimulationStats);
     this.log('Skipping private kernel ordering proving...');
     const proof = makeEmptyProof();
 
diff --git a/yarn-project/pxe/src/note_processor/note_processor.ts b/yarn-project/pxe/src/note_processor/note_processor.ts
index 9853251ea71..0d2e3b30a4e 100644
--- a/yarn-project/pxe/src/note_processor/note_processor.ts
+++ b/yarn-project/pxe/src/note_processor/note_processor.ts
@@ -5,6 +5,7 @@ import { Fr } from '@aztec/foundation/fields';
 import { createDebugLogger } from '@aztec/foundation/log';
 import { Timer } from '@aztec/foundation/timer';
 import { AztecNode, KeyStore, L2BlockContext, L2BlockL2Logs, NoteSpendingInfo, PublicKey } from '@aztec/types';
+import { NoteProcessorStats } from '@aztec/types/stats';
 
 import { Database, NoteSpendingInfoDao } from '../database/index.js';
 import { getAcirSimulator } from '../simulator/index.js';
@@ -23,20 +24,6 @@ interface ProcessedData {
   noteSpendingInfoDaos: NoteSpendingInfoDao[];
 }
 
-/** Accumulated stats for a note processor.  */
-type NoteProcessorStats = {
-  /** How many notes have been seen and trial-decrypted. */
-  seen: number;
-  /** How many notes were successfully decrypted. */
-  decrypted: number;
-  /** How many notes failed processing. */
-  failed: number;
-  /** How many blocks were spanned.  */
-  blocks: number;
-  /** How many txs were spanned.  */
-  txs: number;
-};
-
 /**
  * NoteProcessor is responsible for decrypting logs and converting them to notes via their originating contracts
  * before storing them against their owner.
diff --git a/yarn-project/pxe/src/synchronizer/synchronizer.ts b/yarn-project/pxe/src/synchronizer/synchronizer.ts
index 87fd155d85b..8c01efc4fda 100644
--- a/yarn-project/pxe/src/synchronizer/synchronizer.ts
+++ b/yarn-project/pxe/src/synchronizer/synchronizer.ts
@@ -3,6 +3,7 @@ import { computeGlobalsHash } from '@aztec/circuits.js/abis';
 import { DebugLogger, createDebugLogger } from '@aztec/foundation/log';
 import { InterruptableSleep } from '@aztec/foundation/sleep';
 import { AztecNode, INITIAL_L2_BLOCK_NUM, KeyStore, L2BlockContext, L2BlockL2Logs, LogType } from '@aztec/types';
+import { NoteProcessorCaughtUpStats } from '@aztec/types/stats';
 
 import { Database } from '../database/index.js';
 import { NoteProcessor } from '../note_processor/index.js';
@@ -184,7 +185,7 @@ export class Synchronizer {
           duration: noteProcessor.timer.ms(),
           dbSize: this.db.estimateSize(),
           ...noteProcessor.stats,
-        });
+        } satisfies NoteProcessorCaughtUpStats);
         this.noteProcessorsToCatchUp.shift();
         this.noteProcessors.push(noteProcessor);
       }
diff --git a/yarn-project/scripts/.eslintrc.cjs b/yarn-project/scripts/.eslintrc.cjs
new file mode 100644
index 00000000000..e659927475c
--- /dev/null
+++ b/yarn-project/scripts/.eslintrc.cjs
@@ -0,0 +1 @@
+module.exports = require('@aztec/foundation/eslint');
diff --git a/yarn-project/scripts/README.md b/yarn-project/scripts/README.md
new file mode 100644
index 00000000000..d0ad794d0c7
--- /dev/null
+++ b/yarn-project/scripts/README.md
@@ -0,0 +1,3 @@
+# Scripts
+
+Private package with strongly-typed TypeScript scripts used by CI, such as the benchmark aggregation and reporting scripts.
\ No newline at end of file
diff --git a/yarn-project/scripts/benchmark.json b/yarn-project/scripts/benchmark.json
new file mode 100644
index 00000000000..8443a67824f
--- /dev/null
+++ b/yarn-project/scripts/benchmark.json
@@ -0,0 +1,103 @@
+{
+  "circuit_simulation_time_in_ms": {
+    "private-kernel-init": 54.9340490797546,
+    "private-kernel-ordering": 30.192484662576685,
+    "base-rollup": 871,
+    "root-rollup": 37.926829268292686,
+    "private-kernel-inner": 51.861111111111114,
+    "public-kernel-private-input": 51.72453703703704,
+    "public-kernel-non-first-iteration": 31.63425925925926,
+    "merge-rollup": 1
+  },
+  "circuit_input_size_in_bytes": {
+    "private-kernel-init": 56577,
+    "private-kernel-ordering": 20137,
+    "base-rollup": 631604,
+    "root-rollup": 4072,
+    "private-kernel-inner": 72288,
+    "public-kernel-private-input": 37359,
+    "public-kernel-non-first-iteration": 37401,
+    "merge-rollup": 2592
+  },
+  "circuit_output_size_in_bytes": {
+    "private-kernel-init": 14745,
+    "private-kernel-ordering": 8089,
+    "base-rollup": 810,
+    "root-rollup": 1097,
+    "private-kernel-inner": 14745,
+    "public-kernel-private-input": 14745,
+    "public-kernel-non-first-iteration": 14745,
+    "merge-rollup": 873
+  },
+  "node_history_sync_time_in_ms": {
+    "10": 30823,
+    "20": 75516,
+    "30": 136231
+  },
+  "node_database_size_in_bytes": {
+    "10": 1194179,
+    "20": 1900681,
+    "30": 2754125
+  },
+  "note_history_successful_decrypting_time_in_ms": {
+    "10": 4653,
+    "20": 12961,
+    "30": 20148
+  },
+  "pxe_database_size_in_bytes": {
+    "10": 54187,
+    "20": 108338,
+    "30": 162578
+  },
+  "note_history_trial_decrypting_time_in_ms": {
+    "10": 147,
+    "20": 208,
+    "30": 254
+  },
+  "l2_block_building_time_in_ms": {
+    "8": 9114,
+    "32": 36117,
+    "128": 152315
+  },
+  "l2_block_rollup_simulation_time_in_ms": {
+    "8": 6771,
+    "32": 26781,
+    "128": 107164
+  },
+  "l2_block_public_tx_process_time_in_ms": {
+    "8": 2300,
+    "32": 9209,
+    "128": 44431
+  },
+  "l1_rollup_calldata_gas": {
+    "8": 222984,
+    "32": 867956,
+    "128": 3449696
+  },
+  "l1_rollup_calldata_size_in_bytes": {
+    "8": 45444,
+    "32": 179588,
+    "128": 716132
+  },
+  "l1_rollup_execution_gas": {
+    "8": 842071,
+    "32": 3595064,
+    "128": 22205065
+  },
+  "l2_block_processing_time_in_ms": {
+    "8": 1060,
+    "32": 3981,
+    "128": 15688
+  },
+  "note_successful_decrypting_time_in_ms": {
+    "8": 332,
+    "32": 1019,
+    "128": 3780
+  },
+  "note_trial_decrypting_time_in_ms": {
+    "8": 34,
+    "32": 108,
+    "128": 138
+  },
+  "timestamp": "2023-10-10T17:51:38.017Z"
+}
diff --git a/yarn-project/scripts/package.json b/yarn-project/scripts/package.json
new file mode 100644
index 00000000000..7c7fba41894
--- /dev/null
+++ b/yarn-project/scripts/package.json
@@ -0,0 +1,60 @@
+{
+  "name": "@aztec/scripts",
+  "private": true,
+  "version": "0.1.0",
+  "type": "module",
+  "exports": "./dest/index.js",
+  "bin": {
+    "bench-aggregate": "./dest/bin/bench-aggregate.js",
+    "bench-comment": "./dest/bin/bench-comment.js",
+    "bench-markdown": "./dest/bin/bench-markdown.js"
+  },
+  "scripts": {
+    "build": "yarn clean && tsc -b",
+    "build:dev": "tsc -b --watch",
+    "clean": "rm -rf ./dest .tsbuildinfo",
+    "formatting": "run -T prettier --check ./src && run -T eslint ./src",
+    "formatting:fix": "run -T prettier -w ./src",
+    "start:dev": "tsc-watch -p tsconfig.json --onSuccess 'yarn start'",
+    "start": "node ./dest/index.js",
+    "test": "NODE_NO_WARNINGS=1 node --experimental-vm-modules $(yarn bin jest) --passWithNoTests"
+  },
+  "inherits": [
+    "../package.common.json"
+  ],
+  "dependencies": {
+    "@aztec/foundation": "workspace:^",
+    "@aztec/types": "workspace:^",
+    "fs-extra": "^11.1.1",
+    "lodash.pick": "^4.4.0",
+    "tslib": "^2.4.0"
+  },
+  "devDependencies": {
+    "@jest/globals": "^29.5.0",
+    "@rushstack/eslint-patch": "^1.1.4",
+    "@types/jest": "^29.5.0",
+    "@types/lodash.pick": "^4.4.7",
+    "@types/node": "^18.14.6",
+    "jest": "^29.5.0",
+    "ts-jest": "^29.1.0",
+    "ts-node": "^10.9.1",
+    "typescript": "^5.0.4"
+  },
+  "files": [
+    "dest",
+    "src",
+    "!*.test.*"
+  ],
+  "types": "./dest/index.d.ts",
+  "jest": {
+    "preset": "ts-jest/presets/default-esm",
+    "moduleNameMapper": {
+      "^(\\.{1,2}/.*)\\.m?js$": "$1"
+    },
+    "testRegex": "./src/.*\\.test\\.(js|mjs|ts)$",
+    "rootDir": "./src"
+  },
+  "engines": {
+    "node": ">=18"
+  }
+}
diff --git a/yarn-project/scripts/run_script.sh b/yarn-project/scripts/run_script.sh
new file mode 100755
index 00000000000..a3347411e85
--- /dev/null
+++ b/yarn-project/scripts/run_script.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Pulls the image with the built scripts package and runs the given command in it. DOCKER_RUN_OPTS is left unquoted on purpose so it can expand to several options.
+[ -n "${BUILD_SYSTEM_DEBUG:-}" ] && set -x # conditionally trace
+set -eu
+
+export PATH="$PATH:$(git rev-parse --show-toplevel)/build-system/scripts"
+
+ecr_login
+
+REPO="yarn-project"
+retry docker pull "$(calculate_image_uri "$REPO")"
+retry docker tag "$(calculate_image_uri "$REPO")" "aztecprotocol/$REPO:latest"
+
+docker run ${DOCKER_RUN_OPTS:-} --rm "aztecprotocol/$REPO:latest" "$@"
\ No newline at end of file
diff --git a/yarn-project/scripts/src/benchmarks/aggregate.ts b/yarn-project/scripts/src/benchmarks/aggregate.ts
new file mode 100644
index 00000000000..1a872548b9b
--- /dev/null
+++ b/yarn-project/scripts/src/benchmarks/aggregate.ts
@@ -0,0 +1,194 @@
+// Given a local folder with the e2e benchmark files, generates a single file
+// output with the grouped metrics to be published. This script can probably
+// be replaced by a single call to jq, but I found this easier to write,
+// and pretty much every CI comes with a working version of node.
+//
+// To test this locally, first run the benchmark tests from the yarn-project/end-to-end folder
+// BENCHMARK=1 yarn test bench
+//
+// And then run this script from the yarn-project/scripts folder
+// LOG_FOLDER=../end-to-end/log yarn bench-aggregate
+import { createConsoleLogger } from '@aztec/foundation/log';
+import {
+  BENCHMARK_BLOCK_SIZES,
+  BENCHMARK_HISTORY_BLOCK_SIZE,
+  BENCHMARK_HISTORY_CHAIN_LENGTHS,
+  BenchmarkMetricResults,
+  BenchmarkResults,
+  BenchmarkResultsWithTimestamp,
+  CircuitSimulationStats,
+  L1PublishStats,
+  L2BlockBuiltStats,
+  L2BlockHandledStats,
+  MetricName,
+  NodeSyncedChainHistoryStats,
+  NoteProcessorCaughtUpStats,
+  Stats,
+} from '@aztec/types/stats';
+
+import * as fs from 'fs';
+import { mkdirpSync } from 'fs-extra';
+import * as path from 'path';
+import * as readline from 'readline';
+
+import { BenchDir, BenchFile, LogsDir } from './paths.js';
+
+const log = createConsoleLogger();
+
+/** Records a data point for the given metric and bucket, logging and skipping undefined or non-numeric values */
+function append(
+  results: BenchmarkCollectedResults,
+  metric: MetricName,
+  bucket: number | string,
+  value: number | bigint,
+) {
+  if (value === undefined) {
+    log(`Undefined value for ${metric} in bucket ${bucket}`);
+    return;
+  }
+  const point = Number(value);
+  if (Number.isNaN(point)) {
+    log(`Value ${value} for ${metric} in ${bucket} is not a number`);
+    return;
+  }
+  const metricResults = (results[metric] = results[metric] ?? {});
+  const bucketResults = (metricResults[bucket] = metricResults[bucket] ?? []);
+  bucketResults.push(point);
+}
+
+/** Collects calldata gas, calldata size and gas used from a 'rollup-published-to-l1' entry, bucketed by tx count */
+function processRollupPublished(entry: L1PublishStats, results: BenchmarkCollectedResults) {
+  const { txCount, calldataGas, calldataSize, gasUsed } = entry;
+  if (!BENCHMARK_BLOCK_SIZES.includes(txCount)) return;
+  append(results, 'l1_rollup_calldata_gas', txCount, calldataGas);
+  append(results, 'l1_rollup_calldata_size_in_bytes', txCount, calldataSize);
+  append(results, 'l1_rollup_execution_gas', txCount, gasUsed);
+}
+
+/**
+ * Collects the block processing time from an 'l2-block-handled' entry, bucketed by the tx count of the block.
+ * Entries flagged as isBlockOurs are ignored, as are blocks whose size is not one of the benchmarked sizes.
+ */
+function processRollupBlockSynced(entry: L2BlockHandledStats, results: BenchmarkCollectedResults) {
+  const { txCount, isBlockOurs, duration } = entry;
+  const isBenchmarkedSize = BENCHMARK_BLOCK_SIZES.includes(txCount);
+  if (!isBenchmarkedSize || isBlockOurs) return;
+  append(results, 'l2_block_processing_time_in_ms', txCount, duration);
+}
+
+/**
+ * Collects the simulation time and input/output sizes from a 'circuit-simulation' entry.
+ * Buckets are circuit names, and entries without a circuit name are ignored.
+ */
+function processCircuitSimulation(entry: CircuitSimulationStats, results: BenchmarkCollectedResults) {
+  const { circuitName, duration, inputSize, outputSize } = entry;
+  if (!circuitName) return;
+  append(results, 'circuit_simulation_time_in_ms', circuitName, duration);
+  append(results, 'circuit_input_size_in_bytes', circuitName, inputSize);
+  append(results, 'circuit_output_size_in_bytes', circuitName, outputSize);
+}
+
+/**
+ * Collects note decryption metrics from a 'note-processor-caught-up' entry. Block sizes are the buckets for the
+ * note_*_decrypting_time_in_ms metrics, and chain lengths for the note_history_* and pxe database size ones.
+ */
+function processNoteProcessorCaughtUp(entry: NoteProcessorCaughtUpStats, results: BenchmarkCollectedResults) {
+  const { seen, decrypted, blocks, duration, dbSize } = entry;
+  const isHistoryRun = BENCHMARK_HISTORY_CHAIN_LENGTHS.includes(blocks);
+  if (BENCHMARK_BLOCK_SIZES.includes(decrypted)) {
+    append(results, 'note_successful_decrypting_time_in_ms', decrypted, duration);
+  }
+  if (decrypted === 0 && BENCHMARK_BLOCK_SIZES.includes(seen)) {
+    append(results, 'note_trial_decrypting_time_in_ms', seen, duration);
+  }
+  if (isHistoryRun && decrypted > 0) {
+    append(results, 'note_history_successful_decrypting_time_in_ms', blocks, duration);
+    append(results, 'pxe_database_size_in_bytes', blocks, dbSize);
+  }
+  if (isHistoryRun && decrypted === 0) append(results, 'note_history_trial_decrypting_time_in_ms', blocks, duration);
+}
+
+/** Collects building, rollup simulation and public processing times from an 'l2-block-built' entry, by tx count */
+function processL2BlockBuilt(entry: L2BlockBuiltStats, results: BenchmarkCollectedResults) {
+  const { txCount, duration, rollupCircuitsDuration, publicProcessDuration } = entry;
+  if (!BENCHMARK_BLOCK_SIZES.includes(txCount)) return;
+  append(results, 'l2_block_building_time_in_ms', txCount, duration);
+  append(results, 'l2_block_rollup_simulation_time_in_ms', txCount, rollupCircuitsDuration);
+  append(results, 'l2_block_public_tx_process_time_in_ms', txCount, publicProcessDuration);
+}
+
+/** Collects node sync time and database size from a 'node-synced-chain-history' entry, bucketed by chain length */
+function processNodeSyncedChain(entry: NodeSyncedChainHistoryStats, results: BenchmarkCollectedResults) {
+  const { blockCount, txsPerBlock, duration, dbSize } = entry;
+  // Only chains with a benchmarked length and the benchmark history block size are collected
+  if (!BENCHMARK_HISTORY_CHAIN_LENGTHS.includes(blockCount) || txsPerBlock !== BENCHMARK_HISTORY_BLOCK_SIZE) return;
+  append(results, 'node_history_sync_time_in_ms', blockCount, duration);
+  append(results, 'node_database_size_in_bytes', blockCount, dbSize);
+}
+
+/** Dispatches a parsed logfile entry to the processor matching its eventName; other event names are ignored */
+function processEntry(entry: Stats, results: BenchmarkCollectedResults) {
+  switch (entry.eventName) {
+    case 'rollup-published-to-l1':
+      return processRollupPublished(entry, results);
+    case 'l2-block-handled':
+      return processRollupBlockSynced(entry, results);
+    case 'circuit-simulation':
+      return processCircuitSimulation(entry, results);
+    case 'note-processor-caught-up':
+      return processNoteProcessorCaughtUp(entry, results);
+    case 'l2-block-built':
+      return processL2BlockBuilt(entry, results);
+    case 'node-synced-chain-history':
+      return processNodeSyncedChain(entry, results);
+    default:
+      return;
+  }
+}
+
+/** Raw data points collected for a single metric, keyed by bucket name. */
+type BenchmarkCollectedMetricResults = Record<string, number[]>;
+
+/** Raw data points collected for every metric, keyed by metric name, pending averaging of each bucket. */
+type BenchmarkCollectedResults = Partial<Record<MetricName, BenchmarkCollectedMetricResults>>;
+
+/** Parses all jsonl files in the logs dir, averages every bucket of every metric, and writes the results to disk. */
+export async function main() {
+  const collected: BenchmarkCollectedResults = {};
+
+  // Get all jsonl files in the logs dir
+  const files = fs.readdirSync(LogsDir).filter(f => f.endsWith('.jsonl'));
+
+  // Iterate over each .jsonl file
+  for (const file of files) {
+    const filePath = path.join(LogsDir, file);
+    const fileStream = fs.createReadStream(filePath);
+    const rl = readline.createInterface({ input: fileStream });
+    for await (const line of rl) {
+      // Blank lines are not valid JSON, so skip them rather than aborting the whole aggregation
+      if (line.trim() === '') continue;
+      processEntry(JSON.parse(line), collected);
+    }
+  }
+
+  log(`Collected entries: ${JSON.stringify(collected)}`);
+
+  // For each bucket of each metric compute the average of all collected datapoints
+  const results: BenchmarkResults = {};
+  for (const [metricName, metric] of Object.entries(collected)) {
+    const resultMetric: BenchmarkMetricResults = {};
+    results[metricName as MetricName] = resultMetric;
+    for (const [bucketName, bucket] of Object.entries(metric)) {
+      let avg = bucket.reduce((acc, val) => acc + val, 0) / bucket.length;
+      if (avg > 100) avg = Math.floor(avg);
+      resultMetric[bucketName] = avg;
+    }
+  }
+
+  const timestampedResults: BenchmarkResultsWithTimestamp = { ...results, timestamp: new Date().toISOString() };
+
+  // Write results to disk
+  log(`Aggregated results: ${JSON.stringify(timestampedResults, null, 2)}`);
+  mkdirpSync(BenchDir);
+  fs.writeFileSync(BenchFile, JSON.stringify(timestampedResults, null, 2));
+}
diff --git a/yarn-project/scripts/src/benchmarks/comment.ts b/yarn-project/scripts/src/benchmarks/comment.ts
new file mode 100644
index 00000000000..12815f95283
--- /dev/null
+++ b/yarn-project/scripts/src/benchmarks/comment.ts
@@ -0,0 +1,103 @@
+// Given a local benchmark json aggregated file, reformats it in markdown
+// and comments on the PR that prompted it. If the CI is rerun, the comment
+// is updated.
+import { createConsoleLogger } from '@aztec/foundation/log';
+
+import * as https from 'https';
+
+import { getMarkdown } from './markdown.js';
+
+const GITHUB_TOKEN = process.env.AZTEC_BOT_COMMENTER_GITHUB_TOKEN;
+const OWNER = 'AztecProtocol';
+const REPO = 'aztec3-packages';
+const COMMENT_MARK = '<!-- AUTOGENERATED BENCHMARK COMMENT -->';
+
+const log = createConsoleLogger();
+
+/** Returns the number of the current PR, read from the last segment of the CIRCLE_PULL_REQUEST url */
+function getPrNumber() {
+  const prUrl = process.env.CIRCLE_PULL_REQUEST;
+  if (!prUrl) throw new Error(`Not in Circle PR`);
+  return prUrl.split('/').slice(-1)[0];
+}
+
+/** Returns the PR comment that contains COMMENT_MARK, if any. Only the first page (up to 100 comments) is searched. */
+async function getExistingComment() {
+  try {
+    const response = await sendGitHubRequest(`/repos/${OWNER}/${REPO}/issues/${getPrNumber()}/comments?per_page=100`);
+    const comments = JSON.parse(response);
+    return comments.find((comment: any) => comment.body?.includes(COMMENT_MARK));
+  } catch (error: any) {
+    throw new Error('Error checking for existing comments: ' + error.message);
+  }
+}
+
+/** Creates the benchmark comment on the current PR, or updates it in place when an existing comment id is given */
+async function upsertComment(existingCommentId: string) {
+  try {
+    // Render the markdown before choosing the endpoint
+    const payload = { body: getMarkdown() };
+
+    // PATCH the existing comment if there is one, otherwise POST a new comment on the PR
+    const [requestMethod, requestUrl] = existingCommentId
+      ? ['PATCH', `/repos/${OWNER}/${REPO}/issues/comments/${existingCommentId}`]
+      : ['POST', `/repos/${OWNER}/${REPO}/issues/${getPrNumber()}/comments`];
+
+    await sendGitHubRequest(requestUrl, requestMethod, payload);
+    log('Comment added or updated successfully.');
+  } catch (error: any) {
+    throw new Error('Error adding or updating comment: ' + error.message);
+  }
+}
+
+/** Sends a request to the GitHub API, following redirects, and resolves with the response body on a 2xx status */
+function sendGitHubRequest(url: string, method = 'GET', data?: object): Promise<string> {
+  const apiUrl = url.startsWith('http') ? url : `https://api.github.com${url}`;
+  const headers: Record<string, string> = {
+    Authorization: `Bearer ${GITHUB_TOKEN}`,
+    Accept: 'application/vnd.github+json',
+    'X-GitHub-Api-Version': '2022-11-28',
+    'User-Agent': OWNER,
+  };
+  if (data) headers['Content-Type'] = 'application/json';
+
+  const requestOptions = { method, headers };
+
+  // TODO: Use octokit instead of manually using the https node module
+  return new Promise((resolve, reject) => {
+    const req = https.request(apiUrl, requestOptions, res => {
+      const status = res.statusCode ?? 0;
+      const { location } = res.headers;
+      if ([301, 302, 307].includes(status) && location) {
+        // Discard the body of the redirect response so it does not sit unconsumed, then follow the redirect
+        res.resume();
+        sendGitHubRequest(location, method, data).then(resolve).catch(reject);
+        return;
+      }
+
+      let body = '';
+      res.on('data', chunk => {
+        body += chunk;
+      });
+
+      res.on('end', () => {
+        if (status >= 200 && status < 300) {
+          resolve(body);
+        } else {
+          reject(new Error(`GitHub API request failed with ${res.statusCode}: ${body}`));
+        }
+      });
+    });
+
+    req.on('error', reject);
+
+    if (data) req.write(JSON.stringify(data));
+    req.end();
+  });
+}
+
+/** Entrypoint: looks up the existing benchmark comment on the PR, if any, and creates or updates it */
+export async function main() {
+  const { id: existingCommentId } = (await getExistingComment()) ?? {};
+  await upsertComment(existingCommentId);
+}
diff --git a/yarn-project/scripts/src/benchmarks/markdown.ts b/yarn-project/scripts/src/benchmarks/markdown.ts
new file mode 100644
index 00000000000..ea1818ccb53
--- /dev/null
+++ b/yarn-project/scripts/src/benchmarks/markdown.ts
@@ -0,0 +1,152 @@
+// Generate a markdown file with a table summary of the aggregated benchmarks.
+// If a benchmark-base file is available, shows the comparison against base (ie master in a PR).
+import { createConsoleLogger } from '@aztec/foundation/log';
+import { BENCHMARK_HISTORY_BLOCK_SIZE, Metrics } from '@aztec/types/stats';
+
+import * as fs from 'fs';
+import pick from 'lodash.pick';
+
+import { BaseBenchFile, BenchFile } from './paths.js';
+
+// Input file paths
+const inputFile = BenchFile;
+const baseFile = BaseBenchFile;
+
+const COMMENT_MARK = '<!-- AUTOGENERATED BENCHMARK COMMENT -->';
+const S3_URL = 'https://aztec-ci-artifacts.s3.us-east-2.amazonaws.com';
+
+const log = createConsoleLogger();
+
+/** Returns a cell content formatted as string, with the percent diff against base when it rounds to at least 1% */
+function getCell(
+  data: Record<string, Record<string, number>>,
+  base: Record<string, Record<string, number>> | undefined,
+  row: string,
+  col: string,
+) {
+  // Columns are the union of the buckets of all rows, so a row may have no value for a given column
+  const value: number | undefined = data[row]?.[col];
+  if (value === undefined) return '';
+  const baseValue = base?.[row]?.[col];
+  // Without a base value (or with a base of zero) there is nothing to compare against
+  const percentDiff = baseValue ? Math.round(((value - baseValue) / baseValue) * 100) : 0;
+  if (!(Math.abs(percentDiff) >= 1)) return formatValue(value);
+  // Differences above 10% are shown in bold and flagged with a warning
+  const [warning, highlight] = Math.abs(percentDiff) > 10 ? [':warning:', '**'] : ['', ''];
+  const percentSign = percentDiff > 0 ? '+' : '';
+  const diffText = `<span title="${formatValue(baseValue!)}">${percentSign}${percentDiff}%</span>`;
+  return `${warning} ${formatValue(value)} (${highlight}${diffText}${highlight})`;
+}
+
+/** Returns the description of the first entry in Metrics with the given name, or undefined if there is none. */
+function tryGetDescription(name: string) {
+  return Metrics.filter(metric => metric.name === name)[0]?.description;
+}
+
+/** Wraps the name in a span whose title is the metric description, so it shows as a tooltip. */
+function withDescriptionTitle(name: string) {
+  const tooltip = tryGetDescription(name);
+  // Names without a known description are returned unchanged
+  return tooltip ? `<span title="${tooltip}">${name}</span>` : name;
+}
+
+/** Formats a numeric value for display. */
+function formatValue(value: number) {
+  // Values below 100 get three significant digits, the rest are formatted with toLocaleString
+  return value < 100 ? value.toPrecision(3) : value.toLocaleString();
+}
+
+/** Transposes an object topmost and nested keys, skipping topmost entries that are not objects (eg a timestamp). */
+function transpose(obj: any) {
+  const transposed: any = {};
+  for (const outerKey in obj) {
+    if (typeof obj[outerKey] !== 'object') continue;
+    for (const innerKey in obj[outerKey]) {
+      if (!transposed[innerKey]) transposed[innerKey] = {};
+      transposed[innerKey][outerKey] = obj[outerKey][innerKey];
+    }
+  }
+  return transposed;
+}
+
+/** Returns the base benchmark for comparison parsed from baseFile, or undefined if it cannot be read or parsed */
+function getBaseBenchmark(): Record<string, Record<string, number>> | undefined {
+  try {
+    return JSON.parse(fs.readFileSync(baseFile, 'utf-8'));
+  } catch {
+    return undefined;
+  }
+}
+
+/** Renders the data as a md table, with one row per topmost key and one column per distinct nested key. */
+function getTableContent(
+  data: Record<string, Record<string, number>>,
+  baseBenchmark: Record<string, Record<string, number>> | undefined,
+  groupUnit = '',
+  col1Title = 'Metric',
+) {
+  const rowNames = Object.keys(data);
+  const columns = Array.from(new Set(rowNames.flatMap(rowName => Object.keys(data[rowName]))));
+  const headerCells = columns.map(column => `${withDescriptionTitle(column)} ${groupUnit}`);
+  const header = `| ${col1Title} | ${headerCells.join(' | ')} |`;
+  const separator = `| - | ${columns.map(() => '-').join(' | ')} |`;
+  const makeRowCells = (rowName: string) => columns.map(column => getCell(data, baseBenchmark, rowName, column));
+  const rows = rowNames.map(rowName => `${withDescriptionTitle(rowName)} | ${makeRowCells(rowName).join(' | ')} |`);
+
+  return `
+${header}
+${separator}
+${rows.join('\n')}
+  `;
+}
+
+/** Reads the benchmark file and returns a md report with tables by block size, chain length and circuit, ending in COMMENT_MARK. */
+export function getMarkdown() {
+  const benchmark = JSON.parse(fs.readFileSync(inputFile, 'utf-8'));
+  const baseBenchmark = getBaseBenchmark();
+  const metricsByBlockSize = Metrics.filter(m => m.groupBy === 'block-size').map(m => m.name);
+  const metricsByChainLength = Metrics.filter(m => m.groupBy === 'chain-length').map(m => m.name);
+  const metricsByCircuitName = Metrics.filter(m => m.groupBy === 'circuit-name').map(m => m.name);
+
+  const baseHash = process.env.BASE_COMMIT_HASH;
+  const baseUrl = baseHash && `[\`${baseHash.slice(0, 8)}\`](${S3_URL}/benchmarks-v1/master/${baseHash}.json)`;
+  const baseCommitText = baseUrl
+    ? `\nValues are compared against data from master at commit ${baseUrl} and shown if the difference exceeds 1%.`
+    : '';
+
+  const prNumber = process.env.CIRCLE_PULL_REQUEST && parseInt(process.env.CIRCLE_PULL_REQUEST.split('/')[6]);
+  const prSourceDataUrl = prNumber && `${S3_URL}/benchmarks-v1/pulls/${prNumber}.json`;
+  const prSourceDataText = prSourceDataUrl
+    ? `\nThis benchmark source data is available in JSON format on S3 [here](${prSourceDataUrl}).`
+    : '';
+
+  return `
+## Benchmark results
+
+All benchmarks are run on txs on the \`Benchmarking\` contract on the repository. Each tx consists of a batch call  to \`create_note\` and \`increment_balance\`, which guarantees that each tx has a private call, a nested private call, a public call, and a nested public call, as well as an emitted private note, an unencrypted log, and public storage read and write. 
+${prSourceDataText}
+${baseCommitText}
+
+### L2 block published to L1
+
+Each column represents the number of txs on an L2 block published to L1.
+${getTableContent(pick(benchmark, metricsByBlockSize), baseBenchmark, 'txs')}
+
+### L2 chain processing
+
+Each column represents the number of blocks on the L2 chain where each block has ${BENCHMARK_HISTORY_BLOCK_SIZE} txs.
+${getTableContent(pick(benchmark, metricsByChainLength), baseBenchmark, 'blocks')}
+
+### Circuits stats
+
+Stats on running time and I/O sizes collected for every circuit run across all benchmarks.
+${getTableContent(transpose(pick(benchmark, metricsByCircuitName)), transpose(baseBenchmark), '', 'Circuit')}
+
+${COMMENT_MARK}
+`;
+}
+
+/** Entrypoint: generates the benchmark markdown and prints it through the console logger */
+export function main() {
+  log(getMarkdown());
+}
diff --git a/yarn-project/scripts/src/benchmarks/paths.ts b/yarn-project/scripts/src/benchmarks/paths.ts
new file mode 100644
index 00000000000..8b82f42b19a
--- /dev/null
+++ b/yarn-project/scripts/src/benchmarks/paths.ts
@@ -0,0 +1,13 @@
+import * as path from 'path';
+
+/** Folder to load raw logs from, taken from the LOG_FOLDER env var and defaulting to `log` */
+export const LogsDir = process.env.LOG_FOLDER ?? `log`;
+
+/** Folder with the aggregated benchmark results, taken from the BENCH_FOLDER env var and defaulting to `bench` */
+export const BenchDir = process.env.BENCH_FOLDER ?? `bench`;
+
+/** Path of the aggregated benchmark file within BenchDir */
+export const BenchFile = path.join(BenchDir, 'benchmark.json');
+
+/** Path of the base benchmark file within BenchDir */
+export const BaseBenchFile = path.join(BenchDir, 'base-benchmark.json');
diff --git a/yarn-project/scripts/src/bin/bench-aggregate.ts b/yarn-project/scripts/src/bin/bench-aggregate.ts
new file mode 100644
index 00000000000..44ee68f3694
--- /dev/null
+++ b/yarn-project/scripts/src/bin/bench-aggregate.ts
@@ -0,0 +1,7 @@
+import { main } from '../benchmarks/aggregate.js';
+
+void main().catch(err => {
+  // eslint-disable-next-line no-console
+  console.error(err.message);
+  process.exit(1);
+});
diff --git a/yarn-project/scripts/src/bin/bench-comment.ts b/yarn-project/scripts/src/bin/bench-comment.ts
new file mode 100644
index 00000000000..4216f047a41
--- /dev/null
+++ b/yarn-project/scripts/src/bin/bench-comment.ts
@@ -0,0 +1,7 @@
+import { main } from '../benchmarks/comment.js';
+
+void main().catch(err => {
+  // eslint-disable-next-line no-console
+  console.error(err.message);
+  process.exit(1);
+});
diff --git a/yarn-project/scripts/src/bin/bench-markdown.ts b/yarn-project/scripts/src/bin/bench-markdown.ts
new file mode 100644
index 00000000000..f7c2af364ff
--- /dev/null
+++ b/yarn-project/scripts/src/bin/bench-markdown.ts
@@ -0,0 +1,9 @@
+import { main } from '../benchmarks/markdown.js';
+
+try {
+  void main();
+} catch (err: any) {
+  // eslint-disable-next-line no-console
+  console.error(err.message);
+  process.exit(1);
+}
diff --git a/yarn-project/scripts/src/index.ts b/yarn-project/scripts/src/index.ts
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/yarn-project/scripts/tsconfig.json b/yarn-project/scripts/tsconfig.json
new file mode 100644
index 00000000000..831130c7c84
--- /dev/null
+++ b/yarn-project/scripts/tsconfig.json
@@ -0,0 +1,17 @@
+{
+  "extends": "..",
+  "compilerOptions": {
+    "outDir": "dest",
+    "rootDir": "src",
+    "tsBuildInfoFile": ".tsbuildinfo"
+  },
+  "references": [
+    {
+      "path": "../foundation"
+    },
+    {
+      "path": "../types"
+    }
+  ],
+  "include": ["src"]
+}
diff --git a/yarn-project/sequencer-client/src/publisher/index.ts b/yarn-project/sequencer-client/src/publisher/index.ts
index 97cef6b9915..9fd03d25880 100644
--- a/yarn-project/sequencer-client/src/publisher/index.ts
+++ b/yarn-project/sequencer-client/src/publisher/index.ts
@@ -5,34 +5,6 @@ import { ViemTxSender } from './viem-tx-sender.js';
 export { L1Publisher } from './l1-publisher.js';
 export { PublisherConfig } from './config.js';
 
-/** Stats logged for each L1 rollup publish tx.*/
-export type L1PublishStats = {
-  /** Name of the event for metrics purposes */
-  eventName: 'rollup-published-to-l1';
-  /** Effective gas price of the tx. */
-  gasPrice: bigint;
-  /** Effective gas used in the tx. */
-  gasUsed: bigint;
-  /** Hash of the L1 tx. */
-  transactionHash: string;
-  /** Gas cost of the calldata. */
-  calldataGas: number;
-  /** Size in bytes of the calldata. */
-  calldataSize: number;
-  /** Number of txs in the L2 block. */
-  txCount: number;
-  /** Number of the L2 block. */
-  blockNumber: number;
-  /** Number of encrypted logs. */
-  encryptedLogCount?: number;
-  /** Number of unencrypted logs. */
-  unencryptedLogCount?: number;
-  /** Serialised size of encrypted logs. */
-  encryptedLogSize?: number;
-  /** Serialised size of unencrypted logs. */
-  unencryptedLogSize?: number;
-};
-
 /**
  * Returns a new instance of the L1Publisher.
  * @param config - Configuration to initialize the new instance.
diff --git a/yarn-project/sequencer-client/src/publisher/l1-publisher.ts b/yarn-project/sequencer-client/src/publisher/l1-publisher.ts
index 1294f698555..9e9e68517dc 100644
--- a/yarn-project/sequencer-client/src/publisher/l1-publisher.ts
+++ b/yarn-project/sequencer-client/src/publisher/l1-publisher.ts
@@ -1,12 +1,12 @@
 import { createDebugLogger } from '@aztec/foundation/log';
 import { InterruptableSleep } from '@aztec/foundation/sleep';
 import { ExtendedContractData, L2Block } from '@aztec/types';
+import { L1PublishStats } from '@aztec/types/stats';
 
 import pick from 'lodash.pick';
 
 import { L2BlockReceiver } from '../receiver.js';
 import { PublisherConfig } from './config.js';
-import { L1PublishStats } from './index.js';
 
 /**
  * Stats for a sent transaction.
diff --git a/yarn-project/sequencer-client/src/sequencer/sequencer.ts b/yarn-project/sequencer-client/src/sequencer/sequencer.ts
index 010d855cb05..c2014632262 100644
--- a/yarn-project/sequencer-client/src/sequencer/sequencer.ts
+++ b/yarn-project/sequencer-client/src/sequencer/sequencer.ts
@@ -5,6 +5,7 @@ import { RunningPromise } from '@aztec/foundation/running-promise';
 import { Timer, elapsed } from '@aztec/foundation/timer';
 import { P2P } from '@aztec/p2p';
 import { ContractDataSource, L1ToL2MessageSource, L2Block, L2BlockSource, MerkleTreeId, Tx } from '@aztec/types';
+import { L2BlockBuiltStats } from '@aztec/types/stats';
 import { WorldStateStatus, WorldStateSynchronizer } from '@aztec/world-state';
 
 import times from 'lodash.times';
@@ -182,7 +183,7 @@ export class Sequencer {
         publicProcessDuration: publicProcessorDuration,
         rollupCircuitsDuration: rollupCircuitsDuration,
         ...block.getStats(),
-      });
+      } satisfies L2BlockBuiltStats);
 
       await this.publishExtendedContractData(validTxs, block);
 
diff --git a/yarn-project/sequencer-client/src/simulator/public_kernel.ts b/yarn-project/sequencer-client/src/simulator/public_kernel.ts
index 3a21d3a2696..89df560bcc3 100644
--- a/yarn-project/sequencer-client/src/simulator/public_kernel.ts
+++ b/yarn-project/sequencer-client/src/simulator/public_kernel.ts
@@ -1,6 +1,7 @@
 import { PublicKernelInputs, PublicKernelPublicInputs, simulatePublicKernelCircuit } from '@aztec/circuits.js';
 import { createDebugLogger } from '@aztec/foundation/log';
 import { elapsed } from '@aztec/foundation/timer';
+import { CircuitSimulationStats } from '@aztec/types/stats';
 
 import { PublicKernelCircuitSimulator } from './index.js';
 
@@ -24,7 +25,7 @@ export class WasmPublicKernelCircuitSimulator implements PublicKernelCircuitSimu
       duration,
       inputSize: input.toBuffer().length,
       outputSize: result.toBuffer().length,
-    });
+    } satisfies CircuitSimulationStats);
     return result;
   }
 
@@ -42,7 +43,7 @@ export class WasmPublicKernelCircuitSimulator implements PublicKernelCircuitSimu
       duration,
       inputSize: input.toBuffer().length,
       outputSize: result.toBuffer().length,
-    });
+    } satisfies CircuitSimulationStats);
     return result;
   }
 }
diff --git a/yarn-project/sequencer-client/src/simulator/rollup.ts b/yarn-project/sequencer-client/src/simulator/rollup.ts
index 4a31a4fc0d1..0f45ebdccc4 100644
--- a/yarn-project/sequencer-client/src/simulator/rollup.ts
+++ b/yarn-project/sequencer-client/src/simulator/rollup.ts
@@ -12,6 +12,7 @@ import {
 } from '@aztec/circuits.js';
 import { createDebugLogger } from '@aztec/foundation/log';
 import { elapsed } from '@aztec/foundation/timer';
+import { CircuitSimulationStats } from '@aztec/types/stats';
 
 import { RollupSimulator } from './index.js';
 
@@ -39,7 +40,7 @@ export class WasmRollupCircuitSimulator implements RollupSimulator {
       duration,
       inputSize: input.toBuffer().length,
       outputSize: result.toBuffer().length,
-    });
+    } satisfies CircuitSimulationStats);
 
     return Promise.resolve(result);
   }
@@ -61,7 +62,7 @@ export class WasmRollupCircuitSimulator implements RollupSimulator {
       duration,
       inputSize: input.toBuffer().length,
       outputSize: result.toBuffer().length,
-    });
+    } satisfies CircuitSimulationStats);
 
     return result;
   }
@@ -84,7 +85,7 @@ export class WasmRollupCircuitSimulator implements RollupSimulator {
       duration,
       inputSize: input.toBuffer().length,
       outputSize: result.toBuffer().length,
-    });
+    } satisfies CircuitSimulationStats);
 
     return result;
   }
diff --git a/yarn-project/tsconfig.json b/yarn-project/tsconfig.json
index c59d9b15b2f..e9d16264b63 100644
--- a/yarn-project/tsconfig.json
+++ b/yarn-project/tsconfig.json
@@ -42,7 +42,8 @@
     { "path": "world-state/tsconfig.json" },
     { "path": "boxes/private-token/tsconfig.json" },
     { "path": "boxes/blank/tsconfig.json" },
-    { "path": "boxes/blank-react/tsconfig.json" }
+    { "path": "boxes/blank-react/tsconfig.json" },
+    { "path": "scripts/tsconfig.json" }
   ],
   "files": ["./@types/jest/index.d.ts"]
 }
diff --git a/yarn-project/types/package.json b/yarn-project/types/package.json
index ff64626d7ac..597a6d02bc1 100644
--- a/yarn-project/types/package.json
+++ b/yarn-project/types/package.json
@@ -2,7 +2,10 @@
   "name": "@aztec/types",
   "version": "0.1.0",
   "type": "module",
-  "exports": "./dest/index.js",
+  "exports": {
+    ".": "./dest/index.js",
+    "./stats": "./dest/stats/index.js"
+  },
   "typedocOptions": {
     "entryPoints": [
       "./src/index.ts"
diff --git a/yarn-project/types/src/stats/benchmarks.ts b/yarn-project/types/src/stats/benchmarks.ts
new file mode 100644
index 00000000000..284cdadd48f
--- /dev/null
+++ b/yarn-project/types/src/stats/benchmarks.ts
@@ -0,0 +1,12 @@
+import { MetricName } from './metrics.js';
+
+/** Aggregated benchmark results, keyed by metric name. Metrics without results may be absent. */
+export type BenchmarkResults = Partial<Record<MetricName, BenchmarkMetricResults>>;
+
+/** Aggregated benchmark result for a single metric: maps a bucket (such as chain size) to its aggregated value. */
+export type BenchmarkMetricResults = Record<string, number>;
+
+/** Aggregated benchmark results tagged with a timestamp. */
+export type BenchmarkResultsWithTimestamp = BenchmarkResults & {
+  /** When this benchmark happened. */ timestamp: string;
+};
diff --git a/yarn-project/types/src/stats/index.ts b/yarn-project/types/src/stats/index.ts
new file mode 100644
index 00000000000..5bf1e142616
--- /dev/null
+++ b/yarn-project/types/src/stats/index.ts
@@ -0,0 +1,18 @@
+export * from './stats.js';
+export * from './metrics.js';
+export * from './benchmarks.js';
+
+/** Block sizes for multi-size benchmarks: comma-separated BENCHMARK_BLOCK_SIZES env var, else [8, 32, 128]. */
+export const BENCHMARK_BLOCK_SIZES = process.env.BENCHMARK_BLOCK_SIZES
+  ? process.env.BENCHMARK_BLOCK_SIZES.split(',').map(Number)
+  : [8, 32, 128];
+
+/** Block size for building chains of multiple blocks: numeric BENCHMARK_HISTORY_BLOCK_SIZE env var, else 16. */
+export const BENCHMARK_HISTORY_BLOCK_SIZE = process.env.BENCHMARK_HISTORY_BLOCK_SIZE
+  ? +process.env.BENCHMARK_HISTORY_BLOCK_SIZE
+  : 16;
+
+/** Chain lengths for history benchmarks: comma-separated BENCHMARK_HISTORY_CHAIN_LENGTHS env var, else [5, 10]. */
+export const BENCHMARK_HISTORY_CHAIN_LENGTHS = process.env.BENCHMARK_HISTORY_CHAIN_LENGTHS
+  ? process.env.BENCHMARK_HISTORY_CHAIN_LENGTHS.split(',').map(Number)
+  : [5, 10];
diff --git a/yarn-project/types/src/stats/metrics.ts b/yarn-project/types/src/stats/metrics.ts
new file mode 100644
index 00000000000..763f39dcb16
--- /dev/null
+++ b/yarn-project/types/src/stats/metrics.ts
@@ -0,0 +1,130 @@
+import { StatsEventName } from './stats.js';
+
+/** Dimension a metric is grouped by in benchmarks: block size, length of chain processed, or circuit name. */
+export type MetricGroupBy = 'block-size' | 'chain-length' | 'circuit-name';
+
+/** Definition of a metric to track in benchmarks. */
+export interface Metric {
+  /** Identifier of the metric. */
+  name: string;
+  /** Dimension this metric is grouped by. */
+  groupBy: MetricGroupBy;
+  /** Description of what the metric measures. */
+  description: string;
+  /** Names of the stats events used for generating this metric. */
+  events: readonly StatsEventName[];
+}
+
+/** Metric definitions to track from benchmarks. Declared `as const` so MetricName can be derived from the literal names. */
+export const Metrics = [
+  {
+    name: 'l1_rollup_calldata_size_in_bytes',
+    groupBy: 'block-size',
+    description: 'Size in bytes of the tx calldata posted to L1 when submitting a block.',
+    events: ['rollup-published-to-l1'],
+  },
+  {
+    name: 'l1_rollup_calldata_gas',
+    groupBy: 'block-size',
+    description: 'Estimated gas cost of the tx calldata when posting a block to L1.',
+    events: ['rollup-published-to-l1'],
+  },
+  {
+    name: 'l1_rollup_execution_gas',
+    groupBy: 'block-size',
+    description: 'Total gas used in a tx when submitting a block to L1.',
+    events: ['rollup-published-to-l1'],
+  },
+  {
+    name: 'l2_block_processing_time_in_ms',
+    groupBy: 'block-size',
+    description: 'Time for the state synchroniser to process an L2 block that was not built by its own sequencer.',
+    events: ['l2-block-handled'],
+  },
+  {
+    name: 'note_successful_decrypting_time_in_ms',
+    groupBy: 'block-size',
+    description: 'Time for the PXE to decrypt all notes in a block where they belong to a registered account.',
+    events: ['note-processor-caught-up'],
+  },
+  {
+    name: 'note_trial_decrypting_time_in_ms',
+    groupBy: 'block-size',
+    description:
+      'Time for the PXE to try decrypting all notes in a block where they do not belong to a registered account.',
+    events: ['note-processor-caught-up'],
+  },
+  {
+    name: 'l2_block_building_time_in_ms',
+    groupBy: 'block-size',
+    description: 'Total time for the sequencer to build an L2 block from a set of txs.',
+    events: ['l2-block-built'],
+  },
+  {
+    name: 'l2_block_rollup_simulation_time_in_ms',
+    groupBy: 'block-size',
+    description: 'Time for the sequencer to run the rollup circuit simulation when assembling a block.',
+    events: ['l2-block-built'],
+  },
+  {
+    name: 'l2_block_public_tx_process_time_in_ms',
+    groupBy: 'block-size',
+    description: 'Time for the sequencer to execute public function calls for txs in a block.',
+    events: ['l2-block-built'],
+  },
+  {
+    name: 'node_history_sync_time_in_ms',
+    groupBy: 'chain-length',
+    description: 'Time for a node without a sequencer to sync chain history',
+    events: ['node-synced-chain-history'],
+  },
+  {
+    name: 'note_history_successful_decrypting_time_in_ms',
+    groupBy: 'chain-length',
+    description: 'Time for a PXE to decrypt all notes in the chain where they belong to a registered account.',
+    events: ['note-processor-caught-up'],
+  },
+  {
+    name: 'note_history_trial_decrypting_time_in_ms',
+    groupBy: 'chain-length',
+    description:
+      'Time for a PXE to try decrypting all notes in the chain where they do not belong to a registered account.',
+    events: ['note-processor-caught-up'],
+  },
+  {
+    name: 'node_database_size_in_bytes',
+    groupBy: 'chain-length',
+    description: 'Size on disk of the leveldown database of a node after syncing all chain history.',
+    events: ['node-synced-chain-history'],
+  },
+  {
+    name: 'pxe_database_size_in_bytes',
+    groupBy: 'chain-length',
+    description: 'Estimated size in memory of a PXE database after syncing all notes that belong to it in the chain.',
+    events: ['note-processor-caught-up'],
+  },
+  {
+    name: 'circuit_simulation_time_in_ms',
+    groupBy: 'circuit-name',
+    description: 'Time to run a circuit simulation.',
+    events: ['circuit-simulation'],
+  },
+  {
+    name: 'circuit_input_size_in_bytes',
+    groupBy: 'circuit-name',
+    description: 'Size of the inputs to a circuit simulation.',
+    events: ['circuit-simulation'],
+  },
+  {
+    name: 'circuit_output_size_in_bytes',
+    groupBy: 'circuit-name',
+    description: 'Size of the outputs (ie public inputs) from a circuit simulation.',
+    events: ['circuit-simulation'],
+  },
+] as const satisfies readonly Metric[];
+
+/** Type of the Metrics constant (shares its name with the value). */
+export type Metrics = typeof Metrics;
+
+/** Union of the `name` literals of all entries in Metrics. */
+export type MetricName = Metrics[number]['name'];
diff --git a/yarn-project/types/src/stats/stats.ts b/yarn-project/types/src/stats/stats.ts
new file mode 100644
index 00000000000..0c99e930bbc
--- /dev/null
+++ b/yarn-project/types/src/stats/stats.ts
@@ -0,0 +1,131 @@
+/** Stats associated with an L2 block; mixed into the block-related stats events. */
+export type L2BlockStats = {
+  /** Number of txs in the L2 block. */
+  txCount: number;
+  /** Number of the L2 block. */
+  blockNumber: number;
+  /** Number of encrypted logs. */
+  encryptedLogCount?: number;
+  /** Number of unencrypted logs. */
+  unencryptedLogCount?: number;
+  /** Serialised size of encrypted logs. */
+  encryptedLogSize?: number;
+  /** Serialised size of unencrypted logs. */
+  unencryptedLogSize?: number;
+};
+
+/** Stats logged for each L1 rollup publish tx. */
+export type L1PublishStats = {
+  /** Name of the event for metrics purposes. */
+  eventName: 'rollup-published-to-l1';
+  /** Effective gas price of the tx. */
+  gasPrice: bigint;
+  /** Effective gas used in the tx. */
+  gasUsed: bigint;
+  /** Hash of the L1 tx. */
+  transactionHash: string;
+  /** Gas cost of the calldata. */
+  calldataGas: number;
+  /** Size in bytes of the calldata. */
+  calldataSize: number;
+} & L2BlockStats;
+
+/** Stats logged for syncing node chain history. */
+export type NodeSyncedChainHistoryStats = {
+  /** Name of the event. */
+  eventName: 'node-synced-chain-history';
+  /** Number of txs in the L2 block. NOTE(review): possibly the total across the synced chain; confirm with emitter. */
+  txCount: number;
+  /** Number of txs in each block. */
+  txsPerBlock: number;
+  /** Duration in ms. */
+  duration: number;
+  /** Id of the L2 block. */
+  blockNumber: number;
+  /** Number of blocks processed. */
+  blockCount: number;
+  /** Size of the db in bytes. */
+  dbSize: number;
+};
+
+/** Stats for a circuit simulation. */
+export type CircuitSimulationStats = {
+  /** Name of the event. */
+  eventName: 'circuit-simulation';
+  /** Name of the circuit. */
+  circuitName:
+    | 'base-rollup'
+    | 'private-kernel-init'
+    | 'private-kernel-ordering'
+    | 'root-rollup'
+    | 'merge-rollup'
+    | 'private-kernel-inner'
+    | 'public-kernel-private-input'
+    | 'public-kernel-non-first-iteration';
+  /** Duration in ms. */
+  duration: number;
+  /** Size in bytes of circuit inputs. */
+  inputSize: number;
+  /** Size in bytes of circuit outputs (aka public inputs). */
+  outputSize: number;
+};
+
+/** Stats for an L2 block built by a sequencer, including the common L2BlockStats fields. */
+export type L2BlockBuiltStats = {
+  /** Name of the event. */
+  eventName: 'l2-block-built';
+  /** Total duration in ms. */
+  duration: number;
+  /** Time for processing public txs in ms. */
+  publicProcessDuration: number;
+  /** Time for running rollup circuits in ms. */
+  rollupCircuitsDuration: number;
+} & L2BlockStats;
+
+/** Stats for an L2 block processed by the world state synchronizer, including the common L2BlockStats fields. */
+export type L2BlockHandledStats = {
+  /** Name of the event. */
+  eventName: 'l2-block-handled';
+  /** Total duration in ms. */
+  duration: number;
+  /** Whether the block was produced by this node. */
+  isBlockOurs: boolean;
+} & L2BlockStats;
+
+/** Stats for a note processor that has caught up with the chain, including its accumulated NoteProcessorStats. */
+export type NoteProcessorCaughtUpStats = {
+  /** Name of the event. */
+  eventName: 'note-processor-caught-up';
+  /** Public key of the note processor. */
+  publicKey: string;
+  /** Total time to catch up with the tip of the chain from scratch in ms. */
+  duration: number;
+  /** Size of the notes db. */
+  dbSize: number;
+} & NoteProcessorStats;
+
+/** Accumulated rolling stats for a note processor. */
+export type NoteProcessorStats = {
+  /** How many notes have been seen and trial-decrypted. */
+  seen: number;
+  /** How many notes were successfully decrypted. */
+  decrypted: number;
+  /** How many notes failed processing. */
+  failed: number;
+  /** How many blocks were spanned. */
+  blocks: number;
+  /** How many txs were spanned. */
+  txs: number;
+};
+
+/** Union of all stats emitted in structured logs; each member carries a distinct `eventName` for tracking. */
+export type Stats =
+  | L1PublishStats
+  | NodeSyncedChainHistoryStats
+  | CircuitSimulationStats
+  | L2BlockBuiltStats
+  | L2BlockHandledStats
+  | NoteProcessorCaughtUpStats;
+
+/** Union of the `eventName` literals across all emitted stats. */
+export type StatsEventName = Stats['eventName'];
diff --git a/yarn-project/world-state/src/synchronizer/server_world_state_synchronizer.ts b/yarn-project/world-state/src/synchronizer/server_world_state_synchronizer.ts
index b0010250c8c..2f7e166ff2a 100644
--- a/yarn-project/world-state/src/synchronizer/server_world_state_synchronizer.ts
+++ b/yarn-project/world-state/src/synchronizer/server_world_state_synchronizer.ts
@@ -2,6 +2,7 @@ import { SerialQueue } from '@aztec/foundation/fifo';
 import { createDebugLogger } from '@aztec/foundation/log';
 import { elapsed } from '@aztec/foundation/timer';
 import { L2Block, L2BlockDownloader, L2BlockSource } from '@aztec/types';
+import { L2BlockHandledStats } from '@aztec/types/stats';
 
 import { LevelUp } from 'levelup';
 
@@ -190,7 +191,7 @@ export class ServerWorldStateSynchronizer implements WorldStateSynchronizer {
         duration,
         isBlockOurs: result.isBlockOurs,
         ...l2Block.getStats(),
-      });
+      } satisfies L2BlockHandledStats);
     }
   }
 
diff --git a/yarn-project/yarn.lock b/yarn-project/yarn.lock
index ad79c7505a5..8de88d3e484 100644
--- a/yarn-project/yarn.lock
+++ b/yarn-project/yarn.lock
@@ -706,6 +706,31 @@ __metadata:
   languageName: unknown
   linkType: soft
 
+"@aztec/scripts@workspace:scripts":
+  version: 0.0.0-use.local
+  resolution: "@aztec/scripts@workspace:scripts"
+  dependencies:
+    "@aztec/foundation": "workspace:^"
+    "@aztec/types": "workspace:^"
+    "@jest/globals": ^29.5.0
+    "@rushstack/eslint-patch": ^1.1.4
+    "@types/jest": ^29.5.0
+    "@types/lodash.pick": ^4.4.7
+    "@types/node": ^18.14.6
+    fs-extra: ^11.1.1
+    jest: ^29.5.0
+    lodash.pick: ^4.4.0
+    ts-jest: ^29.1.0
+    ts-node: ^10.9.1
+    tslib: ^2.4.0
+    typescript: ^5.0.4
+  bin:
+    bench-aggregate: ./dest/bin/bench-aggregate.js
+    bench-comment: ./dest/bin/bench-comment.js
+    bench-markdown: ./dest/bin/bench-markdown.js
+  languageName: unknown
+  linkType: soft
+
 "@aztec/sequencer-client@workspace:^, @aztec/sequencer-client@workspace:sequencer-client":
   version: 0.0.0-use.local
   resolution: "@aztec/sequencer-client@workspace:sequencer-client"