diff --git a/.github/workflows/ci-core-reusable.yml b/.github/workflows/ci-core-reusable.yml
index 85eefc862272..51550f87a34b 100644
--- a/.github/workflows/ci-core-reusable.yml
+++ b/.github/workflows/ci-core-reusable.yml
@@ -67,7 +67,7 @@ jobs:
           ci_run zk test rust
           # Benchmarks are not tested by `cargo nextest` unless specified explicitly, and even then `criterion` harness is incompatible
           # with how `cargo nextest` runs tests. Thus, we run criterion-based benchmark tests manually.
-          ci_run zk f cargo test --release -p vm-benchmark --bench criterion --bench fill_bootloader
+          ci_run zk f cargo test --release -p vm-benchmark --bench oneshot --bench batch
 
   loadtest:
     runs-on: [matterlabs-ci-runner]
diff --git a/.github/workflows/vm-perf-comparison.yml b/.github/workflows/vm-perf-comparison.yml
index 53dada123574..da88b07779fd 100644
--- a/.github/workflows/vm-perf-comparison.yml
+++ b/.github/workflows/vm-perf-comparison.yml
@@ -1,4 +1,4 @@
-name: Compare VM perfomance to base branch
+name: Compare VM performance to base branch
 
 on:
   pull_request:
@@ -47,7 +47,7 @@ jobs:
           ci_run zk
           ci_run zk compiler system-contracts
           ci_run cargo bench --package vm-benchmark --bench iai | tee base-iai
-          ci_run cargo run --package vm-benchmark --release --bin instruction-counts | tee base-opcodes || touch base-opcodes
+          ci_run cargo run --package vm-benchmark --release --bin instruction_counts | tee base-opcodes || touch base-opcodes
           ci_run yarn workspace system-contracts clean
 
       - name: checkout PR
@@ -59,7 +59,7 @@ jobs:
           ci_run zk
           ci_run zk compiler system-contracts
           ci_run cargo bench --package vm-benchmark --bench iai | tee pr-iai
-          ci_run cargo run --package vm-benchmark --release --bin instruction-counts | tee pr-opcodes || touch pr-opcodes
+          ci_run cargo run --package vm-benchmark --release --bin instruction_counts | tee pr-opcodes || touch pr-opcodes
 
           EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
           echo "speedup<<$EOF" >> $GITHUB_OUTPUT
diff --git a/.github/workflows/vm-perf-to-prometheus.yml b/.github/workflows/vm-perf-to-prometheus.yml
index fce7ead2d696..3cfd4e4deb87 100644
--- a/.github/workflows/vm-perf-to-prometheus.yml
+++ b/.github/workflows/vm-perf-to-prometheus.yml
@@ -21,7 +21,7 @@ jobs:
 
       - name: setup-env
         run: |
-          echo PUSH_VM_BENCHMARKS_TO_PROMETHEUS=1 >> .env
+          echo BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL=${{ secrets.BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL }} >> .env
 
           echo ZKSYNC_HOME=$(pwd) >> $GITHUB_ENV
           echo $(pwd)/bin >> $GITHUB_PATH
@@ -31,10 +31,12 @@ jobs:
           run_retried docker compose pull zk
           docker compose up -d zk
           ci_run zk
-          ci_run zk compiler system-contracts
+          ci_run zk compiler all
 
       - name: run benchmarks
         run: |
-          ci_run cargo bench --package vm-benchmark --bench diy_benchmark
+          ci_run cargo bench --package vm-benchmark --bench oneshot
+          # Run only benches with 1,000 transactions per batch to not spend too much time
+          ci_run cargo bench --package vm-benchmark --bench batch '/1000$'
           ci_run cargo bench --package vm-benchmark --bench iai | tee iai-result
           ci_run cargo run --package vm-benchmark --bin iai_results_to_prometheus --release < iai-result
diff --git a/Cargo.lock b/Cargo.lock
index 0d4ba4c23834..54714b21af2b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -7271,14 +7271,18 @@ dependencies = [
 name = "vm-benchmark"
 version = "0.1.0"
 dependencies = [
+ "assert_matches",
  "criterion",
  "iai",
+ "once_cell",
  "rand 0.8.5",
  "tokio",
  "vise",
+ "zksync_contracts",
+ "zksync_multivm",
  "zksync_types",
+ "zksync_utils",
  "zksync_vlog",
- "zksync_vm_benchmark_harness",
 ]
 
 [[package]]
@@ -9751,21 +9755,6 @@ dependencies = [
  "vise-exporter",
 ]
 
-[[package]]
-name = "zksync_vm_benchmark_harness"
-version = "0.1.0"
-dependencies = [
- "assert_matches",
- "once_cell",
- "zk_evm 0.133.0",
- "zksync_contracts",
- "zksync_multivm",
- "zksync_state",
- "zksync_system_constants",
- "zksync_types",
- "zksync_utils",
-]
-
 [[package]]
 name = "zksync_vm_interface"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 6ee6ce79e490..c9c8ff95ebc4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -79,7 +79,6 @@ members = [
     "core/tests/test_account",
     "core/tests/loadnext",
     "core/tests/vm-benchmark",
-    "core/tests/vm-benchmark/harness",
     # Parts of prover workspace that are needed for Core workspace
     "prover/crates/lib/prover_dal",
 ]
@@ -238,7 +237,6 @@ zksync_prover_dal = { version = "0.1.0", path = "prover/crates/lib/prover_dal" }
 zksync_vlog = { version = "0.1.0", path = "core/lib/vlog" }
 zksync_vm_interface = { version = "0.1.0", path = "core/lib/vm_interface" }
 zksync_vm_utils = { version = "0.1.0", path = "core/lib/vm_utils" }
-zksync_vm_benchmark_harness = { version = "0.1.0", path = "core/tests/vm-benchmark/harness" }
 zksync_basic_types = { version = "0.1.0", path = "core/lib/basic_types" }
 zksync_circuit_breaker = { version = "0.1.0", path = "core/lib/circuit_breaker" }
 zksync_config = { version = "0.1.0", path = "core/lib/config" }
diff --git a/core/tests/vm-benchmark/Cargo.toml b/core/tests/vm-benchmark/Cargo.toml
index 27218d79aafe..4586c637e128 100644
--- a/core/tests/vm-benchmark/Cargo.toml
+++ b/core/tests/vm-benchmark/Cargo.toml
@@ -6,46 +6,30 @@ license.workspace = true
 publish = false
 
 [dependencies]
+zksync_contracts.workspace = true
+zksync_multivm.workspace = true
 zksync_types.workspace = true
+zksync_utils.workspace = true
 zksync_vlog.workspace = true
-zksync_vm_benchmark_harness.workspace = true
+criterion.workspace = true
+once_cell.workspace = true
 rand.workspace = true
 vise.workspace = true
 tokio.workspace = true
 
 [dev-dependencies]
-criterion.workspace = true
+assert_matches.workspace = true
 iai.workspace = true
 
 [[bench]]
-name = "criterion"
+name = "oneshot"
 harness = false
 
 [[bench]]
-name = "diy_benchmark"
+name = "batch"
 harness = false
 
 [[bench]]
 name = "iai"
 harness = false
-
-[[bench]]
-name = "fill_bootloader"
-harness = false
-
-[[bin]]
-name = "iai_results_to_prometheus"
-path = "src/iai_results_to_prometheus.rs"
-
-[[bin]]
-name = "compare_iai_results"
-path = "src/compare_iai_results.rs"
-
-[[bin]]
-name = "find-slowest"
-path = "src/find_slowest.rs"
-
-[[bin]]
-name = "instruction-counts"
-path = "src/instruction_counts.rs"
diff --git a/core/tests/vm-benchmark/README.md b/core/tests/vm-benchmark/README.md
index cecbdb31d0cf..b7f056894e73 100644
--- a/core/tests/vm-benchmark/README.md
+++ b/core/tests/vm-benchmark/README.md
@@ -9,35 +9,22 @@ benchmarks, however. There are three different benchmarking tools available:
 
 ```sh
-cargo bench --bench criterion
-cargo bench --bench diy_benchmark
+cargo bench --bench oneshot
+cargo bench --bench batch
 cargo +nightly bench --bench iai
 ```
 
-Criterion is the de-facto microbenchmarking tool for Rust. Run it, then optimize something and run the command again to
-see if your changes have made a difference.
+`oneshot` and `batch` targets use Criterion, the de-facto standard micro-benchmarking tool for Rust. `oneshot` measures
+VM performance on single transactions, and `batch` on entire batches of up to 5,000 transactions. Run these benches,
+then optimize something and run the command again to see if your changes have made a difference.
 
-The DIY benchmark works a bit better in noisy environments and is used to push benchmark data to Prometheus
-automatically.
+IAI uses cachegrind to simulate the CPU, so noise is completely irrelevant to it, but it also doesn't measure exactly
+the same thing as normal benchmarks. You need valgrind to be able to run it.
 
-IAI uses cachegrind to simulate the CPU, so noise is completely irrelevant to it but it also doesn't measure exactly the
-same thing as normal benchmarks. You need valgrind to be able to run it.
-
-You can add your own bytecodes to be benchmarked into the folder "deployment_benchmarks". For iai, you also need to add
-them to "benches/iai.rs".
+You can add new bytecodes to be benchmarked into the [`bytecodes`](src/bytecodes) directory and then add them to the
+`BYTECODES` constant exported by the crate.
 
 ## Profiling (Linux only)
 
 You can also use `sh perf.sh bytecode_file` to produce data that can be fed into the
 [firefox profiler](https://profiler.firefox.com/) for a specific bytecode.
-
-## Fuzzing
-
-There is a fuzzer using this library at core/lib/vm/fuzz. The fuzz.sh script located there starts a fuzzer which
-attempts to make cover as much code as it can to ultimately produce a valid deployment bytecode.
-
-It has no chance of succeeding currently because the fuzzing speed drops to 10 executions/s easily. Optimizing the VM or
-lowering the gas limit will help with that.
-
-The fuzzer has been useful for producing synthetic benchmark inputs. It may be a good tool for finding show transactions
-with a certain gas limit, an empirical way of evaluating gas prices of instructions.
diff --git a/core/tests/vm-benchmark/benches/fill_bootloader.rs b/core/tests/vm-benchmark/benches/batch.rs
similarity index 79%
rename from core/tests/vm-benchmark/benches/fill_bootloader.rs
rename to core/tests/vm-benchmark/benches/batch.rs
index 13fa1df0b2fc..608f6be6d089 100644
--- a/core/tests/vm-benchmark/benches/fill_bootloader.rs
+++ b/core/tests/vm-benchmark/benches/batch.rs
@@ -14,17 +14,15 @@
 use std::{iter, time::Duration};
 
-use criterion::{
-    black_box, criterion_group, criterion_main, measurement::WallTime, BatchSize, BenchmarkGroup,
-    BenchmarkId, Criterion, Throughput,
-};
+use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 use rand::{rngs::StdRng, Rng, SeedableRng};
-use zksync_types::Transaction;
-use zksync_vm_benchmark_harness::{
-    cut_to_allowed_bytecode_size, get_deploy_tx_with_gas_limit, get_heavy_load_test_tx,
-    get_load_test_deploy_tx, get_load_test_tx, get_realistic_load_test_tx, get_transfer_tx,
-    BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, LoadTestParams,
+use vm_benchmark::{
+    criterion::{is_test_mode, BenchmarkGroup, BenchmarkId, CriterionExt, MeteredTime},
+    get_deploy_tx_with_gas_limit, get_heavy_load_test_tx, get_load_test_deploy_tx,
+    get_load_test_tx, get_realistic_load_test_tx, get_transfer_tx, BenchmarkingVm,
+    BenchmarkingVmFactory, Bytecode, Fast, Legacy, LoadTestParams,
 };
+use zksync_types::Transaction;
 
 /// Gas limit for deployment transactions.
 const DEPLOY_GAS_LIMIT: u32 = 30_000_000;
@@ -59,7 +57,7 @@
 }
 
 fn run_vm_expecting_failures<VM: BenchmarkingVmFactory, const FULL: bool>(
-    group: &mut BenchmarkGroup<'_, WallTime>,
+    group: &mut BenchmarkGroup<'_>,
     name: &str,
     txs: &[Transaction],
     expected_failures: &[bool],
@@ -70,25 +68,24 @@ fn run_vm_expecting_failures<VM: BenchmarkingVmFactory, const FULL: bool>(
     }
 
     group.throughput(Throughput::Elements(*txs_in_batch as u64));
-    group.bench_with_input(
+    group.bench_metered_with_input(
         BenchmarkId::new(name, txs_in_batch),
         txs_in_batch,
         |bencher, &txs_in_batch| {
             if FULL {
                 // Include VM initialization / drop into the measured time
-                bencher.iter(|| {
+                bencher.iter(|timer| {
+                    let _guard = timer.start();
                     let mut vm = BenchmarkingVm::<VM>::default();
                     bench_vm::<_, true>(&mut vm, &txs[..txs_in_batch], expected_failures);
                 });
             } else {
-                bencher.iter_batched(
-                    BenchmarkingVm::<VM>::default,
-                    |mut vm| {
-                        bench_vm::<_, false>(&mut vm, &txs[..txs_in_batch], expected_failures);
-                        vm
-                    },
-                    BatchSize::LargeInput, // VM can consume significant amount of RAM, especially the new one
-                );
+                bencher.iter(|timer| {
+                    let mut vm = BenchmarkingVm::<VM>::default();
+                    let guard = timer.start();
+                    bench_vm::<_, false>(&mut vm, &txs[..txs_in_batch], expected_failures);
+                    drop(guard);
+                });
             }
         },
    );
@@ -96,22 +93,23 @@
 }
 
 fn run_vm<VM: BenchmarkingVmFactory, const FULL: bool>(
-    group: &mut BenchmarkGroup<'_, WallTime>,
+    group: &mut BenchmarkGroup<'_>,
     name: &str,
     txs: &[Transaction],
 ) {
     run_vm_expecting_failures::<VM, FULL>(group, name, txs, &[]);
 }
 
-fn bench_fill_bootloader<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Criterion) {
-    let is_test_mode = !std::env::args().any(|arg| arg == "--bench");
-    let txs_in_batch = if is_test_mode {
+fn bench_fill_bootloader<VM: BenchmarkingVmFactory, const FULL: bool>(
+    c: &mut Criterion<MeteredTime>,
+) {
+    let txs_in_batch = if is_test_mode() {
         &TXS_IN_BATCH[..3] // Reduce the number of transactions in a batch so that tests don't take long
     } else {
         TXS_IN_BATCH
     };
 
-    let mut group = c.benchmark_group(if FULL {
+    let mut group = c.metered_group(if FULL {
         format!("fill_bootloader_full{}", VM::LABEL.as_suffix())
     } else {
         format!("fill_bootloader{}", VM::LABEL.as_suffix())
@@ -121,12 +119,12 @@ fn bench_fill_bootloader<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Cr
         .measurement_time(Duration::from_secs(10));
 
     // Deploying simple contract
-    let test_contract =
-        std::fs::read("deployment_benchmarks/deploy_simple_contract").expect("failed to read file");
-    let code = cut_to_allowed_bytecode_size(&test_contract).unwrap();
+    let test_contract = Bytecode::get("deploy_simple_contract");
     let max_txs = *txs_in_batch.last().unwrap() as u32;
     let txs: Vec<_> = (0..max_txs)
-        .map(|nonce| get_deploy_tx_with_gas_limit(code, DEPLOY_GAS_LIMIT, nonce))
+        .map(|nonce| {
+            get_deploy_tx_with_gas_limit(test_contract.bytecode(), DEPLOY_GAS_LIMIT, nonce)
+        })
         .collect();
     run_vm::<VM, FULL>(&mut group, "deploy_simple_contract", &txs);
     drop(txs);
@@ -187,9 +185,12 @@
 }
 
 criterion_group!(
-    benches,
-    bench_fill_bootloader::<Fast, false>,
-    bench_fill_bootloader::<Fast, true>,
-    bench_fill_bootloader::<Legacy, false>
+    name = benches;
+    config = Criterion::default()
+        .configure_from_args()
+        .with_measurement(MeteredTime::new("fill_bootloader"));
+    targets = bench_fill_bootloader::<Fast, false>,
+    bench_fill_bootloader::<Fast, true>,
+    bench_fill_bootloader::<Legacy, false>
 );
 criterion_main!(benches);
diff --git a/core/tests/vm-benchmark/benches/criterion.rs b/core/tests/vm-benchmark/benches/criterion.rs
deleted file mode 100644
index 9e12fc25f54c..000000000000
--- a/core/tests/vm-benchmark/benches/criterion.rs
+++ /dev/null
@@ -1,98 +0,0 @@
-use std::time::Duration;
-
-use criterion::{
-    black_box, criterion_group, criterion_main, measurement::WallTime, BatchSize, BenchmarkGroup,
-    Criterion,
-};
-use zksync_types::Transaction;
-use zksync_vm_benchmark_harness::{
-    cut_to_allowed_bytecode_size, get_deploy_tx, get_heavy_load_test_tx, get_load_test_deploy_tx,
-    get_load_test_tx, get_realistic_load_test_tx, BenchmarkingVm, BenchmarkingVmFactory, Fast,
-    Legacy, LoadTestParams,
-};
-
-const SAMPLE_SIZE: usize = 20;
-
-fn benches_in_folder<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Criterion) {
-    let mut group = c.benchmark_group(VM::LABEL.as_str());
-    group
-        .sample_size(SAMPLE_SIZE)
-        .measurement_time(Duration::from_secs(10));
-
-    for path in std::fs::read_dir("deployment_benchmarks").unwrap() {
-        let path = path.unwrap().path();
-
-        let test_contract = std::fs::read(&path).expect("failed to read file");
-
-        let code = cut_to_allowed_bytecode_size(&test_contract).unwrap();
-        let tx = get_deploy_tx(code);
-        let file_name = path.file_name().unwrap().to_str().unwrap();
-        let full_suffix = if FULL { "/full" } else { "" };
-        let bench_name = format!("{file_name}{full_suffix}");
-        group.bench_function(bench_name, |bencher| {
-            if FULL {
-                // Include VM initialization / drop into the measured time
-                bencher.iter(|| BenchmarkingVm::<VM>::default().run_transaction(black_box(&tx)));
-            } else {
-                bencher.iter_batched(
-                    BenchmarkingVm::<VM>::default,
-                    |mut vm| {
-                        let result = vm.run_transaction(black_box(&tx));
-                        (vm, result)
-                    },
-                    BatchSize::LargeInput, // VM can consume significant amount of RAM, especially the new one
-                );
-            }
-        });
-    }
-}
-
-fn bench_load_test<VM: BenchmarkingVmFactory>(c: &mut Criterion) {
-    let mut group = c.benchmark_group(VM::LABEL.as_str());
-    group
-        .sample_size(SAMPLE_SIZE)
-        .measurement_time(Duration::from_secs(10));
-
-    // Nonce 0 is used for the deployment transaction
-    let tx = get_load_test_tx(1, 10_000_000, LoadTestParams::default());
-    bench_load_test_transaction::<VM>(&mut group, "load_test", &tx);
-
-    let tx = get_realistic_load_test_tx(1);
-    bench_load_test_transaction::<VM>(&mut group, "load_test_realistic", &tx);
-
-    let tx = get_heavy_load_test_tx(1);
-    bench_load_test_transaction::<VM>(&mut group, "load_test_heavy", &tx);
-}
-
-fn bench_load_test_transaction<VM: BenchmarkingVmFactory>(
-    group: &mut BenchmarkGroup<'_, WallTime>,
-    name: &str,
-    tx: &Transaction,
-) {
-    group.bench_function(name, |bencher| {
-        bencher.iter_batched(
-            || {
-                let mut vm = BenchmarkingVm::<VM>::default();
-                vm.run_transaction(&get_load_test_deploy_tx());
-                vm
-            },
-            |mut vm| {
-                let result = vm.run_transaction(black_box(tx));
-                assert!(!result.result.is_failed(), "{:?}", result.result);
-                (vm, result)
-            },
-            BatchSize::LargeInput,
-        );
-    });
-}
-
-criterion_group!(
-    benches,
-    benches_in_folder::<Fast, false>,
-    benches_in_folder::<Fast, true>,
-    benches_in_folder::<Legacy, false>,
-    benches_in_folder::<Legacy, true>,
-    bench_load_test::<Fast>,
-    bench_load_test::<Legacy>
-);
-criterion_main!(benches);
diff --git a/core/tests/vm-benchmark/benches/diy_benchmark.rs b/core/tests/vm-benchmark/benches/diy_benchmark.rs
deleted file mode 100644
index 1601de5eb85f..000000000000
--- a/core/tests/vm-benchmark/benches/diy_benchmark.rs
+++ /dev/null
@@ -1,53 +0,0 @@
-use std::time::{Duration, Instant};
-
-use criterion::black_box;
-use vise::{Gauge, LabeledFamily, Metrics};
-use zksync_vm_benchmark_harness::{cut_to_allowed_bytecode_size, get_deploy_tx, BenchmarkingVm};
-
-fn main() {
-    let mut results = vec![];
-
-    for path in std::fs::read_dir("deployment_benchmarks").unwrap() {
-        let path = path.unwrap().path();
-
-        let test_contract = std::fs::read(&path).expect("failed to read file");
-
-        let code = cut_to_allowed_bytecode_size(&test_contract).unwrap();
-        let tx = get_deploy_tx(code);
-
-        let name = path.file_name().unwrap().to_str().unwrap();
-
-        println!("benchmarking: {}", name);
-
-        let mut timings = vec![];
-        let benchmark_start = Instant::now();
-        while benchmark_start.elapsed() < Duration::from_secs(5) {
-            let start = Instant::now();
-            BenchmarkingVm::new().run_transaction(black_box(&tx));
-            timings.push(start.elapsed());
-        }
-
-        println!("{:?}", timings.iter().min().unwrap());
-        results.push((name.to_owned(), timings));
-    }
-
-    if option_env!("PUSH_VM_BENCHMARKS_TO_PROMETHEUS").is_some() {
-        vm_benchmark::with_prometheus::with_prometheus(|| {
-            for (name, timings) in results {
-                for (i, timing) in timings.into_iter().enumerate() {
-                    VM_BENCHMARK_METRICS.timing[&(name.clone(), i.to_string())].set(timing);
-                }
-            }
-        });
-    }
-}
-
-#[derive(Debug, Metrics)]
-#[metrics(prefix = "vm_benchmark")]
-pub(crate) struct VmBenchmarkMetrics {
-    #[metrics(labels = ["benchmark", "run_no"])]
-    pub timing: LabeledFamily<(String, String), Gauge<Duration>, 2>,
-}
-
-#[vise::register]
-pub(crate) static VM_BENCHMARK_METRICS: vise::Global<VmBenchmarkMetrics> = vise::Global::new();
diff --git a/core/tests/vm-benchmark/benches/iai.rs b/core/tests/vm-benchmark/benches/iai.rs
index 2837a2345a5a..6b8965afa4f1 100644
--- a/core/tests/vm-benchmark/benches/iai.rs
+++ b/core/tests/vm-benchmark/benches/iai.rs
@@ -1,14 +1,8 @@
 use iai::black_box;
-use zksync_vm_benchmark_harness::{
-    cut_to_allowed_bytecode_size, get_deploy_tx, BenchmarkingVm, BenchmarkingVmFactory, Fast,
-    Legacy,
-};
-
-fn run_bytecode<VM: BenchmarkingVmFactory>(path: &str) {
-    let test_contract = std::fs::read(path).expect("failed to read file");
-    let code = cut_to_allowed_bytecode_size(&test_contract).unwrap();
-    let tx = get_deploy_tx(code);
+use vm_benchmark::{BenchmarkingVm, BenchmarkingVmFactory, Bytecode, Fast, Legacy};
 
+fn run_bytecode<VM: BenchmarkingVmFactory>(name: &str) {
+    let tx = Bytecode::get(name).deploy_tx();
     black_box(BenchmarkingVm::<VM>::default().run_transaction(&tx));
 }
 
@@ -16,11 +10,11 @@ macro_rules! make_functions_and_main {
     ($($file:ident => $legacy_name:ident,)+) => {
         $(
         fn $file() {
-            run_bytecode::<Fast>(concat!("deployment_benchmarks/", stringify!($file)));
+            run_bytecode::<Fast>(stringify!($file));
        }
 
        fn $legacy_name() {
-            run_bytecode::<Legacy>(concat!("deployment_benchmarks/", stringify!($file)));
+            run_bytecode::<Legacy>(stringify!($file));
        }
        )+
diff --git a/core/tests/vm-benchmark/benches/oneshot.rs b/core/tests/vm-benchmark/benches/oneshot.rs
new file mode 100644
index 000000000000..58a90af4981f
--- /dev/null
+++ b/core/tests/vm-benchmark/benches/oneshot.rs
@@ -0,0 +1,91 @@
+use std::time::Duration;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use vm_benchmark::{
+    criterion::{BenchmarkGroup, CriterionExt, MeteredTime},
+    get_heavy_load_test_tx, get_load_test_deploy_tx, get_load_test_tx, get_realistic_load_test_tx,
+    BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, LoadTestParams, BYTECODES,
+};
+use zksync_types::Transaction;
+
+const SAMPLE_SIZE: usize = 20;
+
+fn benches_in_folder<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Criterion<MeteredTime>) {
+    let mut group = c.metered_group(VM::LABEL.as_str());
+    group
+        .sample_size(SAMPLE_SIZE)
+        .measurement_time(Duration::from_secs(10));
+
+    for bytecode in BYTECODES {
+        let tx = bytecode.deploy_tx();
+        let bench_name = bytecode.name;
+        let full_suffix = if FULL { "/full" } else { "" };
+        let bench_name = format!("{bench_name}{full_suffix}");
+
+        group.bench_metered(bench_name, |bencher| {
+            if FULL {
+                // Include VM initialization / drop into the measured time
+                bencher.iter(|timer| {
+                    let _guard = timer.start();
+                    BenchmarkingVm::<VM>::default().run_transaction(black_box(&tx));
+                });
+            } else {
+                bencher.iter(|timer| {
+                    let mut vm = BenchmarkingVm::<VM>::default();
+                    let guard = timer.start();
+                    let _result = vm.run_transaction(black_box(&tx));
+                    drop(guard); // do not include latency of dropping `_result`
+                });
+            }
+        });
+    }
+}
+
+fn bench_load_test<VM: BenchmarkingVmFactory>(c: &mut Criterion<MeteredTime>) {
+    let mut group = c.metered_group(VM::LABEL.as_str());
+    group
+        .sample_size(SAMPLE_SIZE)
+        .measurement_time(Duration::from_secs(10));
+
+    // Nonce 0 is used for the deployment transaction
+    let tx = get_load_test_tx(1, 10_000_000, LoadTestParams::default());
+    bench_load_test_transaction::<VM>(&mut group, "load_test", &tx);
+
+    let tx = get_realistic_load_test_tx(1);
+    bench_load_test_transaction::<VM>(&mut group, "load_test_realistic", &tx);
+
+    let tx = get_heavy_load_test_tx(1);
+    bench_load_test_transaction::<VM>(&mut group, "load_test_heavy", &tx);
+}
+
+fn bench_load_test_transaction<VM: BenchmarkingVmFactory>(
+    group: &mut BenchmarkGroup<'_>,
+    name: &str,
+    tx: &Transaction,
+) {
+    group.bench_metered(name, |bencher| {
+        bencher.iter(|timer| {
+            let mut vm = BenchmarkingVm::<VM>::default();
+            vm.run_transaction(&get_load_test_deploy_tx());
+
+            let guard = timer.start();
+            let result = vm.run_transaction(black_box(tx));
+            drop(guard); // do not include the latency of `result` checks / drop
+            assert!(!result.result.is_failed(), "{:?}", result.result);
+        });
+    });
+}
+
+criterion_group!(
+    name = benches;
+    config = Criterion::default()
+        .configure_from_args()
+        .with_measurement(MeteredTime::new("criterion"));
+    targets = benches_in_folder::<Fast, false>,
+    benches_in_folder::<Fast, true>,
+    benches_in_folder::<Legacy, false>,
+    benches_in_folder::<Legacy, true>,
+    bench_load_test::<Fast>,
+    bench_load_test::<Legacy>
+);
+criterion_main!(benches);
diff --git a/core/tests/vm-benchmark/harness/Cargo.toml b/core/tests/vm-benchmark/harness/Cargo.toml
deleted file mode 100644
index a24d3fa1294a..000000000000
--- a/core/tests/vm-benchmark/harness/Cargo.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-[package]
-name = "zksync_vm_benchmark_harness"
-version.workspace = true
-edition.workspace = true
-license.workspace = true
-publish = false
-
-[dependencies]
-zksync_multivm.workspace = true
-zksync_types.workspace = true
-zksync_state.workspace = true
-zksync_utils.workspace = true
-zksync_system_constants.workspace = true
-zksync_contracts.workspace = true
-zk_evm.workspace = true
-once_cell.workspace = true
-
-[dev-dependencies]
-assert_matches.workspace = true
diff --git a/core/tests/vm-benchmark/src/parse_iai.rs b/core/tests/vm-benchmark/src/bin/common/mod.rs
similarity index 98%
rename from core/tests/vm-benchmark/src/parse_iai.rs
rename to core/tests/vm-benchmark/src/bin/common/mod.rs
index 61376b429a32..a92c9d5f710c 100644
--- a/core/tests/vm-benchmark/src/parse_iai.rs
+++ b/core/tests/vm-benchmark/src/bin/common/mod.rs
@@ -1,5 +1,6 @@
 use std::io::BufRead;
 
+#[derive(Debug)]
 pub struct IaiResult {
     pub name: String,
     pub instructions: u64,
diff --git a/core/tests/vm-benchmark/src/compare_iai_results.rs b/core/tests/vm-benchmark/src/bin/compare_iai_results.rs
similarity index 98%
rename from core/tests/vm-benchmark/src/compare_iai_results.rs
rename to core/tests/vm-benchmark/src/bin/compare_iai_results.rs
index d2c9d73f7e36..faf72a18f451 100644
--- a/core/tests/vm-benchmark/src/compare_iai_results.rs
+++ b/core/tests/vm-benchmark/src/bin/compare_iai_results.rs
@@ -4,7 +4,9 @@ use std::{
     io::{BufRead, BufReader},
 };
 
-use vm_benchmark::parse_iai::parse_iai;
+pub use crate::common::parse_iai;
+
+mod common;
 
 fn main() {
     let [iai_before, iai_after, opcodes_before, opcodes_after] = std::env::args()
diff --git a/core/tests/vm-benchmark/src/bin/iai_results_to_prometheus.rs b/core/tests/vm-benchmark/src/bin/iai_results_to_prometheus.rs
new file mode 100644
index 000000000000..3b3aa05bf69c
--- /dev/null
+++ b/core/tests/vm-benchmark/src/bin/iai_results_to_prometheus.rs
@@ -0,0 +1,52 @@
+use std::{env, io::BufReader, time::Duration};
+
+use tokio::sync::watch;
+use vise::{Gauge, LabeledFamily, Metrics};
+use zksync_vlog::prometheus::PrometheusExporterConfig;
+
+use crate::common::{parse_iai, IaiResult};
+
+mod common;
+
+#[derive(Debug, Metrics)]
+#[metrics(prefix = "vm_cachegrind")]
+pub(crate) struct VmCachegrindMetrics {
+    #[metrics(labels = ["benchmark"])]
+    pub instructions: LabeledFamily<String, Gauge<u64>>,
+    #[metrics(labels = ["benchmark"])]
+    pub l1_accesses: LabeledFamily<String, Gauge<u64>>,
+    #[metrics(labels = ["benchmark"])]
+    pub l2_accesses: LabeledFamily<String, Gauge<u64>>,
+    #[metrics(labels = ["benchmark"])]
+    pub ram_accesses: LabeledFamily<String, Gauge<u64>>,
+    #[metrics(labels = ["benchmark"])]
+    pub cycles: LabeledFamily<String, Gauge<u64>>,
+}
+
+#[vise::register]
+pub(crate) static VM_CACHEGRIND_METRICS: vise::Global<VmCachegrindMetrics> = vise::Global::new();
+
+#[tokio::main]
+async fn main() {
+    let results: Vec<IaiResult> = parse_iai(BufReader::new(std::io::stdin())).collect();
+
+    let endpoint = env::var("BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL")
+        .expect("`BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL` env var is not set");
+    let (stop_sender, stop_receiver) = watch::channel(false);
+    let prometheus_config =
+        PrometheusExporterConfig::push(endpoint.to_owned(), Duration::from_millis(100));
+    tokio::spawn(prometheus_config.run(stop_receiver));
+
+    for result in results {
+        let name = result.name;
+        VM_CACHEGRIND_METRICS.instructions[&name.clone()].set(result.instructions);
+        VM_CACHEGRIND_METRICS.l1_accesses[&name.clone()].set(result.l1_accesses);
+        VM_CACHEGRIND_METRICS.l2_accesses[&name.clone()].set(result.l2_accesses);
+        VM_CACHEGRIND_METRICS.ram_accesses[&name.clone()].set(result.ram_accesses);
+        VM_CACHEGRIND_METRICS.cycles[&name].set(result.cycles);
+    }
+
+    println!("Waiting for push to happen...");
+    tokio::time::sleep(Duration::from_secs(1)).await;
+    stop_sender.send_replace(true);
+}
diff --git a/core/tests/vm-benchmark/src/bin/instruction_counts.rs b/core/tests/vm-benchmark/src/bin/instruction_counts.rs
new file mode 100644
index 000000000000..f9bb04c01bff
--- /dev/null
+++ b/core/tests/vm-benchmark/src/bin/instruction_counts.rs
@@ -0,0 +1,11 @@
+//! Runs all benchmarks and prints out the number of zkEVM opcodes each one executed.
+
+use vm_benchmark::{BenchmarkingVm, BYTECODES};
+
+fn main() {
+    for bytecode in BYTECODES {
+        let tx = bytecode.deploy_tx();
+        let name = bytecode.name;
+        println!("{name} {}", BenchmarkingVm::new().instruction_count(&tx));
+    }
+}
diff --git a/core/tests/vm-benchmark/deployment_benchmarks/access_memory b/core/tests/vm-benchmark/src/bytecodes/access_memory
similarity index 100%
rename from core/tests/vm-benchmark/deployment_benchmarks/access_memory
rename to core/tests/vm-benchmark/src/bytecodes/access_memory
diff --git a/core/tests/vm-benchmark/deployment_benchmarks/call_far b/core/tests/vm-benchmark/src/bytecodes/call_far
similarity index 100%
rename from core/tests/vm-benchmark/deployment_benchmarks/call_far
rename to core/tests/vm-benchmark/src/bytecodes/call_far
diff --git a/core/tests/vm-benchmark/deployment_benchmarks/decode_shl_sub b/core/tests/vm-benchmark/src/bytecodes/decode_shl_sub
similarity index 100%
rename from core/tests/vm-benchmark/deployment_benchmarks/decode_shl_sub
rename to core/tests/vm-benchmark/src/bytecodes/decode_shl_sub
diff --git a/core/tests/vm-benchmark/deployment_benchmarks/deploy_simple_contract b/core/tests/vm-benchmark/src/bytecodes/deploy_simple_contract
similarity index 100%
rename from core/tests/vm-benchmark/deployment_benchmarks/deploy_simple_contract
rename to core/tests/vm-benchmark/src/bytecodes/deploy_simple_contract
diff --git a/core/tests/vm-benchmark/deployment_benchmarks/event_spam b/core/tests/vm-benchmark/src/bytecodes/event_spam
similarity index 100%
rename from core/tests/vm-benchmark/deployment_benchmarks/event_spam
rename to core/tests/vm-benchmark/src/bytecodes/event_spam
diff --git a/core/tests/vm-benchmark/deployment_benchmarks/finish_eventful_frames b/core/tests/vm-benchmark/src/bytecodes/finish_eventful_frames
similarity index 100%
rename from core/tests/vm-benchmark/deployment_benchmarks/finish_eventful_frames
rename to core/tests/vm-benchmark/src/bytecodes/finish_eventful_frames
diff --git a/core/tests/vm-benchmark/deployment_benchmarks/heap_read_write b/core/tests/vm-benchmark/src/bytecodes/heap_read_write
similarity index 100%
rename from core/tests/vm-benchmark/deployment_benchmarks/heap_read_write
rename to core/tests/vm-benchmark/src/bytecodes/heap_read_write
diff --git a/core/tests/vm-benchmark/deployment_benchmarks/slot_hash_collision b/core/tests/vm-benchmark/src/bytecodes/slot_hash_collision
similarity index 100%
rename from core/tests/vm-benchmark/deployment_benchmarks/slot_hash_collision
rename to core/tests/vm-benchmark/src/bytecodes/slot_hash_collision
diff --git a/core/tests/vm-benchmark/deployment_benchmarks/write_and_decode b/core/tests/vm-benchmark/src/bytecodes/write_and_decode
similarity index 100%
rename from core/tests/vm-benchmark/deployment_benchmarks/write_and_decode
rename to core/tests/vm-benchmark/src/bytecodes/write_and_decode
diff --git a/core/tests/vm-benchmark/src/criterion.rs b/core/tests/vm-benchmark/src/criterion.rs
new file mode 100644
index 000000000000..9515ac4ef988
--- /dev/null
+++ b/core/tests/vm-benchmark/src/criterion.rs
@@ -0,0 +1,477 @@
+//! Criterion helpers and extensions used to record benchmark timings as Prometheus metrics.
+
+use std::{
+    cell::RefCell,
+    convert::Infallible,
+    env, fmt, mem,
+    rc::Rc,
+    sync::Once,
+    thread,
+    time::{Duration, Instant},
+};
+
+use criterion::{
+    measurement::{Measurement, ValueFormatter, WallTime},
+    Criterion, Throughput,
+};
+use once_cell::{sync::OnceCell as SyncOnceCell, unsync::OnceCell};
+use tokio::sync::watch;
+use vise::{EncodeLabelSet, Family, Gauge, Metrics, Unit};
+use zksync_vlog::prometheus::PrometheusExporterConfig;
+
+/// Checks whether a benchmark binary is running in the test mode (as opposed to benchmarking).
+pub fn is_test_mode() -> bool {
+    !env::args().any(|arg| arg == "--bench")
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash, EncodeLabelSet)]
+struct BenchLabels {
+    bin: &'static str,
+    group: String,
+    benchmark: String,
+    arg: Option<String>,
+}
+
+// We don't use histograms because benchmark results are uploaded in short bursts, which leads to missing zero values.
+#[derive(Debug, Metrics)]
+#[metrics(prefix = "vm_benchmark")]
+struct VmBenchmarkMetrics {
+    /// Number of samples for a benchmark.
+    sample_count: Family<BenchLabels, Gauge<usize>>,
+
+    /// Mean latency for a benchmark.
+    #[metrics(unit = Unit::Seconds)]
+    mean_timing: Family<BenchLabels, Gauge<Duration>>,
+    /// Minimum latency for a benchmark.
+    #[metrics(unit = Unit::Seconds)]
+    min_timing: Family<BenchLabels, Gauge<Duration>>,
+    /// Maximum latency for a benchmark.
+    #[metrics(unit = Unit::Seconds)]
+    max_timing: Family<BenchLabels, Gauge<Duration>>,
+    /// Median latency for a benchmark.
+    #[metrics(unit = Unit::Seconds)]
+    median_timing: Family<BenchLabels, Gauge<Duration>>,
+}
+
+#[vise::register]
+static METRICS: vise::Global<VmBenchmarkMetrics> = vise::Global::new();
+
+#[derive(Debug)]
+struct PrometheusRuntime {
+    stop_sender: watch::Sender<bool>,
+    _runtime: tokio::runtime::Runtime,
+}
+
+impl Drop for PrometheusRuntime {
+    fn drop(&mut self) {
+        self.stop_sender.send_replace(true);
+        // Metrics are pushed automatically on exit, so we wait *after* sending a stop signal
+        println!("Waiting for Prometheus metrics to be pushed");
+        thread::sleep(Duration::from_secs(1));
+    }
+}
+
+impl PrometheusRuntime {
+    fn new() -> Option<Self> {
+        const PUSH_INTERVAL: Duration = Duration::from_millis(100);
+
+        let gateway_url = env::var("BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL").ok()?;
+        let runtime = tokio::runtime::Runtime::new().expect("Failed initializing Tokio runtime");
+        println!("Pushing Prometheus metrics to {gateway_url} each {PUSH_INTERVAL:?}");
+        let (stop_sender, stop_receiver) = watch::channel(false);
+        let prometheus_config = PrometheusExporterConfig::push(gateway_url, PUSH_INTERVAL);
+        runtime.spawn(prometheus_config.run(stop_receiver));
+        Some(Self {
+            stop_sender,
+            _runtime: runtime,
+        })
+    }
+}
+
+/// Guard returned by [`CurrentBenchmark::set()`] that unsets the current benchmark on drop.
+#[must_use = "Will unset the current benchmark when dropped"]
+#[derive(Debug)]
+struct CurrentBenchmarkGuard;
+
+impl Drop for CurrentBenchmarkGuard {
+    fn drop(&mut self) {
+        CURRENT_BENCH.take();
+    }
+}
+
+#[derive(Debug)]
+struct CurrentBenchmark {
+    metrics: &'static VmBenchmarkMetrics,
+    labels: BenchLabels,
+    observations: Vec<Duration>,
+}
+
+impl CurrentBenchmark {
+    fn set(metrics: &'static VmBenchmarkMetrics, labels: BenchLabels) -> CurrentBenchmarkGuard {
+        CURRENT_BENCH.replace(Some(Self {
+            metrics,
+            labels,
+            observations: vec![],
+        }));
+        CurrentBenchmarkGuard
+    }
+
+    fn observe(timing: Duration) {
+        CURRENT_BENCH.with_borrow_mut(|this| {
+            if let Some(this) = this {
+                this.observations.push(timing);
+            }
+        });
+    }
+}
+
+impl Drop for CurrentBenchmark {
+    fn drop(&mut self) {
+        let mut observations = mem::take(&mut self.observations);
+        if observations.is_empty() {
+            return;
+        }
+
+        let len = observations.len();
+        self.metrics.sample_count[&self.labels].set(len);
+        let mean = observations
+            .iter()
+            .copied()
+            .sum::<Duration>()
+            .div_f32(len as f32);
+        self.metrics.mean_timing[&self.labels].set(mean);
+
+        // Could use quick median algorithm, but since there aren't that many observations expected,
+        // sorting looks acceptable.
+        observations.sort_unstable();
+        let (min, max) = (observations[0], *observations.last().unwrap());
+        self.metrics.min_timing[&self.labels].set(min);
+        self.metrics.max_timing[&self.labels].set(max);
+        let median = if len % 2 == 0 {
+            (observations[len / 2 - 1] + observations[len / 2]) / 2
+        } else {
+            observations[len / 2]
+        };
+        self.metrics.median_timing[&self.labels].set(median);
+
+        println!("Exported timings: min={min:?}, max={max:?}, mean={mean:?}, median={median:?}");
+    }
+}
+
+thread_local! {
+    static CURRENT_BENCH: RefCell<Option<CurrentBenchmark>> = const { RefCell::new(None) };
+}
+
+static BIN_NAME: SyncOnceCell<&'static str> = SyncOnceCell::new();
+
+/// Measurement for criterion that exports timings as Prometheus metrics.
+#[derive(Debug)]
+pub struct MeteredTime {
+    _prometheus: Option<PrometheusRuntime>,
+}
+
+impl MeteredTime {
+    pub fn new(bin_name: &'static str) -> Self {
+        static PROMETHEUS_INIT: Once = Once::new();
+
+        let mut prometheus = None;
+        if !is_test_mode() {
+            PROMETHEUS_INIT.call_once(|| {
+                prometheus = PrometheusRuntime::new();
+            });
+        }
+
+        if let Err(prev_name) = BIN_NAME.set(bin_name) {
+            assert_eq!(prev_name, bin_name, "attempted to redefine binary name");
+        }
+
+        Self {
+            _prometheus: prometheus,
+        }
+    }
+}
+
+impl Measurement for MeteredTime {
+    type Intermediate = Infallible;
+    type Value = Duration;
+
+    fn start(&self) -> Self::Intermediate {
+        // All measurements must be done via `Bencher::iter()`
+        unreachable!("must not be invoked directly");
+    }
+
+    fn end(&self, _: Self::Intermediate) -> Self::Value {
+        unreachable!("must not be invoked directly");
+    }
+
+    fn add(&self, v1: &Self::Value, v2: &Self::Value) -> Self::Value {
+        *v1 + *v2
+    }
+
+    fn zero(&self) -> Self::Value {
+        Duration::ZERO
+    }
+
+    fn to_f64(&self, value: &Self::Value) -> f64 {
+        WallTime.to_f64(value)
+    }
+
+    fn formatter(&self) -> &dyn ValueFormatter {
+        WallTime.formatter()
+    }
+}
+
+/// Drop-in replacement for `criterion::BenchmarkId`.
+pub struct BenchmarkId {
+    inner: criterion::BenchmarkId,
+    benchmark: String,
+    arg: String,
+}
+
+impl BenchmarkId {
+    pub fn new<S: Into<String>, P: fmt::Display>(function_name: S, parameter: P) -> Self {
+        let function_name = function_name.into();
+        Self {
+            benchmark: function_name.clone(),
+            arg: parameter.to_string(),
+            inner: criterion::BenchmarkId::new(function_name, parameter),
+        }
+    }
+}
+
+/// Drop-in replacement for `criterion::BenchmarkGroup`.
+pub struct BenchmarkGroup<'a> {
+    name: String,
+    inner: criterion::BenchmarkGroup<'a, MeteredTime>,
+    metrics: &'static VmBenchmarkMetrics,
+}
+
+impl BenchmarkGroup<'_> {
+    pub fn sample_size(&mut self, size: usize) -> &mut Self {
+        self.inner.sample_size(size);
+        self
+    }
+
+    pub fn throughput(&mut self, throughput: Throughput) -> &mut Self {
+        self.inner.throughput(throughput);
+        self
+    }
+
+    pub fn measurement_time(&mut self, dur: Duration) -> &mut Self {
+        self.inner.measurement_time(dur);
+        self
+    }
+
+    fn start_bench(&self, benchmark: String, arg: Option<String>) -> CurrentBenchmarkGuard {
+        let labels = BenchLabels {
+            bin: BIN_NAME.get().copied().unwrap_or(""),
+            group: self.name.clone(),
+            benchmark,
+            arg,
+        };
+        CurrentBenchmark::set(self.metrics, labels)
+    }
+
+    pub fn bench_metered<F>(&mut self, id: impl Into<String>, mut bench_fn: F)
+    where
+        F: FnMut(&mut Bencher<'_, '_>),
+    {
+        let id = id.into();
+        let _guard = self.start_bench(id.clone(), None);
+        self.inner
+            .bench_function(id, |bencher| bench_fn(&mut Bencher { inner: bencher }));
+    }
+
+    pub fn bench_metered_with_input<I, F>(&mut self, id: BenchmarkId, input: &I, mut bench_fn: F)
+    where
+        I: ?Sized,
+        F: FnMut(&mut Bencher<'_, '_>, &I),
+    {
+        let _guard = self.start_bench(id.benchmark, Some(id.arg));
+        self.inner
+            .bench_with_input(id.inner, input, |bencher, input| {
+                bench_fn(&mut Bencher { inner: bencher }, input)
+            });
+    }
+}
+
+pub struct Bencher<'a, 'r> {
+    inner: &'r mut criterion::Bencher<'a, MeteredTime>,
+}
+
+impl Bencher<'_, '_> {
+    pub fn iter(&mut self, mut routine: impl FnMut(BenchmarkTimer)) {
+        self.inner.iter_custom(move |iters| {
+            let mut total = Duration::ZERO;
+            for _ in 0..iters {
+                let timer = BenchmarkTimer::new();
+                let observation = timer.observation.clone();
+                routine(timer);
+                let timing = observation.get().copied().unwrap_or_default();
+                CurrentBenchmark::observe(timing);
+                total += timing;
+            }
+            total
+        })
+    }
+}
+
+/// Timer for benchmarks supplied to the `Bencher::iter()` closure.
+#[derive(Debug)]
+#[must_use = "should be started to start measurements"]
+pub struct BenchmarkTimer {
+    observation: Rc<OnceCell<Duration>>,
+}
+
+impl BenchmarkTimer {
+    fn new() -> Self {
+        Self {
+            observation: Rc::default(),
+        }
+    }
+
+    /// Starts the timer. The timer will remain active until the returned guard is dropped. If you drop the timer implicitly,
+    /// be careful with the drop order (inverse to the variable declaration order); when in doubt, drop the guard explicitly.
+    pub fn start(self) -> BenchmarkTimerGuard {
+        BenchmarkTimerGuard {
+            started_at: Instant::now(),
+            observation: self.observation,
+        }
+    }
+}
+
+/// Guard returned from [`BenchmarkTimer::start()`].
+#[derive(Debug)]
+#[must_use = "will stop the timer on drop"]
+pub struct BenchmarkTimerGuard {
+    started_at: Instant,
+    observation: Rc<OnceCell<Duration>>,
+}
+
+impl Drop for BenchmarkTimerGuard {
+    fn drop(&mut self) {
+        let latency = self.started_at.elapsed();
+        self.observation.set(latency).ok();
+    }
+}
+
+pub trait CriterionExt {
+    fn metered_group(&mut self, name: impl Into<String>) -> BenchmarkGroup<'_>;
+}
+
+impl CriterionExt for Criterion<MeteredTime> {
+    fn metered_group(&mut self, name: impl Into<String>) -> BenchmarkGroup<'_> {
+        let name = name.into();
+        BenchmarkGroup {
+            inner: self.benchmark_group(name.clone()),
+            name,
+            metrics: &METRICS,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashSet;
+
+    use super::*;
+    use crate::BYTECODES;
+
+    fn test_benchmark(c: &mut Criterion<MeteredTime>, metrics: &'static VmBenchmarkMetrics) {
+        let mut group = c.metered_group("single");
+        group.metrics = metrics;
+        for bytecode in BYTECODES {
+            group.bench_metered(bytecode.name, |bencher| {
+                bencher.iter(|timer| {
+                    let _guard = timer.start();
+                    thread::sleep(Duration::from_millis(1))
+                })
+            });
+        }
+        drop(group);
+
+        let mut group = c.metered_group("with_arg");
+        group.metrics = metrics;
+        for bytecode in BYTECODES {
+            for arg in [1, 10, 100] {
+                group.bench_metered_with_input(
+                    BenchmarkId::new(bytecode.name, arg),
+                    &arg,
+                    |bencher, _arg| {
+                        bencher.iter(|timer| {
+                            let _guard = timer.start();
+                            thread::sleep(Duration::from_millis(1))
+                        });
+                    },
+                )
+            }
+        }
+    }
+
+    #[test]
+    fn recording_benchmarks() {
+        let metered_time = MeteredTime::new("test");
+        let metrics = &*Box::leak(Box::<VmBenchmarkMetrics>::default());
+
+        let mut criterion = Criterion::default()
+            .warm_up_time(Duration::from_millis(10))
+            .measurement_time(Duration::from_millis(10))
+            .sample_size(10)
+            .with_measurement(metered_time);
+        test_benchmark(&mut criterion, metrics);
+
+        let timing_labels: HashSet<_> = metrics.mean_timing.to_entries().into_keys().collect();
+        // Check that labels are as expected.
+        for bytecode in BYTECODES {
+            assert!(timing_labels.contains(&BenchLabels {
+                bin: "test",
+                group: "single".to_owned(),
+                benchmark: bytecode.name.to_owned(),
+                arg: None,
+            }));
+            assert!(timing_labels.contains(&BenchLabels {
+                bin: "test",
+                group: "with_arg".to_owned(),
+                benchmark: bytecode.name.to_owned(),
+                arg: Some("1".to_owned()),
+            }));
+            assert!(timing_labels.contains(&BenchLabels {
+                bin: "test",
+                group: "with_arg".to_owned(),
+                benchmark: bytecode.name.to_owned(),
+                arg: Some("10".to_owned()),
+            }));
+            assert!(timing_labels.contains(&BenchLabels {
+                bin: "test",
+                group: "with_arg".to_owned(),
+                benchmark: bytecode.name.to_owned(),
+                arg: Some("100".to_owned()),
+            }));
+        }
+        assert_eq!(
+            timing_labels.len(),
+            4 * BYTECODES.len(),
+            "{timing_labels:#?}"
+        );
+
+        // Sanity-check relations among collected metrics
+        for label in &timing_labels {
+            let mean = metrics.mean_timing[label].get();
+            let min = metrics.min_timing[label].get();
+            let max = metrics.max_timing[label].get();
+            let median = metrics.median_timing[label].get();
+            assert!(
+                min > Duration::ZERO,
+                "min={min:?}, mean={mean:?}, median = {median:?}, max={max:?}"
+            );
+            assert!(
+                min <= mean && min <= median,
+                "min={min:?}, mean={mean:?}, median = {median:?}, max={max:?}"
+            );
+            assert!(
+                mean <= max && median <= max,
+                "min={min:?}, mean={mean:?}, median = {median:?}, max={max:?}"
+            );
+        }
+    }
+}
diff --git a/core/tests/vm-benchmark/src/find_slowest.rs b/core/tests/vm-benchmark/src/find_slowest.rs
deleted file mode 100644
index 97a6acd5acd9..000000000000
--- a/core/tests/vm-benchmark/src/find_slowest.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-use std::{
-    io::Write,
-    time::{Duration, Instant},
-};
-
-use zksync_vm_benchmark_harness::*;
-
-fn main() {
-    let mut results = vec![];
-
-    let arg = std::env::args()
-        .nth(1)
-        .expect("Expected directory of contracts to rank as first argument.");
-    let files = std::fs::read_dir(arg).expect("Failed to list dir");
-
-    let mut last_progress_update = Instant::now();
-
-    for (i, file) in files.enumerate() {
-        let path = file.unwrap().path();
-
-        let test_contract = std::fs::read(&path).expect("failed to read file");
-
-        if let Some(code) = cut_to_allowed_bytecode_size(&test_contract) {
-            let tx = get_deploy_tx(code);
-
-            let start_time = Instant::now();
-            BenchmarkingVm::new().run_transaction(&tx);
-            results.push((start_time.elapsed(), path));
-        }
-
-        if last_progress_update.elapsed() > Duration::from_millis(100) {
-            print!("\r{}", i);
-            std::io::stdout().flush().unwrap();
-            last_progress_update = Instant::now();
-        }
-    }
-    println!();
-
-    results.sort();
-    for (time, path) in results.iter().rev().take(30) {
-        println!("{} took {:?}", path.display(), time);
-    }
-}
diff --git a/core/tests/vm-benchmark/src/iai_results_to_prometheus.rs b/core/tests/vm-benchmark/src/iai_results_to_prometheus.rs
deleted file mode 100644
index d419603bae87..000000000000
--- a/core/tests/vm-benchmark/src/iai_results_to_prometheus.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-use std::io::BufReader;
-
-use vise::{Gauge, LabeledFamily, Metrics};
-use vm_benchmark::parse_iai::IaiResult;
-
-fn main() {
-    let results: Vec<IaiResult> =
-        vm_benchmark::parse_iai::parse_iai(BufReader::new(std::io::stdin())).collect();
-
-    vm_benchmark::with_prometheus::with_prometheus(|| {
-        for r in results {
-            VM_CACHEGRIND_METRICS.instructions[&r.name.clone()].set(r.instructions as f64);
-            VM_CACHEGRIND_METRICS.l1_accesses[&r.name.clone()].set(r.l1_accesses as f64);
-            VM_CACHEGRIND_METRICS.l2_accesses[&r.name.clone()].set(r.l2_accesses as f64);
-            VM_CACHEGRIND_METRICS.ram_accesses[&r.name.clone()].set(r.ram_accesses as f64);
-            VM_CACHEGRIND_METRICS.cycles[&r.name.clone()].set(r.cycles as f64);
-        }
-    })
-}
-
-#[derive(Debug, Metrics)]
-#[metrics(prefix = "vm_cachegrind")]
-pub(crate) struct VmCachegrindMetrics {
-    #[metrics(labels = ["benchmark"])]
-    pub instructions: LabeledFamily<String, Gauge<f64>>,
-    #[metrics(labels = ["benchmark"])]
-    pub l1_accesses: LabeledFamily<String, Gauge<f64>>,
-    #[metrics(labels = ["benchmark"])]
-    pub l2_accesses: LabeledFamily<String, Gauge<f64>>,
-    #[metrics(labels = ["benchmark"])]
-    pub ram_accesses: LabeledFamily<String, Gauge<f64>>,
-    #[metrics(labels = ["benchmark"])]
-    pub cycles: LabeledFamily<String, Gauge<f64>>,
-}
-
-#[vise::register]
-pub(crate) static VM_CACHEGRIND_METRICS: vise::Global<VmCachegrindMetrics> = vise::Global::new();
diff --git a/core/tests/vm-benchmark/harness/src/instruction_counter.rs b/core/tests/vm-benchmark/src/instruction_counter.rs
similarity index 100%
rename from core/tests/vm-benchmark/harness/src/instruction_counter.rs
rename to core/tests/vm-benchmark/src/instruction_counter.rs
diff --git a/core/tests/vm-benchmark/src/instruction_counts.rs b/core/tests/vm-benchmark/src/instruction_counts.rs
deleted file mode 100644
index c038c8f2bf6b..000000000000
--- a/core/tests/vm-benchmark/src/instruction_counts.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-//! Runs all benchmarks and prints out the number of zkEVM opcodes each one executed.
-
-use std::path::Path;
-
-use zksync_vm_benchmark_harness::{cut_to_allowed_bytecode_size, get_deploy_tx, BenchmarkingVm};
-
-fn main() {
-    // using source file location because this is just a script, the binary isn't meant to be reused
-    let benchmark_folder = Path::new(file!())
-        .parent()
-        .unwrap()
-        .parent()
-        .unwrap()
-        .join("deployment_benchmarks");
-
-    for path in std::fs::read_dir(benchmark_folder).unwrap() {
-        let path = path.unwrap().path();
-
-        let test_contract = std::fs::read(&path).expect("failed to read file");
-
-        let code = cut_to_allowed_bytecode_size(&test_contract).unwrap();
-        let tx = get_deploy_tx(code);
-
-        let name = path.file_name().unwrap().to_str().unwrap();
-
-        println!("{} {}", name, BenchmarkingVm::new().instruction_count(&tx));
-    }
-}
diff --git a/core/tests/vm-benchmark/src/lib.rs b/core/tests/vm-benchmark/src/lib.rs
index 38cc311105b3..4bd008d33196 100644
--- a/core/tests/vm-benchmark/src/lib.rs
+++ b/core/tests/vm-benchmark/src/lib.rs
@@ -1,2 +1,72 @@
-pub mod parse_iai;
-pub mod with_prometheus;
+use zksync_types::Transaction;
+
+pub use crate::{
+    transaction::{
+        get_deploy_tx, get_deploy_tx_with_gas_limit, get_heavy_load_test_tx,
+        get_load_test_deploy_tx, get_load_test_tx, get_realistic_load_test_tx, get_transfer_tx,
+        LoadTestParams,
+    },
+    vm::{BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, VmLabel},
+};
+
+pub mod criterion;
+mod instruction_counter;
+mod transaction;
+mod vm;
+
+#[derive(Debug, Clone, Copy)]
+pub struct Bytecode {
+    pub name: &'static str,
+    raw_bytecode: &'static [u8],
+}
+
+impl Bytecode {
+    pub fn get(name: &str) -> Self {
+        BYTECODES
+            .iter()
+            .find(|bytecode| bytecode.name == name)
+            .copied()
+            .unwrap_or_else(|| panic!("bytecode `{name}` is not defined"))
+    }
+
+    /// Bytecodes must consist of an odd number of 32 byte words.
+    /// This function "fixes" bytecodes of wrong length by cutting off their end.
+    fn cut_to_allowed_bytecode_size(bytes: &[u8]) -> &[u8] {
+        let mut words = bytes.len() / 32;
+        assert!(words > 0, "bytecode is empty");
+
+        if words & 1 == 0 {
+            words -= 1;
+        }
+        &bytes[..32 * words]
+    }
+
+    pub fn bytecode(&self) -> &'static [u8] {
+        Self::cut_to_allowed_bytecode_size(self.raw_bytecode)
+    }
+
+    pub fn deploy_tx(&self) -> Transaction {
+        get_deploy_tx(self.bytecode())
+    }
+}
+
+macro_rules! include_bytecode {
+    ($name:ident) => {
+        Bytecode {
+            name: stringify!($name),
+            raw_bytecode: include_bytes!(concat!("bytecodes/", stringify!($name))),
+        }
+    };
+}
+
+pub const BYTECODES: &[Bytecode] = &[
+    include_bytecode!(access_memory),
+    include_bytecode!(call_far),
+    include_bytecode!(decode_shl_sub),
+    include_bytecode!(deploy_simple_contract),
+    include_bytecode!(event_spam),
+    include_bytecode!(finish_eventful_frames),
+    include_bytecode!(heap_read_write),
+    include_bytecode!(slot_hash_collision),
+    include_bytecode!(write_and_decode),
+];
diff --git a/core/tests/vm-benchmark/src/main.rs b/core/tests/vm-benchmark/src/main.rs
index 925ec78ceb3c..6e2b397d746d 100644
--- a/core/tests/vm-benchmark/src/main.rs
+++ b/core/tests/vm-benchmark/src/main.rs
@@ -1,16 +1,10 @@
-use zksync_vm_benchmark_harness::*;
+use vm_benchmark::{BenchmarkingVm, Bytecode};
 
 fn main() {
-    let test_contract = std::fs::read(
-        std::env::args()
-            .nth(1)
-            .expect("please provide an input file"),
-    )
-    .expect("failed to read file");
-
-    let code = cut_to_allowed_bytecode_size(&test_contract).unwrap();
-    let tx = get_deploy_tx(code);
-
+    let bytecode_name = std::env::args()
+        .nth(1)
+        .expect("please provide bytecode name, e.g. 'access_memory'");
+    let tx = Bytecode::get(&bytecode_name).deploy_tx();
     for _ in 0..100 {
         let mut vm = BenchmarkingVm::new();
         vm.run_transaction(&tx);
diff --git a/core/tests/vm-benchmark/src/transaction.rs b/core/tests/vm-benchmark/src/transaction.rs
new file mode 100644
index 000000000000..90e1c6360b81
--- /dev/null
+++ b/core/tests/vm-benchmark/src/transaction.rs
@@ -0,0 +1,194 @@
+use once_cell::sync::Lazy;
+pub use zksync_contracts::test_contracts::LoadnextContractExecutionParams as LoadTestParams;
+use zksync_contracts::{deployer_contract, TestContract};
+use zksync_multivm::utils::get_max_gas_per_pubdata_byte;
+use zksync_types::{
+    ethabi::{encode, Token},
+    fee::Fee,
+    l2::L2Tx,
+    utils::deployed_address_create,
+    Address, K256PrivateKey, L2ChainId, Nonce, ProtocolVersionId, Transaction,
+    CONTRACT_DEPLOYER_ADDRESS, H256, U256,
+};
+use zksync_utils::bytecode::hash_bytecode;
+
+const LOAD_TEST_MAX_READS: usize = 100;
+
+pub(crate) static PRIVATE_KEY: Lazy<K256PrivateKey> =
+    Lazy::new(|| K256PrivateKey::from_bytes(H256([42; 32])).expect("invalid key bytes"));
+static LOAD_TEST_CONTRACT_ADDRESS: Lazy<Address> =
+    Lazy::new(|| deployed_address_create(PRIVATE_KEY.address(), 0.into()));
+
+static LOAD_TEST_CONTRACT: Lazy<TestContract> = Lazy::new(zksync_contracts::get_loadnext_contract);
+
+static CREATE_FUNCTION_SIGNATURE: Lazy<[u8; 4]> = Lazy::new(|| {
+    deployer_contract()
+        .function("create")
+        .unwrap()
+        .short_signature()
+});
+
+pub fn get_deploy_tx(code: &[u8]) -> Transaction {
+    get_deploy_tx_with_gas_limit(code, 30_000_000, 0)
+}
+
+pub fn get_deploy_tx_with_gas_limit(code: &[u8], gas_limit: u32, nonce: u32) -> Transaction {
+    let mut salt = vec![0_u8; 32];
+    salt[28..32].copy_from_slice(&nonce.to_be_bytes());
+    let params = [
+        Token::FixedBytes(salt),
+        Token::FixedBytes(hash_bytecode(code).0.to_vec()),
+        Token::Bytes([].to_vec()),
+    ];
+    let calldata = CREATE_FUNCTION_SIGNATURE
+        .iter()
+        .cloned()
+        .chain(encode(&params))
+        .collect();
+
+    let mut signed = L2Tx::new_signed(
+        CONTRACT_DEPLOYER_ADDRESS,
+        calldata,
+        Nonce(nonce),
+        tx_fee(gas_limit),
+        U256::zero(),
+        L2ChainId::from(270),
+        &PRIVATE_KEY,
+        vec![code.to_vec()], // maybe not needed?
+        Default::default(),
+    )
+    .expect("should create a signed execute transaction");
+
+    signed.set_input(H256::random().as_bytes().to_vec(), H256::random());
+    signed.into()
+}
+
+fn tx_fee(gas_limit: u32) -> Fee {
+    Fee {
+        gas_limit: U256::from(gas_limit),
+        max_fee_per_gas: U256::from(250_000_000),
+        max_priority_fee_per_gas: U256::from(0),
+        gas_per_pubdata_limit: U256::from(get_max_gas_per_pubdata_byte(
+            ProtocolVersionId::latest().into(),
+        )),
+    }
+}
+
+pub fn get_transfer_tx(nonce: u32) -> Transaction {
+    let mut signed = L2Tx::new_signed(
+        PRIVATE_KEY.address(),
+        vec![], // calldata
+        Nonce(nonce),
+        tx_fee(1_000_000),
+        1_000_000_000.into(), // value
+        L2ChainId::from(270),
+        &PRIVATE_KEY,
+        vec![],             // factory deps
+        Default::default(), // paymaster params
+    )
+    .expect("should create a signed execute transaction");
+
+    signed.set_input(H256::random().as_bytes().to_vec(), H256::random());
+    signed.into()
+}
+
+pub fn get_load_test_deploy_tx() -> Transaction {
+    let calldata = [Token::Uint(LOAD_TEST_MAX_READS.into())];
+    let params = [
+        Token::FixedBytes(vec![0_u8; 32]),
+        Token::FixedBytes(hash_bytecode(&LOAD_TEST_CONTRACT.bytecode).0.to_vec()),
+        Token::Bytes(encode(&calldata)),
+    ];
+    let create_calldata = CREATE_FUNCTION_SIGNATURE
+        .iter()
+        .cloned()
+        .chain(encode(&params))
+        .collect();
+
+    let mut factory_deps = LOAD_TEST_CONTRACT.factory_deps.clone();
+    factory_deps.push(LOAD_TEST_CONTRACT.bytecode.clone());
+
+    let mut signed = L2Tx::new_signed(
+        CONTRACT_DEPLOYER_ADDRESS,
+        create_calldata,
+        Nonce(0),
+        tx_fee(100_000_000),
+        U256::zero(),
+        L2ChainId::from(270),
+        &PRIVATE_KEY,
+        factory_deps,
+        Default::default(),
+    )
+    .expect("should create a signed execute transaction");
+
+    signed.set_input(H256::random().as_bytes().to_vec(), H256::random());
+    signed.into()
+}
+
+pub fn get_load_test_tx(nonce: u32, gas_limit: u32, params: LoadTestParams) -> Transaction {
+    assert!(
+        params.reads <= LOAD_TEST_MAX_READS,
+        "Too many reads: {params:?}, should be <={LOAD_TEST_MAX_READS}"
+    );
+
+    let execute_function = LOAD_TEST_CONTRACT
+        .contract
+        .function("execute")
+        .expect("no `execute` function in load test contract");
+    let calldata = execute_function
+        .encode_input(&vec![
+            Token::Uint(U256::from(params.reads)),
+            Token::Uint(U256::from(params.writes)),
+            Token::Uint(U256::from(params.hashes)),
+            Token::Uint(U256::from(params.events)),
+            Token::Uint(U256::from(params.recursive_calls)),
+            Token::Uint(U256::from(params.deploys)),
+        ])
+        .expect("cannot encode `execute` inputs");
+
+    let mut signed = L2Tx::new_signed(
+        *LOAD_TEST_CONTRACT_ADDRESS,
+        calldata,
+        Nonce(nonce),
+        tx_fee(gas_limit),
+        U256::zero(),
+        L2ChainId::from(270),
+        &PRIVATE_KEY,
+        LOAD_TEST_CONTRACT.factory_deps.clone(),
+        Default::default(),
+    )
+    .expect("should create a signed execute transaction");
+
+    signed.set_input(H256::random().as_bytes().to_vec(), H256::random());
+    signed.into()
+}
+
+pub fn get_realistic_load_test_tx(nonce: u32) -> Transaction {
+    get_load_test_tx(
+        nonce,
+        10_000_000,
+        LoadTestParams {
+            reads: 30,
+            writes: 2,
+            events: 5,
+            hashes: 10,
+            recursive_calls: 0,
+            deploys: 0,
+        },
+    )
+}
+
+pub fn get_heavy_load_test_tx(nonce: u32) -> Transaction {
+    get_load_test_tx(
+        nonce,
+        10_000_000,
+        LoadTestParams {
+            reads: 100,
+            writes: 5,
+            events: 20,
+            hashes: 100,
+            recursive_calls: 20,
+            deploys: 5,
+        },
+    )
+}
diff --git a/core/tests/vm-benchmark/harness/src/lib.rs b/core/tests/vm-benchmark/src/vm.rs
similarity index 54%
rename from core/tests/vm-benchmark/harness/src/lib.rs
rename to core/tests/vm-benchmark/src/vm.rs
index 6460d25a8e8d..e805554d5584 100644
--- a/core/tests/vm-benchmark/harness/src/lib.rs
+++ b/core/tests/vm-benchmark/src/vm.rs
@@ -1,51 +1,27 @@
 use std::{cell::RefCell, rc::Rc};
 
 use once_cell::sync::Lazy;
-pub use zksync_contracts::test_contracts::LoadnextContractExecutionParams as LoadTestParams;
-use zksync_contracts::{deployer_contract, BaseSystemContracts, TestContract};
+use zksync_contracts::BaseSystemContracts;
 use zksync_multivm::{
     interface::{
         storage::{InMemoryStorage, StorageView},
         ExecutionResult, L1BatchEnv, L2BlockEnv, SystemEnv, TxExecutionMode, VmExecutionMode,
         VmExecutionResultAndLogs, VmFactory, VmInterface, VmInterfaceHistoryEnabled,
     },
-    utils::get_max_gas_per_pubdata_byte,
     vm_fast, vm_latest,
     vm_latest::{constants::BATCH_COMPUTATIONAL_GAS_LIMIT, HistoryEnabled},
+    zk_evm_latest::ethereum_types::{Address, U256},
 };
 use zksync_types::{
-    block::L2BlockHasher,
-    ethabi::{encode, Token},
-    fee::Fee,
-    fee_model::BatchFeeInput,
-    helpers::unix_timestamp_ms,
-    l2::L2Tx,
-    utils::{deployed_address_create, storage_key_for_eth_balance},
-    Address, K256PrivateKey, L1BatchNumber, L2BlockNumber, L2ChainId, Nonce, ProtocolVersionId,
-    Transaction, CONTRACT_DEPLOYER_ADDRESS, H256, U256,
+    block::L2BlockHasher, fee_model::BatchFeeInput, helpers::unix_timestamp_ms,
+    utils::storage_key_for_eth_balance, L1BatchNumber, L2BlockNumber, L2ChainId, ProtocolVersionId,
+    Transaction,
 };
 use zksync_utils::bytecode::hash_bytecode;
 
-mod instruction_counter;
+use crate::transaction::PRIVATE_KEY;
 
-/// Bytecodes have consist of an odd number of 32 byte words
-/// This function "fixes" bytecodes of wrong length by cutting off their end.
-pub fn cut_to_allowed_bytecode_size(bytes: &[u8]) -> Option<&[u8]> {
-    let mut words = bytes.len() / 32;
-    if words == 0 {
-        return None;
-    }
-
-    if words & 1 == 0 {
-        words -= 1;
-    }
-    Some(&bytes[..32 * words])
-}
-
-const LOAD_TEST_MAX_READS: usize = 100;
-
-static LOAD_TEST_CONTRACT_ADDRESS: Lazy<Address> =
-    Lazy::new(|| deployed_address_create(PRIVATE_KEY.address(), 0.into()));
+static SYSTEM_CONTRACTS: Lazy<BaseSystemContracts> = Lazy::new(BaseSystemContracts::load_from_disk);
 
 static STORAGE: Lazy<InMemoryStorage> = Lazy::new(|| {
     let mut storage = InMemoryStorage::with_system_contracts(hash_bytecode);
@@ -56,20 +32,6 @@ static STORAGE: Lazy<InMemoryStorage> = Lazy::new(|| {
     storage
 });
 
-static SYSTEM_CONTRACTS: Lazy<BaseSystemContracts> = Lazy::new(BaseSystemContracts::load_from_disk);
-
-static LOAD_TEST_CONTRACT: Lazy<TestContract> = Lazy::new(zksync_contracts::get_loadnext_contract);
-
-static CREATE_FUNCTION_SIGNATURE: Lazy<[u8; 4]> = Lazy::new(|| {
-    deployer_contract()
-        .function("create")
-        .unwrap()
-        .short_signature()
-});
-
-static PRIVATE_KEY: Lazy<K256PrivateKey> =
-    Lazy::new(|| K256PrivateKey::from_bytes(H256([42; 32])).expect("invalid key bytes"));
-
 /// VM label used to name `criterion` benchmarks.
 #[derive(Debug, Clone, Copy)]
 pub enum VmLabel {
@@ -229,178 +191,17 @@ impl<VM: BenchmarkingVmFactory> BenchmarkingVm<VM> {
     }
 }
 
-pub fn get_deploy_tx(code: &[u8]) -> Transaction {
-    get_deploy_tx_with_gas_limit(code, 30_000_000, 0)
-}
-
-pub fn get_deploy_tx_with_gas_limit(code: &[u8], gas_limit: u32, nonce: u32) -> Transaction {
-    let mut salt = vec![0_u8; 32];
-    salt[28..32].copy_from_slice(&nonce.to_be_bytes());
-    let params = [
-        Token::FixedBytes(salt),
-        Token::FixedBytes(hash_bytecode(code).0.to_vec()),
-        Token::Bytes([].to_vec()),
-    ];
-    let calldata = CREATE_FUNCTION_SIGNATURE
-        .iter()
-        .cloned()
-        .chain(encode(&params))
-        .collect();
-
-    let mut signed = L2Tx::new_signed(
-        CONTRACT_DEPLOYER_ADDRESS,
-        calldata,
-        Nonce(nonce),
-        tx_fee(gas_limit),
-        U256::zero(),
-        L2ChainId::from(270),
-        &PRIVATE_KEY,
-        vec![code.to_vec()], // maybe not needed?
-        Default::default(),
-    )
-    .expect("should create a signed execute transaction");
-
-    signed.set_input(H256::random().as_bytes().to_vec(), H256::random());
-    signed.into()
-}
-
-fn tx_fee(gas_limit: u32) -> Fee {
-    Fee {
-        gas_limit: U256::from(gas_limit),
-        max_fee_per_gas: U256::from(250_000_000),
-        max_priority_fee_per_gas: U256::from(0),
-        gas_per_pubdata_limit: U256::from(get_max_gas_per_pubdata_byte(
-            ProtocolVersionId::latest().into(),
-        )),
-    }
-}
-
-pub fn get_transfer_tx(nonce: u32) -> Transaction {
-    let mut signed = L2Tx::new_signed(
-        PRIVATE_KEY.address(),
-        vec![], // calldata
-        Nonce(nonce),
-        tx_fee(1_000_000),
-        1_000_000_000.into(), // value
-        L2ChainId::from(270),
-        &PRIVATE_KEY,
-        vec![],             // factory deps
-        Default::default(), // paymaster params
-    )
-    .expect("should create a signed execute transaction");
-
-    signed.set_input(H256::random().as_bytes().to_vec(), H256::random());
-    signed.into()
-}
-
-pub fn get_load_test_deploy_tx() -> Transaction {
-    let calldata = [Token::Uint(LOAD_TEST_MAX_READS.into())];
-    let params = [
-        Token::FixedBytes(vec![0_u8; 32]),
-        Token::FixedBytes(hash_bytecode(&LOAD_TEST_CONTRACT.bytecode).0.to_vec()),
-        Token::Bytes(encode(&calldata)),
-    ];
-    let create_calldata = CREATE_FUNCTION_SIGNATURE
-        .iter()
-        .cloned()
-        .chain(encode(&params))
-        .collect();
-
-    let mut factory_deps = LOAD_TEST_CONTRACT.factory_deps.clone();
-    factory_deps.push(LOAD_TEST_CONTRACT.bytecode.clone());
-
-    let mut signed = L2Tx::new_signed(
-        CONTRACT_DEPLOYER_ADDRESS,
-        create_calldata,
-        Nonce(0),
-        tx_fee(100_000_000),
-        U256::zero(),
-        L2ChainId::from(270),
-        &PRIVATE_KEY,
-        factory_deps,
-        Default::default(),
-    )
-    .expect("should create a signed execute transaction");
-
-    signed.set_input(H256::random().as_bytes().to_vec(), H256::random());
-    signed.into()
-}
-
-pub fn get_load_test_tx(nonce: u32, gas_limit: u32, params: LoadTestParams) -> Transaction {
-    assert!(
-        params.reads <= LOAD_TEST_MAX_READS,
-        "Too many reads: {params:?}, should be <={LOAD_TEST_MAX_READS}"
-    );
-
-    let execute_function = LOAD_TEST_CONTRACT
-        .contract
-        .function("execute")
-        .expect("no `execute` function in load test contract");
-    let calldata = execute_function
-        .encode_input(&vec![
-            Token::Uint(U256::from(params.reads)),
-            Token::Uint(U256::from(params.writes)),
-            Token::Uint(U256::from(params.hashes)),
-            Token::Uint(U256::from(params.events)),
-            Token::Uint(U256::from(params.recursive_calls)),
-            Token::Uint(U256::from(params.deploys)),
-        ])
-        .expect("cannot encode `execute` inputs");
-
-    let mut signed = L2Tx::new_signed(
-        *LOAD_TEST_CONTRACT_ADDRESS,
-        calldata,
-        Nonce(nonce),
-        tx_fee(gas_limit),
-        U256::zero(),
-        L2ChainId::from(270),
-        &PRIVATE_KEY,
-        LOAD_TEST_CONTRACT.factory_deps.clone(),
-        Default::default(),
-    )
-    .expect("should create a signed execute transaction");
-
-    signed.set_input(H256::random().as_bytes().to_vec(), H256::random());
-    signed.into()
-}
-
-pub fn get_realistic_load_test_tx(nonce: u32) -> Transaction {
-    get_load_test_tx(
-        nonce,
-        10_000_000,
-        LoadTestParams {
-            reads: 30,
-            writes: 2,
-            events: 5,
-            hashes: 10,
-            recursive_calls: 0,
-            deploys: 0,
-        },
-    )
-}
-
-pub fn get_heavy_load_test_tx(nonce: u32) -> Transaction {
-    get_load_test_tx(
-        nonce,
-        10_000_000,
-        LoadTestParams {
-            reads: 100,
-            writes: 5,
-            events: 20,
-            hashes: 100,
-            recursive_calls: 20,
-            deploys: 5,
-        },
-    )
-}
-
 #[cfg(test)]
 mod tests {
     use assert_matches::assert_matches;
     use zksync_contracts::read_bytecode;
     use zksync_multivm::interface::ExecutionResult;
 
-    use crate::*;
+    use super::*;
+    use crate::{
+        get_deploy_tx, get_heavy_load_test_tx, get_load_test_deploy_tx, get_load_test_tx,
+        get_realistic_load_test_tx, get_transfer_tx, LoadTestParams,
+    };
 
     #[test]
     fn can_deploy_contract() {
diff --git a/core/tests/vm-benchmark/src/with_prometheus.rs b/core/tests/vm-benchmark/src/with_prometheus.rs
deleted file mode 100644
index f9b79adedc09..000000000000
--- a/core/tests/vm-benchmark/src/with_prometheus.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-use std::time::Duration;
-
-use tokio::sync::watch;
-use zksync_vlog::prometheus::PrometheusExporterConfig;
-
-pub fn with_prometheus<F: FnOnce()>(f: F) {
-    tokio::runtime::Runtime::new()
-        .unwrap()
-        .block_on(with_prometheus_async(f));
-}
-
-async fn with_prometheus_async<F: FnOnce()>(f: F) {
-    println!("Pushing results to Prometheus");
-
-    let endpoint =
-        "http://vmagent.stage.matterlabs.corp/api/v1/import/prometheus/metrics/job/vm-benchmark";
-    let (stop_sender, stop_receiver) = watch::channel(false);
-    let prometheus_config =
-        PrometheusExporterConfig::push(endpoint.to_owned(), Duration::from_millis(100));
-    tokio::spawn(prometheus_config.run(stop_receiver));
-
-    f();
-
-    println!("Waiting for push to happen...");
-    tokio::time::sleep(Duration::from_secs(1)).await;
-    stop_sender.send_replace(true);
-}
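As a usage reference for the APIs this diff introduces (`CriterionExt::metered_group()`, `MeteredTime`, `Bytecode::deploy_tx()`, and the `BYTECODES` registry), here is a minimal sketch of what a downstream Criterion bench target could look like. It only mirrors calls that appear in the `oneshot` and `batch` benches above; the target name `example` and group name `example_group` are hypothetical and used purely for illustration.

```rust
use std::time::Duration;

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use vm_benchmark::{
    criterion::{CriterionExt, MeteredTime},
    BenchmarkingVm, BYTECODES,
};

fn bench_example(c: &mut Criterion<MeteredTime>) {
    // `metered_group` wraps `Criterion::benchmark_group` and additionally records
    // per-benchmark timings as Prometheus metrics when
    // `BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL` is set.
    let mut group = c.metered_group("example_group");
    group.measurement_time(Duration::from_secs(10));

    for bytecode in BYTECODES {
        let tx = bytecode.deploy_tx();
        group.bench_metered(bytecode.name, |bencher| {
            bencher.iter(|timer| {
                // VM construction happens before the timer guard is created,
                // so only the transaction execution lands in the measured interval.
                let mut vm = BenchmarkingVm::new();
                let guard = timer.start();
                vm.run_transaction(black_box(&tx));
                drop(guard);
            });
        });
    }
}

criterion_group!(
    name = benches;
    config = Criterion::default()
        .configure_from_args()
        .with_measurement(MeteredTime::new("example"));
    targets = bench_example
);
criterion_main!(benches);
```

Note how this follows the convention established in `oneshot.rs`: setup and result-drop latency stay outside the explicit `timer.start()`/`drop(guard)` window, which is the reason `Bencher::iter()` hands the closure a timer instead of measuring the whole closure body.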