test(vm): Improve VM benchmarks (#2591)

## What ❔

- Extends the multi-transaction benchmark to cover simple deployments, transfers and various load test transactions. Optionally includes the snapshot workflow in the benchmark.
- Fixes the multi-transaction benchmark setup so that transactions in it don't fail early in the bootloader.

## Why ❔

- Transactions failing early leads to non-representative benchmark results.

## Checklist

- [x] PR title corresponds to the body of PR (we generate changelog entries from PRs).
- [x] Tests for the changes have been added / updated.
- [x] Documentation comments have been added / updated.
- [x] Code has been formatted via `zk fmt` and `zk lint`.
matter-labs · Aug 14, 2024 · 0d9c2ae · 0d9c2ae
1 parent 3f2cac6
commit 0d9c2ae
Show file tree

Hide file tree

Showing 9 changed files with 619 additions and 75 deletions.
diff --git a/.github/workflows/ci-core-reusable.yml b/.github/workflows/ci-core-reusable.yml
@@ -63,7 +63,11 @@ jobs:
         run: ci_run yarn l1-contracts test
 
       - name: Rust unit tests
-        run: ci_run zk test rust
+        run: |
+          ci_run zk test rust
+          # Benchmarks are not tested by `cargo nextest` unless specified explicitly, and even then the `criterion` harness is incompatible
+          # with how `cargo nextest` runs tests. Thus, we run criterion-based benchmark tests manually.
+          ci_run zk f cargo test --release -p vm-benchmark --bench criterion --bench fill_bootloader
 
   loadtest:
     runs-on: [matterlabs-ci-runner]

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/core/tests/vm-benchmark/Cargo.toml b/core/tests/vm-benchmark/Cargo.toml
@@ -6,8 +6,11 @@ license.workspace = true
 publish = false
 
 [dependencies]
-zksync_vm_benchmark_harness.workspace = true
+zksync_types.workspace = true
 zksync_vlog.workspace = true
+zksync_vm_benchmark_harness.workspace = true
+
+rand.workspace = true
 vise.workspace = true
 tokio.workspace = true
 

diff --git a/core/tests/vm-benchmark/benches/criterion.rs b/core/tests/vm-benchmark/benches/criterion.rs
@@ -1,20 +1,98 @@
-use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use zksync_vm_benchmark_harness::{cut_to_allowed_bytecode_size, get_deploy_tx, BenchmarkingVm};
+use std::time::Duration;
+
+use criterion::{
+    black_box, criterion_group, criterion_main, measurement::WallTime, BatchSize, BenchmarkGroup,
+    Criterion,
+};
+use zksync_types::Transaction;
+use zksync_vm_benchmark_harness::{
+    cut_to_allowed_bytecode_size, get_deploy_tx, get_heavy_load_test_tx, get_load_test_deploy_tx,
+    get_load_test_tx, get_realistic_load_test_tx, BenchmarkingVm, BenchmarkingVmFactory, Fast,
+    Legacy, LoadTestParams,
+};
+
+const SAMPLE_SIZE: usize = 20;
+
+fn benches_in_folder<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Criterion) {
+    let mut group = c.benchmark_group(VM::LABEL.as_str());
+    group
+        .sample_size(SAMPLE_SIZE)
+        .measurement_time(Duration::from_secs(10));
 
-fn benches_in_folder(c: &mut Criterion) {
     for path in std::fs::read_dir("deployment_benchmarks").unwrap() {
         let path = path.unwrap().path();
 
         let test_contract = std::fs::read(&path).expect("failed to read file");
 
         let code = cut_to_allowed_bytecode_size(&test_contract).unwrap();
         let tx = get_deploy_tx(code);
-
-        c.bench_function(path.file_name().unwrap().to_str().unwrap(), |b| {
-            b.iter(|| BenchmarkingVm::new().run_transaction(black_box(&tx)))
+        let file_name = path.file_name().unwrap().to_str().unwrap();
+        let full_suffix = if FULL { "/full" } else { "" };
+        let bench_name = format!("{file_name}{full_suffix}");
+        group.bench_function(bench_name, |bencher| {
+            if FULL {
+                // Include VM initialization / drop into the measured time
+                bencher.iter(|| BenchmarkingVm::<VM>::default().run_transaction(black_box(&tx)));
+            } else {
+                bencher.iter_batched(
+                    BenchmarkingVm::<VM>::default,
+                    |mut vm| {
+                        let result = vm.run_transaction(black_box(&tx));
+                        (vm, result)
+                    },
+                    BatchSize::LargeInput, // VM can consume significant amount of RAM, especially the new one
+                );
+            }
         });
     }
 }
 
-criterion_group!(benches, benches_in_folder);
+fn bench_load_test<VM: BenchmarkingVmFactory>(c: &mut Criterion) {
+    let mut group = c.benchmark_group(VM::LABEL.as_str());
+    group
+        .sample_size(SAMPLE_SIZE)
+        .measurement_time(Duration::from_secs(10));
+
+    // Nonce 0 is used for the deployment transaction
+    let tx = get_load_test_tx(1, 10_000_000, LoadTestParams::default());
+    bench_load_test_transaction::<VM>(&mut group, "load_test", &tx);
+
+    let tx = get_realistic_load_test_tx(1);
+    bench_load_test_transaction::<VM>(&mut group, "load_test_realistic", &tx);
+
+    let tx = get_heavy_load_test_tx(1);
+    bench_load_test_transaction::<VM>(&mut group, "load_test_heavy", &tx);
+}
+
+fn bench_load_test_transaction<VM: BenchmarkingVmFactory>(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    name: &str,
+    tx: &Transaction,
+) {
+    group.bench_function(name, |bencher| {
+        bencher.iter_batched(
+            || {
+                let mut vm = BenchmarkingVm::<VM>::default();
+                vm.run_transaction(&get_load_test_deploy_tx());
+                vm
+            },
+            |mut vm| {
+                let result = vm.run_transaction(black_box(tx));
+                assert!(!result.result.is_failed(), "{:?}", result.result);
+                (vm, result)
+            },
+            BatchSize::LargeInput,
+        );
+    });
+}
+
+criterion_group!(
+    benches,
+    benches_in_folder::<Fast, false>,
+    benches_in_folder::<Fast, true>,
+    benches_in_folder::<Legacy, false>,
+    benches_in_folder::<Legacy, true>,
+    bench_load_test::<Fast>,
+    bench_load_test::<Legacy>
+);
 criterion_main!(benches);
diff --git a/core/tests/vm-benchmark/benches/fill_bootloader.rs b/core/tests/vm-benchmark/benches/fill_bootloader.rs
@@ -1,23 +1,195 @@
-use std::time::Instant;
+//! Benchmarks executing entire batches of transactions of varying size (from 1 to 5,000 transactions).
+//!
+//! - `fill_bootloader_full/*` benches emulate the entire transaction lifecycle, including taking a snapshot
+//!   before a transaction and rolling back to it on halt. They also include VM initialization and drop.
+//!   In contrast, `fill_bootloader/*` benches only cover transaction execution.
+//! - `deploy_simple_contract` benches deploy a simple contract in each transaction. All transactions succeed.
+//! - `transfer` benches perform a base token transfer in each transaction. All transactions succeed.
+//! - `transfer_with_invalid_nonce` benches are similar to `transfer`, but each transaction after the first, with
+//!   probability `TX_FAILURE_PROBABILITY`, reuses a previously used nonce and thus halts during validation.
+//! - `load_test(|_realistic|_heavy)` benches execute the load test contract (a mixture of storage reads and writes,
+//!   event emission, recursive calls, hashing and deploying new contracts). These 3 categories differ in how many
+//!   operations of each kind are performed in each transaction. Beware that the first executed transaction is
+//!   the load test contract deployment, which skews results for small batches.
 
-use criterion::black_box;
+use std::{iter, time::Duration};
+
+use criterion::{
+    black_box, criterion_group, criterion_main, measurement::WallTime, BatchSize, BenchmarkGroup,
+    BenchmarkId, Criterion, Throughput,
+};
+use rand::{rngs::StdRng, Rng, SeedableRng};
+use zksync_types::Transaction;
 use zksync_vm_benchmark_harness::{
-    cut_to_allowed_bytecode_size, get_deploy_tx_with_gas_limit, BenchmarkingVm,
+    cut_to_allowed_bytecode_size, get_deploy_tx_with_gas_limit, get_heavy_load_test_tx,
+    get_load_test_deploy_tx, get_load_test_tx, get_realistic_load_test_tx, get_transfer_tx,
+    BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, LoadTestParams,
 };
 
-fn main() {
-    let test_contract =
-        std::fs::read("deployment_benchmarks/event_spam").expect("failed to read file");
+/// Gas limit for deployment transactions.
+const DEPLOY_GAS_LIMIT: u32 = 30_000_000;
+/// Tested numbers of transactions in a batch.
+const TXS_IN_BATCH: &[usize] = &[1, 10, 50, 100, 200, 500, 1_000, 2_000, 5_000];
+
+/// RNG seed used e.g. to randomize failing transactions.
+const RNG_SEED: u64 = 123;
+/// Probability for a transaction to fail in the `transfer_with_invalid_nonce` benchmarks.
+const TX_FAILURE_PROBABILITY: f64 = 0.2;
+
+fn bench_vm<VM: BenchmarkingVmFactory, const FULL: bool>(
+    vm: &mut BenchmarkingVm<VM>,
+    txs: &[Transaction],
+    expected_failures: &[bool],
+) {
+    for (i, tx) in txs.iter().enumerate() {
+        let result = if FULL {
+            vm.run_transaction_full(black_box(tx))
+        } else {
+            vm.run_transaction(black_box(tx))
+        };
+        let result = &result.result;
+        let expecting_failure = expected_failures.get(i).copied().unwrap_or(false);
+        assert_eq!(
+            result.is_failed(),
+            expecting_failure,
+            "{result:?} on tx #{i}"
+        );
+        black_box(result);
+    }
+}
+
+fn run_vm_expecting_failures<VM: BenchmarkingVmFactory, const FULL: bool>(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    name: &str,
+    txs: &[Transaction],
+    expected_failures: &[bool],
+) {
+    for txs_in_batch in TXS_IN_BATCH {
+        if *txs_in_batch > txs.len() {
+            break;
+        }
+
+        group.throughput(Throughput::Elements(*txs_in_batch as u64));
+        group.bench_with_input(
+            BenchmarkId::new(name, txs_in_batch),
+            txs_in_batch,
+            |bencher, &txs_in_batch| {
+                if FULL {
+                    // Include VM initialization / drop into the measured time
+                    bencher.iter(|| {
+                        let mut vm = BenchmarkingVm::<VM>::default();
+                        bench_vm::<_, true>(&mut vm, &txs[..txs_in_batch], expected_failures);
+                    });
+                } else {
+                    bencher.iter_batched(
+                        BenchmarkingVm::<VM>::default,
+                        |mut vm| {
+                            bench_vm::<_, false>(&mut vm, &txs[..txs_in_batch], expected_failures);
+                            vm
+                        },
+                        BatchSize::LargeInput, // VM can consume significant amount of RAM, especially the new one
+                    );
+                }
+            },
+        );
+    }
+}
 
+fn run_vm<VM: BenchmarkingVmFactory, const FULL: bool>(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    name: &str,
+    txs: &[Transaction],
+) {
+    run_vm_expecting_failures::<VM, FULL>(group, name, txs, &[]);
+}
+
+fn bench_fill_bootloader<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Criterion) {
+    let is_test_mode = !std::env::args().any(|arg| arg == "--bench");
+    let txs_in_batch = if is_test_mode {
+        &TXS_IN_BATCH[..3] // Reduce the number of transactions in a batch so that tests don't take long
+    } else {
+        TXS_IN_BATCH
+    };
+
+    let mut group = c.benchmark_group(if FULL {
+        format!("fill_bootloader_full{}", VM::LABEL.as_suffix())
+    } else {
+        format!("fill_bootloader{}", VM::LABEL.as_suffix())
+    });
+    group
+        .sample_size(10)
+        .measurement_time(Duration::from_secs(10));
+
+    // Deploying simple contract
+    let test_contract =
+        std::fs::read("deployment_benchmarks/deploy_simple_contract").expect("failed to read file");
     let code = cut_to_allowed_bytecode_size(&test_contract).unwrap();
-    let tx = get_deploy_tx_with_gas_limit(code, 1000);
+    let max_txs = *txs_in_batch.last().unwrap() as u32;
+    let txs: Vec<_> = (0..max_txs)
+        .map(|nonce| get_deploy_tx_with_gas_limit(code, DEPLOY_GAS_LIMIT, nonce))
+        .collect();
+    run_vm::<VM, FULL>(&mut group, "deploy_simple_contract", &txs);
+    drop(txs);
+
+    // Load test with various parameters
+    let txs =
+        (1..=max_txs).map(|nonce| get_load_test_tx(nonce, 10_000_000, LoadTestParams::default()));
+    let txs: Vec<_> = iter::once(get_load_test_deploy_tx()).chain(txs).collect();
+    run_vm::<VM, FULL>(&mut group, "load_test", &txs);
+    drop(txs);
 
-    let start = Instant::now();
+    let txs = (1..=max_txs).map(get_realistic_load_test_tx);
+    let txs: Vec<_> = iter::once(get_load_test_deploy_tx()).chain(txs).collect();
+    run_vm::<VM, FULL>(&mut group, "load_test_realistic", &txs);
+    drop(txs);
 
-    let mut vm = BenchmarkingVm::new();
-    for _ in 0..1000 {
-        vm.run_transaction(black_box(&tx));
+    let txs = (1..=max_txs).map(get_heavy_load_test_tx);
+    let txs: Vec<_> = iter::once(get_load_test_deploy_tx()).chain(txs).collect();
+    run_vm::<VM, FULL>(&mut group, "load_test_heavy", &txs);
+    drop(txs);
+
+    // Base token transfers
+    let txs: Vec<_> = (0..max_txs).map(get_transfer_tx).collect();
+    run_vm::<VM, FULL>(&mut group, "transfer", &txs);
+
+    // Halted transactions produced by the following benchmarks *must* be rolled back,
+    // otherwise the bootloader will process following transactions incorrectly.
+    if !FULL {
+        return;
     }
 
-    println!("{:?}", start.elapsed());
+    let mut rng = StdRng::seed_from_u64(RNG_SEED);
+
+    let mut txs_with_failures = Vec::with_capacity(txs.len());
+    let mut expected_failures = Vec::with_capacity(txs.len());
+    txs_with_failures.push(txs[0].clone());
+    expected_failures.push(false);
+    let mut successful_txs = &txs[1..];
+    for _ in 1..txs.len() {
+        let (tx, should_fail) = if rng.gen_bool(TX_FAILURE_PROBABILITY) {
+            // Since we add the transaction with nonce 0 unconditionally as the first tx to execute,
+            // all transactions generated here should halt during validation.
+            (get_transfer_tx(0), true)
+        } else {
+            let (tx, remaining_txs) = successful_txs.split_first().unwrap();
+            successful_txs = remaining_txs;
+            (tx.clone(), false)
+        };
+        txs_with_failures.push(tx);
+        expected_failures.push(should_fail);
+    }
+    run_vm_expecting_failures::<VM, FULL>(
+        &mut group,
+        "transfer_with_invalid_nonce",
+        &txs_with_failures,
+        &expected_failures,
+    );
 }
+
+criterion_group!(
+    benches,
+    bench_fill_bootloader::<Fast, false>,
+    bench_fill_bootloader::<Fast, true>,
+    bench_fill_bootloader::<Legacy, false>
+);
+criterion_main!(benches);