Skip to content

Commit

Permalink
test(vm): Improve VM benchmarks (#2591)
Browse files Browse the repository at this point in the history
## What ❔

- Extends the multi-transaction benchmark to cover simple deployments,
transfers and various load test transactions. Optionally includes the
snapshot workflow in the benchmark.
- Fixes the multi-transaction benchmark setup so that transactions in it
don't fail early in bootloader.

## Why ❔

- Transactions failing early lead to non-representative benchmark
results.

## Checklist

- [x] PR title corresponds to the body of PR (we generate changelog
entries from PRs).
- [x] Tests for the changes have been added / updated.
- [x] Documentation comments have been added / updated.
- [x] Code has been formatted via `zk fmt` and `zk lint`.
  • Loading branch information
slowli authored Aug 14, 2024
1 parent 3f2cac6 commit 0d9c2ae
Show file tree
Hide file tree
Showing 9 changed files with 619 additions and 75 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/ci-core-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ jobs:
run: ci_run yarn l1-contracts test

- name: Rust unit tests
run: ci_run zk test rust
run: |
ci_run zk test rust
# Benchmarks are not tested by `cargo nextest` unless specified explicitly, and even then `criterion` harness is incompatible
# with how `cargo nextest` runs tests. Thus, we run criterion-based benchmark tests manually.
ci_run zk f cargo test --release -p vm-benchmark --bench criterion --bench fill_bootloader
loadtest:
runs-on: [matterlabs-ci-runner]
Expand Down
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion core/tests/vm-benchmark/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ license.workspace = true
publish = false

[dependencies]
zksync_vm_benchmark_harness.workspace = true
zksync_types.workspace = true
zksync_vlog.workspace = true
zksync_vm_benchmark_harness.workspace = true

rand.workspace = true
vise.workspace = true
tokio.workspace = true

Expand Down
92 changes: 85 additions & 7 deletions core/tests/vm-benchmark/benches/criterion.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,98 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use zksync_vm_benchmark_harness::{cut_to_allowed_bytecode_size, get_deploy_tx, BenchmarkingVm};
use std::time::Duration;

use criterion::{
black_box, criterion_group, criterion_main, measurement::WallTime, BatchSize, BenchmarkGroup,
Criterion,
};
use zksync_types::Transaction;
use zksync_vm_benchmark_harness::{
cut_to_allowed_bytecode_size, get_deploy_tx, get_heavy_load_test_tx, get_load_test_deploy_tx,
get_load_test_tx, get_realistic_load_test_tx, BenchmarkingVm, BenchmarkingVmFactory, Fast,
Legacy, LoadTestParams,
};

/// Number of samples criterion collects for each benchmark group defined in this file
/// (passed to `sample_size()` when a group is configured).
const SAMPLE_SIZE: usize = 20;

/// Registers one deployment benchmark per file found in the `deployment_benchmarks` directory.
///
/// Each file is read as contract bytecode, passed through `cut_to_allowed_bytecode_size` and wrapped
/// into a deploy transaction that is then executed on a VM produced by the `VM` factory.
///
/// - With `FULL == true` the benchmark is named `<file name>/full` and VM creation is part of
///   the measured closure.
/// - With `FULL == false` the VM is created in the `iter_batched` setup closure, so only
///   `run_transaction` is executed inside the measured routine.
///
/// # Panics
///
/// Panics if the directory or any file in it cannot be read, if the bytecode cannot be cut
/// to an allowed size, or if a file name is not valid UTF-8.
fn benches_in_folder<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Criterion) {
let mut group = c.benchmark_group(VM::LABEL.as_str());
group
.sample_size(SAMPLE_SIZE)
.measurement_time(Duration::from_secs(10));

fn benches_in_folder(c: &mut Criterion) {
// The path is relative, so it is resolved against the current working directory.
for path in std::fs::read_dir("deployment_benchmarks").unwrap() {
let path = path.unwrap().path();

let test_contract = std::fs::read(&path).expect("failed to read file");

let code = cut_to_allowed_bytecode_size(&test_contract).unwrap();
let tx = get_deploy_tx(code);

c.bench_function(path.file_name().unwrap().to_str().unwrap(), |b| {
b.iter(|| BenchmarkingVm::new().run_transaction(black_box(&tx)))
let file_name = path.file_name().unwrap().to_str().unwrap();
let full_suffix = if FULL { "/full" } else { "" };
let bench_name = format!("{file_name}{full_suffix}");
group.bench_function(bench_name, |bencher| {
if FULL {
// Include VM initialization / drop into the measured time
bencher.iter(|| BenchmarkingVm::<VM>::default().run_transaction(black_box(&tx)));
} else {
// The VM is handed back together with the result instead of being dropped inside the routine;
// presumably so that criterion drops it outside of the timed section (TODO confirm against
// the `iter_batched` documentation).
bencher.iter_batched(
BenchmarkingVm::<VM>::default,
|mut vm| {
let result = vm.run_transaction(black_box(&tx));
(vm, result)
},
BatchSize::LargeInput, // VM can consume significant amount of RAM, especially the new one
);
}
});
}
}

criterion_group!(benches, benches_in_folder);
/// Registers the `load_test`, `load_test_realistic` and `load_test_heavy` benchmarks
/// in the group labelled after the `VM` factory.
///
/// Each benchmark executes a single load test transaction; see `bench_load_test_transaction`
/// for how the VM is prepared.
fn bench_load_test<VM: BenchmarkingVmFactory>(c: &mut Criterion) {
    let mut group = c.benchmark_group(VM::LABEL.as_str());
    group.sample_size(SAMPLE_SIZE);
    group.measurement_time(Duration::from_secs(10));

    // Nonce 0 is used for the deployment transaction, hence every transaction below uses nonce 1.
    let cases = [
        (
            "load_test",
            get_load_test_tx(1, 10_000_000, LoadTestParams::default()),
        ),
        ("load_test_realistic", get_realistic_load_test_tx(1)),
        ("load_test_heavy", get_heavy_load_test_tx(1)),
    ];
    for (name, tx) in &cases {
        bench_load_test_transaction::<VM>(&mut group, name, tx);
    }
}

fn bench_load_test_transaction<VM: BenchmarkingVmFactory>(
group: &mut BenchmarkGroup<'_, WallTime>,
name: &str,
tx: &Transaction,
) {
group.bench_function(name, |bencher| {
bencher.iter_batched(
|| {
let mut vm = BenchmarkingVm::<VM>::default();
vm.run_transaction(&get_load_test_deploy_tx());
vm
},
|mut vm| {
let result = vm.run_transaction(black_box(tx));
assert!(!result.result.is_failed(), "{:?}", result.result);
(vm, result)
},
BatchSize::LargeInput,
);
});
}

// Benchmark targets: deployment benchmarks for both VM factories (with and without VM
// initialization included in the measurement), followed by the load test benchmarks.
criterion_group!(
benches,
benches_in_folder::<Fast, false>,
benches_in_folder::<Fast, true>,
benches_in_folder::<Legacy, false>,
benches_in_folder::<Legacy, true>,
bench_load_test::<Fast>,
bench_load_test::<Legacy>
);
criterion_main!(benches);
196 changes: 184 additions & 12 deletions core/tests/vm-benchmark/benches/fill_bootloader.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,195 @@
use std::time::Instant;
//! Benchmarks executing entire batches of transactions with varying size (from 1 to 5,000).
//!
//! - `fill_bootloader_full/*` benches emulate the entire transaction lifecycle including taking a snapshot
//! before a transaction and rolling back to it on halt. They also include VM initialization and drop.
//! In contrast, `fill_bootloader/*` benches only cover transaction execution.
//! - `deploy_simple_contract` benches deploy a simple contract in each transaction. All transactions succeed.
//! - `transfer` benches perform the base token transfer in each transaction. All transactions succeed.
//! - `transfer_with_invalid_nonce` benches are similar to `transfer`, but each transaction with a probability
//! `TX_FAILURE_PROBABILITY` has a previously used nonce and thus halts during validation.
//! - `load_test(|_realistic|_heavy)` execute the load test contract (a mixture of storage reads, writes, emitting events,
//! recursive calls, hashing and deploying new contracts). These 3 categories differ in how many operations of each kind
//! are performed in each transaction. Beware that the first executed transaction is load test contract deployment,
//! which skews results for small-size batches.
use criterion::black_box;
use std::{iter, time::Duration};

use criterion::{
black_box, criterion_group, criterion_main, measurement::WallTime, BatchSize, BenchmarkGroup,
BenchmarkId, Criterion, Throughput,
};
use rand::{rngs::StdRng, Rng, SeedableRng};
use zksync_types::Transaction;
use zksync_vm_benchmark_harness::{
cut_to_allowed_bytecode_size, get_deploy_tx_with_gas_limit, BenchmarkingVm,
cut_to_allowed_bytecode_size, get_deploy_tx_with_gas_limit, get_heavy_load_test_tx,
get_load_test_deploy_tx, get_load_test_tx, get_realistic_load_test_tx, get_transfer_tx,
BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, LoadTestParams,
};

fn main() {
let test_contract =
std::fs::read("deployment_benchmarks/event_spam").expect("failed to read file");
/// Gas limit for deployment transactions.
const DEPLOY_GAS_LIMIT: u32 = 30_000_000;
/// Tested numbers of transactions in a batch.
///
/// Must stay sorted in ascending order: `run_vm_expecting_failures` stops at the first size exceeding
/// the number of prepared transactions, and test mode only uses the first 3 entries.
const TXS_IN_BATCH: &[usize] = &[1, 10, 50, 100, 200, 500, 1_000, 2_000, 5_000];

/// RNG seed used e.g. to randomize failing transactions.
const RNG_SEED: u64 = 123;
/// Probability for a transaction to fail in the `transfer_with_invalid_nonce` benchmarks.
const TX_FAILURE_PROBABILITY: f64 = 0.2;

fn bench_vm<VM: BenchmarkingVmFactory, const FULL: bool>(
vm: &mut BenchmarkingVm<VM>,
txs: &[Transaction],
expected_failures: &[bool],
) {
for (i, tx) in txs.iter().enumerate() {
let result = if FULL {
vm.run_transaction_full(black_box(tx))
} else {
vm.run_transaction(black_box(tx))
};
let result = &result.result;
let expecting_failure = expected_failures.get(i).copied().unwrap_or(false);
assert_eq!(
result.is_failed(),
expecting_failure,
"{result:?} on tx #{i}"
);
black_box(result);
}
}

/// Registers a `name/<batch size>` benchmark for every size in `TXS_IN_BATCH` that does not
/// exceed `txs.len()`; each benchmark executes the corresponding prefix of `txs`.
///
/// `expected_failures` is forwarded to `bench_vm` and marks the transactions that must fail.
/// With `FULL == true` VM creation happens inside the measured closure; otherwise the VM
/// is created in the `iter_batched` setup closure and returned from the routine.
fn run_vm_expecting_failures<VM: BenchmarkingVmFactory, const FULL: bool>(
    group: &mut BenchmarkGroup<'_, WallTime>,
    name: &str,
    txs: &[Transaction],
    expected_failures: &[bool],
) {
    // Stop at the first batch size for which there are not enough transactions.
    let feasible_sizes = TXS_IN_BATCH
        .iter()
        .take_while(|&&size| size <= txs.len());

    for batch_size in feasible_sizes {
        group.throughput(Throughput::Elements(*batch_size as u64));
        let id = BenchmarkId::new(name, batch_size);
        group.bench_with_input(id, batch_size, |bencher, &batch_size| {
            let batch = &txs[..batch_size];
            if FULL {
                // Include VM initialization / drop into the measured time
                bencher.iter(|| {
                    let mut vm = BenchmarkingVm::<VM>::default();
                    bench_vm::<_, true>(&mut vm, batch, expected_failures);
                });
            } else {
                bencher.iter_batched(
                    BenchmarkingVm::<VM>::default,
                    |mut vm| {
                        bench_vm::<_, false>(&mut vm, batch, expected_failures);
                        vm
                    },
                    // VM can consume significant amount of RAM, especially the new one
                    BatchSize::LargeInput,
                );
            }
        });
    }
}

/// Shortcut for `run_vm_expecting_failures` for batches in which every transaction
/// is expected to succeed.
fn run_vm<VM: BenchmarkingVmFactory, const FULL: bool>(
    group: &mut BenchmarkGroup<'_, WallTime>,
    name: &str,
    txs: &[Transaction],
) {
    // An empty expectation list means that no transaction is allowed to fail.
    let no_expected_failures: &[bool] = &[];
    run_vm_expecting_failures::<VM, FULL>(group, name, txs, no_expected_failures);
}

/// Registers all batch benchmarks for the `VM` factory in a `fill_bootloader{suffix}` group
/// (`fill_bootloader_full{suffix}` if `FULL == true`).
///
/// The following transaction batches are benchmarked, in this order: `deploy_simple_contract`,
/// `load_test`, `load_test_realistic`, `load_test_heavy`, `transfer` and, only if `FULL == true`,
/// `transfer_with_invalid_nonce`.
///
/// # Panics
///
/// Panics if the `deployment_benchmarks/deploy_simple_contract` file cannot be read or its bytecode
/// cannot be cut to an allowed size.
fn bench_fill_bootloader<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Criterion) {
// Presumably `--bench` is only passed to the binary by `cargo bench`, so its absence
// indicates a run via `cargo test` (TODO confirm).
let is_test_mode = !std::env::args().any(|arg| arg == "--bench");
let txs_in_batch = if is_test_mode {
&TXS_IN_BATCH[..3] // Reduce the number of transactions in a batch so that tests don't take long
} else {
TXS_IN_BATCH
};

let mut group = c.benchmark_group(if FULL {
format!("fill_bootloader_full{}", VM::LABEL.as_suffix())
} else {
format!("fill_bootloader{}", VM::LABEL.as_suffix())
});
group
.sample_size(10)
.measurement_time(Duration::from_secs(10));

// Deploying simple contract
let test_contract =
std::fs::read("deployment_benchmarks/deploy_simple_contract").expect("failed to read file");
let code = cut_to_allowed_bytecode_size(&test_contract).unwrap();
let tx = get_deploy_tx_with_gas_limit(code, 1000);
// The largest tested batch size determines how many transactions are prepared for each benchmark.
let max_txs = *txs_in_batch.last().unwrap() as u32;
let txs: Vec<_> = (0..max_txs)
.map(|nonce| get_deploy_tx_with_gas_limit(code, DEPLOY_GAS_LIMIT, nonce))
.collect();
run_vm::<VM, FULL>(&mut group, "deploy_simple_contract", &txs);
drop(txs);

// Load test with various parameters
// The contract deployment transaction is prepended to each load test batch, which is why
// the nonces of the load test transactions start from 1.
let txs =
(1..=max_txs).map(|nonce| get_load_test_tx(nonce, 10_000_000, LoadTestParams::default()));
let txs: Vec<_> = iter::once(get_load_test_deploy_tx()).chain(txs).collect();
run_vm::<VM, FULL>(&mut group, "load_test", &txs);
drop(txs);

let start = Instant::now();
let txs = (1..=max_txs).map(get_realistic_load_test_tx);
let txs: Vec<_> = iter::once(get_load_test_deploy_tx()).chain(txs).collect();
run_vm::<VM, FULL>(&mut group, "load_test_realistic", &txs);
drop(txs);

let mut vm = BenchmarkingVm::new();
for _ in 0..1000 {
vm.run_transaction(black_box(&tx));
let txs = (1..=max_txs).map(get_heavy_load_test_tx);
let txs: Vec<_> = iter::once(get_load_test_deploy_tx()).chain(txs).collect();
run_vm::<VM, FULL>(&mut group, "load_test_heavy", &txs);
drop(txs);

// Base token transfers
// This batch is not dropped: it is reused below as the source of valid transfers.
let txs: Vec<_> = (0..max_txs).map(get_transfer_tx).collect();
run_vm::<VM, FULL>(&mut group, "transfer", &txs);

// Halted transactions produced by the following benchmarks *must* be rolled back,
// otherwise the bootloader will process following transactions incorrectly.
if !FULL {
return;
}

println!("{:?}", start.elapsed());
// A fixed seed keeps the positions of failing transactions the same across runs.
let mut rng = StdRng::seed_from_u64(RNG_SEED);

let mut txs_with_failures = Vec::with_capacity(txs.len());
let mut expected_failures = Vec::with_capacity(txs.len());
txs_with_failures.push(txs[0].clone());
expected_failures.push(false);
// Valid transfers are consumed in their original order. At most `txs.len() - 1` of them are taken
// by the loop below, so `split_first()` always has an element to return.
let mut successful_txs = &txs[1..];
for _ in 1..txs.len() {
let (tx, should_fail) = if rng.gen_bool(TX_FAILURE_PROBABILITY) {
// Since we add the transaction with nonce 0 unconditionally as the first tx to execute,
// all transactions generated here should halt during validation.
(get_transfer_tx(0), true)
} else {
let (tx, remaining_txs) = successful_txs.split_first().unwrap();
successful_txs = remaining_txs;
(tx.clone(), false)
};
txs_with_failures.push(tx);
expected_failures.push(should_fail);
}
run_vm_expecting_failures::<VM, FULL>(
&mut group,
"transfer_with_invalid_nonce",
&txs_with_failures,
&expected_failures,
);
}

// Benchmark targets for the batch benchmarks.
// NOTE(review): no `bench_fill_bootloader::<Legacy, true>` variant is registered here;
// presumably this is intentional — confirm.
criterion_group!(
benches,
bench_fill_bootloader::<Fast, false>,
bench_fill_bootloader::<Fast, true>,
bench_fill_bootloader::<Legacy, false>
);
criterion_main!(benches);
Loading

0 comments on commit 0d9c2ae

Please sign in to comment.