Skip to content

Commit

Permalink
test(vm): Refactor VM benchmarks (#2668)
Browse files Browse the repository at this point in the history
## What ❔

- Integrates Prometheus metrics into criterion benches; removes the DIY
benchmark correspondingly.
- Merges the main benchmark crate with the harness one.
- Includes the benchmarked bytecodes in the crate itself rather than reading
them at runtime.

## Why ❔

Makes VM benchmarks more maintainable.

## Checklist

- [x] PR title corresponds to the body of PR (we generate changelog
entries from PRs).
- [x] Tests for the changes have been added / updated.
- [x] Documentation comments have been added / updated.
- [x] Code has been formatted via `zk fmt` and `zk lint`.
  • Loading branch information
slowli authored Aug 27, 2024
1 parent a4170e9 commit bd2b5d8
Show file tree
Hide file tree
Showing 36 changed files with 988 additions and 645 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-core-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
ci_run zk test rust
# Benchmarks are not tested by `cargo nextest` unless specified explicitly, and even then the `criterion` harness is incompatible
# with how `cargo nextest` runs tests. Thus, we run criterion-based benchmark tests manually.
ci_run zk f cargo test --release -p vm-benchmark --bench criterion --bench fill_bootloader
ci_run zk f cargo test --release -p vm-benchmark --bench oneshot --bench batch
loadtest:
runs-on: [matterlabs-ci-runner]
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/vm-perf-comparison.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Compare VM perfomance to base branch
name: Compare VM performance to base branch

on:
pull_request:
Expand Down Expand Up @@ -47,7 +47,7 @@ jobs:
ci_run zk
ci_run zk compiler system-contracts
ci_run cargo bench --package vm-benchmark --bench iai | tee base-iai
ci_run cargo run --package vm-benchmark --release --bin instruction-counts | tee base-opcodes || touch base-opcodes
ci_run cargo run --package vm-benchmark --release --bin instruction_counts | tee base-opcodes || touch base-opcodes
ci_run yarn workspace system-contracts clean
- name: checkout PR
Expand All @@ -59,7 +59,7 @@ jobs:
ci_run zk
ci_run zk compiler system-contracts
ci_run cargo bench --package vm-benchmark --bench iai | tee pr-iai
ci_run cargo run --package vm-benchmark --release --bin instruction-counts | tee pr-opcodes || touch pr-opcodes
ci_run cargo run --package vm-benchmark --release --bin instruction_counts | tee pr-opcodes || touch pr-opcodes
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
echo "speedup<<$EOF" >> $GITHUB_OUTPUT
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/vm-perf-to-prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:

- name: setup-env
run: |
echo PUSH_VM_BENCHMARKS_TO_PROMETHEUS=1 >> .env
echo BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL=${{ secrets.BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL }} >> .env
echo ZKSYNC_HOME=$(pwd) >> $GITHUB_ENV
echo $(pwd)/bin >> $GITHUB_PATH
Expand All @@ -31,10 +31,12 @@ jobs:
run_retried docker compose pull zk
docker compose up -d zk
ci_run zk
ci_run zk compiler system-contracts
ci_run zk compiler all
- name: run benchmarks
run: |
ci_run cargo bench --package vm-benchmark --bench diy_benchmark
ci_run cargo bench --package vm-benchmark --bench oneshot
# Run only benches with 1,000 transactions per batch to not spend too much time
ci_run cargo bench --package vm-benchmark --bench batch '/1000$'
ci_run cargo bench --package vm-benchmark --bench iai | tee iai-result
ci_run cargo run --package vm-benchmark --bin iai_results_to_prometheus --release < iai-result
21 changes: 5 additions & 16 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ members = [
"core/tests/test_account",
"core/tests/loadnext",
"core/tests/vm-benchmark",
"core/tests/vm-benchmark/harness",
# Parts of prover workspace that are needed for Core workspace
"prover/crates/lib/prover_dal",
]
Expand Down Expand Up @@ -238,7 +237,6 @@ zksync_prover_dal = { version = "0.1.0", path = "prover/crates/lib/prover_dal" }
zksync_vlog = { version = "0.1.0", path = "core/lib/vlog" }
zksync_vm_interface = { version = "0.1.0", path = "core/lib/vm_interface" }
zksync_vm_utils = { version = "0.1.0", path = "core/lib/vm_utils" }
zksync_vm_benchmark_harness = { version = "0.1.0", path = "core/tests/vm-benchmark/harness" }
zksync_basic_types = { version = "0.1.0", path = "core/lib/basic_types" }
zksync_circuit_breaker = { version = "0.1.0", path = "core/lib/circuit_breaker" }
zksync_config = { version = "0.1.0", path = "core/lib/config" }
Expand Down
32 changes: 8 additions & 24 deletions core/tests/vm-benchmark/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,46 +6,30 @@ license.workspace = true
publish = false

[dependencies]
zksync_contracts.workspace = true
zksync_multivm.workspace = true
zksync_types.workspace = true
zksync_utils.workspace = true
zksync_vlog.workspace = true
zksync_vm_benchmark_harness.workspace = true

criterion.workspace = true
once_cell.workspace = true
rand.workspace = true
vise.workspace = true
tokio.workspace = true

[dev-dependencies]
criterion.workspace = true
assert_matches.workspace = true
iai.workspace = true

[[bench]]
name = "criterion"
name = "oneshot"
harness = false

[[bench]]
name = "diy_benchmark"
name = "batch"
harness = false

[[bench]]
name = "iai"
harness = false

[[bench]]
name = "fill_bootloader"
harness = false

[[bin]]
name = "iai_results_to_prometheus"
path = "src/iai_results_to_prometheus.rs"

[[bin]]
name = "compare_iai_results"
path = "src/compare_iai_results.rs"

[[bin]]
name = "find-slowest"
path = "src/find_slowest.rs"

[[bin]]
name = "instruction-counts"
path = "src/instruction_counts.rs"
31 changes: 9 additions & 22 deletions core/tests/vm-benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,35 +9,22 @@ benchmarks, however.
There are three different benchmark targets available:

```sh
cargo bench --bench criterion
cargo bench --bench diy_benchmark
cargo bench --bench oneshot
cargo bench --bench batch
cargo +nightly bench --bench iai
```

Criterion is the de-facto microbenchmarking tool for Rust. Run it, then optimize something and run the command again to
see if your changes have made a difference.
The `oneshot` and `batch` targets use Criterion, the de facto standard micro-benchmarking tool for Rust. `oneshot`
measures VM performance on single transactions, and `batch` on entire batches of up to 5,000 transactions. Run these
benches, then optimize something and run them again to see if your changes have made a difference.

The DIY benchmark works a bit better in noisy environments and is used to push benchmark data to Prometheus
automatically.
IAI uses cachegrind to simulate the CPU, so noise is completely irrelevant to it, but it also doesn't measure exactly
the same thing as normal benchmarks. You need valgrind to be able to run it.

IAI uses cachegrind to simulate the CPU, so noise is completely irrelevant to it but it also doesn't measure exactly the
same thing as normal benchmarks. You need valgrind to be able to run it.

You can add your own bytecodes to be benchmarked into the folder "deployment_benchmarks". For iai, you also need to add
them to "benches/iai.rs".
You can add new bytecodes to be benchmarked into the [`bytecodes`](src/bytecodes) directory and then add them to the
`BYTECODES` constant exported by the crate.

## Profiling (Linux only)

You can also use `sh perf.sh bytecode_file` to produce data that can be fed into the
[firefox profiler](https://profiler.firefox.com/) for a specific bytecode.

## Fuzzing

There is a fuzzer using this library at core/lib/vm/fuzz. The fuzz.sh script located there starts a fuzzer which
attempts to cover as much code as it can to ultimately produce a valid deployment bytecode.

It has no chance of succeeding currently because the fuzzing speed drops to 10 executions/s easily. Optimizing the VM or
lowering the gas limit will help with that.

The fuzzer has been useful for producing synthetic benchmark inputs. It may be a good tool for finding slow transactions
with a certain gas limit, an empirical way of evaluating gas prices of instructions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,15 @@
use std::{iter, time::Duration};

use criterion::{
black_box, criterion_group, criterion_main, measurement::WallTime, BatchSize, BenchmarkGroup,
BenchmarkId, Criterion, Throughput,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use rand::{rngs::StdRng, Rng, SeedableRng};
use zksync_types::Transaction;
use zksync_vm_benchmark_harness::{
cut_to_allowed_bytecode_size, get_deploy_tx_with_gas_limit, get_heavy_load_test_tx,
get_load_test_deploy_tx, get_load_test_tx, get_realistic_load_test_tx, get_transfer_tx,
BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, LoadTestParams,
use vm_benchmark::{
criterion::{is_test_mode, BenchmarkGroup, BenchmarkId, CriterionExt, MeteredTime},
get_deploy_tx_with_gas_limit, get_heavy_load_test_tx, get_load_test_deploy_tx,
get_load_test_tx, get_realistic_load_test_tx, get_transfer_tx, BenchmarkingVm,
BenchmarkingVmFactory, Bytecode, Fast, Legacy, LoadTestParams,
};
use zksync_types::Transaction;

/// Gas limit for deployment transactions.
const DEPLOY_GAS_LIMIT: u32 = 30_000_000;
Expand Down Expand Up @@ -59,7 +57,7 @@ fn bench_vm<VM: BenchmarkingVmFactory, const FULL: bool>(
}

fn run_vm_expecting_failures<VM: BenchmarkingVmFactory, const FULL: bool>(
group: &mut BenchmarkGroup<'_, WallTime>,
group: &mut BenchmarkGroup<'_>,
name: &str,
txs: &[Transaction],
expected_failures: &[bool],
Expand All @@ -70,48 +68,48 @@ fn run_vm_expecting_failures<VM: BenchmarkingVmFactory, const FULL: bool>(
}

group.throughput(Throughput::Elements(*txs_in_batch as u64));
group.bench_with_input(
group.bench_metered_with_input(
BenchmarkId::new(name, txs_in_batch),
txs_in_batch,
|bencher, &txs_in_batch| {
if FULL {
// Include VM initialization / drop into the measured time
bencher.iter(|| {
bencher.iter(|timer| {
let _guard = timer.start();
let mut vm = BenchmarkingVm::<VM>::default();
bench_vm::<_, true>(&mut vm, &txs[..txs_in_batch], expected_failures);
});
} else {
bencher.iter_batched(
BenchmarkingVm::<VM>::default,
|mut vm| {
bench_vm::<_, false>(&mut vm, &txs[..txs_in_batch], expected_failures);
vm
},
BatchSize::LargeInput, // VM can consume significant amount of RAM, especially the new one
);
bencher.iter(|timer| {
let mut vm = BenchmarkingVm::<VM>::default();
let guard = timer.start();
bench_vm::<_, false>(&mut vm, &txs[..txs_in_batch], expected_failures);
drop(guard);
});
}
},
);
}
}

fn run_vm<VM: BenchmarkingVmFactory, const FULL: bool>(
group: &mut BenchmarkGroup<'_, WallTime>,
group: &mut BenchmarkGroup<'_>,
name: &str,
txs: &[Transaction],
) {
run_vm_expecting_failures::<VM, FULL>(group, name, txs, &[]);
}

fn bench_fill_bootloader<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Criterion) {
let is_test_mode = !std::env::args().any(|arg| arg == "--bench");
let txs_in_batch = if is_test_mode {
fn bench_fill_bootloader<VM: BenchmarkingVmFactory, const FULL: bool>(
c: &mut Criterion<MeteredTime>,
) {
let txs_in_batch = if is_test_mode() {
&TXS_IN_BATCH[..3] // Reduce the number of transactions in a batch so that tests don't take long
} else {
TXS_IN_BATCH
};

let mut group = c.benchmark_group(if FULL {
let mut group = c.metered_group(if FULL {
format!("fill_bootloader_full{}", VM::LABEL.as_suffix())
} else {
format!("fill_bootloader{}", VM::LABEL.as_suffix())
Expand All @@ -121,12 +119,12 @@ fn bench_fill_bootloader<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Cr
.measurement_time(Duration::from_secs(10));

// Deploying simple contract
let test_contract =
std::fs::read("deployment_benchmarks/deploy_simple_contract").expect("failed to read file");
let code = cut_to_allowed_bytecode_size(&test_contract).unwrap();
let test_contract = Bytecode::get("deploy_simple_contract");
let max_txs = *txs_in_batch.last().unwrap() as u32;
let txs: Vec<_> = (0..max_txs)
.map(|nonce| get_deploy_tx_with_gas_limit(code, DEPLOY_GAS_LIMIT, nonce))
.map(|nonce| {
get_deploy_tx_with_gas_limit(test_contract.bytecode(), DEPLOY_GAS_LIMIT, nonce)
})
.collect();
run_vm::<VM, FULL>(&mut group, "deploy_simple_contract", &txs);
drop(txs);
Expand Down Expand Up @@ -187,9 +185,12 @@ fn bench_fill_bootloader<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Cr
}

criterion_group!(
benches,
bench_fill_bootloader::<Fast, false>,
bench_fill_bootloader::<Fast, true>,
bench_fill_bootloader::<Legacy, false>
name = benches;
config = Criterion::default()
.configure_from_args()
.with_measurement(MeteredTime::new("fill_bootloader"));
targets = bench_fill_bootloader::<Fast, false>,
bench_fill_bootloader::<Fast, true>,
bench_fill_bootloader::<Legacy, false>
);
criterion_main!(benches);
Loading

0 comments on commit bd2b5d8

Please sign in to comment.