Fix benchmark measurements
slowli committed Aug 16, 2024
1 parent 55f7b8c commit dcf97b2
Showing 3 changed files with 147 additions and 60 deletions.
43 changes: 20 additions & 23 deletions core/tests/vm-benchmark/benches/criterion.rs
@@ -1,6 +1,6 @@
use std::time::Duration;

use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use vm_benchmark::{
criterion::{BenchmarkGroup, CriterionExt, MeteredTime},
get_heavy_load_test_tx, get_load_test_deploy_tx, get_load_test_tx, get_realistic_load_test_tx,
@@ -25,16 +25,17 @@ fn benches_in_folder<VM: BenchmarkingVmFactory, const FULL: bool>(c: &mut Criter
group.bench_metered(bench_name, |bencher| {
if FULL {
// Include VM initialization / drop into the measured time
bencher.iter(|| BenchmarkingVm::<VM>::default().run_transaction(black_box(&tx)));
bencher.iter(|timer| {
let _guard = timer.start();
BenchmarkingVm::<VM>::default().run_transaction(black_box(&tx));
});
} else {
bencher.iter_batched(
BenchmarkingVm::<VM>::default,
|mut vm| {
let result = vm.run_transaction(black_box(&tx));
(vm, result)
},
BatchSize::LargeInput, // VM can consume significant amount of RAM, especially the new one
);
bencher.iter(|timer| {
let mut vm = BenchmarkingVm::<VM>::default();
let guard = timer.start();
let _result = vm.run_transaction(black_box(&tx));
drop(guard); // do not include latency of dropping `_result`
});
}
});
}
@@ -63,19 +64,15 @@ fn bench_load_test_transaction<VM: BenchmarkingVmFactory>(
tx: &Transaction,
) {
group.bench_metered(name, |bencher| {
bencher.iter_batched(
|| {
let mut vm = BenchmarkingVm::<VM>::default();
vm.run_transaction(&get_load_test_deploy_tx());
vm
},
|mut vm| {
let result = vm.run_transaction(black_box(tx));
assert!(!result.result.is_failed(), "{:?}", result.result);
(vm, result)
},
BatchSize::LargeInput,
);
bencher.iter(|timer| {
let mut vm = BenchmarkingVm::<VM>::default();
vm.run_transaction(&get_load_test_deploy_tx());

let guard = timer.start();
let result = vm.run_transaction(black_box(tx));
drop(guard); // do not include the latency of `result` checks / drop
assert!(!result.result.is_failed(), "{:?}", result.result);
});
});
}
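
The replacements above all follow the same pattern: per-iteration setup (constructing the VM, running the deploy transaction) happens before the timer guard is created, and the guard is dropped before results are checked, so only the transaction run is timed. The helper that makes this possible is the new `Bencher::iter()` in `src/criterion.rs` below, which builds on criterion's `iter_custom`. For illustration only, here is a minimal stand-alone sketch of the same idea with plain criterion; `expensive_setup` and `routine` are hypothetical placeholders, not code from this repository:

```rust
use std::time::{Duration, Instant};

use criterion::{black_box, criterion_group, criterion_main, Criterion};

/// Hypothetical per-iteration setup that should not be measured.
fn expensive_setup() -> Vec<u64> {
    (0..10_000).collect()
}

/// Hypothetical workload under test.
fn routine(data: &[u64]) -> u64 {
    data.iter().copied().sum()
}

fn bench(c: &mut Criterion) {
    c.bench_function("sum_without_setup", |bencher| {
        bencher.iter_custom(|iters| {
            let mut total = Duration::ZERO;
            for _ in 0..iters {
                let data = expensive_setup(); // setup is excluded from the measurement
                let start = Instant::now();
                black_box(routine(&data));
                total += start.elapsed(); // `data` is dropped only after timing stops
            }
            total
        });
    });
}

criterion_group!(benches, bench);
criterion_main!(benches);
```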

19 changes: 9 additions & 10 deletions core/tests/vm-benchmark/benches/fill_bootloader.rs
@@ -14,7 +14,7 @@
use std::{iter, time::Duration};

use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion, Throughput};
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use rand::{rngs::StdRng, Rng, SeedableRng};
use vm_benchmark::{
criterion::{is_test_mode, BenchmarkGroup, BenchmarkId, CriterionExt, MeteredTime},
@@ -74,19 +74,18 @@ fn run_vm_expecting_failures<VM: BenchmarkingVmFactory, const FULL: bool>(
|bencher, &txs_in_batch| {
if FULL {
// Include VM initialization / drop into the measured time
bencher.iter(|| {
bencher.iter(|timer| {
let _guard = timer.start();
let mut vm = BenchmarkingVm::<VM>::default();
bench_vm::<_, true>(&mut vm, &txs[..txs_in_batch], expected_failures);
});
} else {
bencher.iter_batched(
BenchmarkingVm::<VM>::default,
|mut vm| {
bench_vm::<_, false>(&mut vm, &txs[..txs_in_batch], expected_failures);
vm
},
BatchSize::LargeInput, // VM can consume significant amount of RAM, especially the new one
);
bencher.iter(|timer| {
let mut vm = BenchmarkingVm::<VM>::default();
let guard = timer.start();
bench_vm::<_, false>(&mut vm, &txs[..txs_in_batch], expected_failures);
drop(guard);
});
}
},
);
145 changes: 118 additions & 27 deletions core/tests/vm-benchmark/src/criterion.rs
@@ -1,13 +1,16 @@
//! Criterion helpers and extensions.
use std::{cell::RefCell, env, fmt, sync::Once, thread, time::Duration};
use std::{
cell::RefCell, convert::Infallible, env, fmt, rc::Rc, sync::Once, thread, time::Duration,
};

use criterion::{
measurement::{Measurement, ValueFormatter, WallTime},
Bencher, Criterion, Throughput,
Criterion, Throughput,
};
use once_cell::{sync::OnceCell as SyncOnceCell, unsync::OnceCell};
use tokio::sync::watch;
use vise::{Buckets, EncodeLabelSet, Family, Histogram, Metrics};
use vise::{Buckets, EncodeLabelSet, Family, Histogram, LatencyObserver, Metrics};
use zksync_vlog::prometheus::PrometheusExporterConfig;

/// Checks whether a benchmark binary is running in the test mode (as opposed to benchmarking).
@@ -68,17 +71,16 @@ impl PrometheusRuntime {
thread_local! {
static CURRENT_BENCH_LABELS: RefCell<Option<BenchLabels>> = const { RefCell::new(None) };
}
static BIN_NAME: SyncOnceCell<&'static str> = SyncOnceCell::new();

/// Measurement for criterion that exports measured timings as metrics.
#[derive(Debug)]
pub struct MeteredTime {
bin_name: &'static str,
metrics: &'static VmBenchmarkMetrics,
_prometheus: Option<PrometheusRuntime>,
}

impl MeteredTime {
pub fn new(group_name: &'static str) -> Self {
pub fn new(bin_name: &'static str) -> Self {
static PROMETHEUS_INIT: Once = Once::new();

let mut prometheus = None;
@@ -88,16 +90,18 @@ impl MeteredTime {
});
}

if let Err(prev_name) = BIN_NAME.set(bin_name) {
assert_eq!(prev_name, bin_name, "attempted to redefine binary name");
}

Self {
bin_name: group_name,
metrics: &METRICS,
_prometheus: prometheus,
}
}

fn start_bench(group: String, benchmark: String, arg: Option<String>) {
CURRENT_BENCH_LABELS.replace(Some(BenchLabels {
bin: "", // will be replaced
bin: BIN_NAME.get().copied().unwrap_or(""),
group,
benchmark,
arg,
@@ -112,17 +116,16 @@ }
}

impl Measurement for MeteredTime {
type Intermediate = vise::LatencyObserver<'static>;
type Intermediate = Infallible;
type Value = Duration;

fn start(&self) -> Self::Intermediate {
let mut labels = MeteredTime::get_bench();
labels.bin = self.bin_name;
self.metrics.timing[&labels].start()
// All measurements must be done via `Bencher::iter()`
unreachable!("must not be invoked directly");
}

fn end(&self, i: Self::Intermediate) -> Self::Value {
i.observe()
fn end(&self, _: Self::Intermediate) -> Self::Value {
unreachable!("must not be invoked directly");
}

fn add(&self, v1: &Self::Value, v2: &Self::Value) -> Self::Value {
@@ -164,6 +167,7 @@ impl BenchmarkId {
pub struct BenchmarkGroup<'a> {
name: String,
inner: criterion::BenchmarkGroup<'a, MeteredTime>,
metrics: &'static VmBenchmarkMetrics,
}

impl BenchmarkGroup<'_> {
@@ -182,22 +186,101 @@ impl BenchmarkGroup<'_> {
self
}

pub fn bench_metered<F>(&mut self, id: impl Into<String>, bench_fn: F)
pub fn bench_metered<F>(&mut self, id: impl Into<String>, mut bench_fn: F)
where
F: FnMut(&mut Bencher<'_, MeteredTime>),
F: FnMut(&mut Bencher<'_, '_>),
{
let id = id.into();
MeteredTime::start_bench(self.name.clone(), id.clone(), None);
self.inner.bench_function(id, bench_fn);
self.inner.bench_function(id, |bencher| {
bench_fn(&mut Bencher {
inner: bencher,
metrics: self.metrics,
})
});
}

pub fn bench_metered_with_input<I, F>(&mut self, id: BenchmarkId, input: &I, bench_fn: F)
pub fn bench_metered_with_input<I, F>(&mut self, id: BenchmarkId, input: &I, mut bench_fn: F)
where
I: ?Sized,
F: FnMut(&mut Bencher<'_, MeteredTime>, &I),
F: FnMut(&mut Bencher<'_, '_>, &I),
{
MeteredTime::start_bench(self.name.clone(), id.benchmark, Some(id.arg));
self.inner.bench_with_input(id.inner, input, bench_fn);
self.inner
.bench_with_input(id.inner, input, |bencher, input| {
bench_fn(
&mut Bencher {
inner: bencher,
metrics: self.metrics,
},
input,
)
});
}
}

pub struct Bencher<'a, 'r> {
inner: &'r mut criterion::Bencher<'a, MeteredTime>,
metrics: &'static VmBenchmarkMetrics,
}

impl Bencher<'_, '_> {
pub fn iter(&mut self, mut routine: impl FnMut(BenchmarkTimer)) {
let histogram = &self.metrics.timing[&MeteredTime::get_bench()];
self.inner.iter_custom(move |iters| {
let mut total = Duration::ZERO;
for _ in 0..iters {
let (measure, observation) = BenchmarkTimer::new(histogram);
routine(measure);
total += observation.get().copied().unwrap_or_default();
}
total
})
}
}

/// Timer for benchmarks supplied to the `Bencher::iter()` closure.
#[derive(Debug)]
#[must_use = "should be started to start measurements"]
pub struct BenchmarkTimer {
histogram: &'static Histogram<Duration>,
observation: Rc<OnceCell<Duration>>,
}

impl BenchmarkTimer {
fn new(histogram: &'static Histogram<Duration>) -> (Self, Rc<OnceCell<Duration>>) {
let observation = Rc::<OnceCell<_>>::default();
let this = Self {
histogram,
observation: observation.clone(),
};
(this, observation)
}

/// Starts the timer. The timer will remain active until the returned guard is dropped. If you drop the timer implicitly,
/// be careful with the drop order (inverse to the variable declaration order); when in doubt, drop the guard explicitly.
pub fn start(self) -> BenchmarkTimerGuard {
BenchmarkTimerGuard {
observer: Some(self.histogram.start()),
observation: self.observation,
}
}
}

/// Guard returned from [`BenchmarkTimer::start()`].
#[derive(Debug)]
#[must_use = "will stop the timer on drop"]
pub struct BenchmarkTimerGuard {
observer: Option<LatencyObserver<'static>>,
observation: Rc<OnceCell<Duration>>,
}

impl Drop for BenchmarkTimerGuard {
fn drop(&mut self) {
if let Some(observer) = self.observer.take() {
let latency = observer.observe();
self.observation.set(latency).ok();
}
}
}

@@ -211,6 +294,7 @@ impl CriterionExt for Criterion<MeteredTime> {
BenchmarkGroup {
inner: self.benchmark_group(name.clone()),
name,
metrics: &METRICS,
}
}
}
@@ -222,23 +306,31 @@ mod tests {
use super::*;
use crate::BYTECODES;

fn test_benchmark(c: &mut Criterion<MeteredTime>) {
fn test_benchmark(c: &mut Criterion<MeteredTime>, metrics: &'static VmBenchmarkMetrics) {
let mut group = c.metered_group("single");
group.metrics = metrics;
for bytecode in BYTECODES {
group.bench_metered(bytecode.name, |bencher| {
bencher.iter(|| thread::sleep(Duration::from_millis(1)))
bencher.iter(|timer| {
let _guard = timer.start();
thread::sleep(Duration::from_millis(1))
})
});
}
drop(group);

let mut group = c.metered_group("with_arg");
group.metrics = metrics;
for bytecode in BYTECODES {
for arg in [1, 10, 100] {
group.bench_metered_with_input(
BenchmarkId::new(bytecode.name, arg),
&arg,
|bencher, _arg| {
bencher.iter(|| thread::sleep(Duration::from_millis(1)));
bencher.iter(|timer| {
let _guard = timer.start();
thread::sleep(Duration::from_millis(1))
});
},
)
}
@@ -247,16 +339,15 @@

#[test]
fn recording_benchmarks() {
let mut metered_time = MeteredTime::new("test");
let metered_time = MeteredTime::new("test");
let metrics = &*Box::leak(Box::<VmBenchmarkMetrics>::default());
metered_time.metrics = metrics;

let mut criterion = Criterion::default()
.warm_up_time(Duration::from_millis(10))
.measurement_time(Duration::from_millis(10))
.sample_size(10)
.with_measurement(metered_time);
test_benchmark(&mut criterion);
test_benchmark(&mut criterion, metrics);

let timing_labels: HashSet<_> = metrics.timing.to_entries().into_keys().collect();
// Check that labels are as expected.

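To tie the pieces together, here is a minimal sketch of how a benchmark binary might wire up the helpers introduced in `src/criterion.rs`. It assumes the `vm_benchmark` crate from this repository; the binary, group, and benchmark names are made up:

```rust
use std::{thread, time::Duration};

use criterion::{criterion_group, criterion_main, Criterion};
use vm_benchmark::criterion::{CriterionExt, MeteredTime};

fn benches(c: &mut Criterion<MeteredTime>) {
    // `metered_group` / `bench_metered` record labels for the Prometheus histograms
    // in addition to driving criterion.
    let mut group = c.metered_group("example");
    group.bench_metered("sleep_1ms", |bencher| {
        bencher.iter(|timer| {
            // Any per-iteration setup placed here is not measured.
            let guard = timer.start();
            thread::sleep(Duration::from_millis(1));
            drop(guard); // stop the timer before any teardown / drops
        });
    });
}

criterion_group!(
    name = benchmarks;
    // The argument to `MeteredTime::new()` becomes the `bin` label on exported metrics.
    config = Criterion::default().with_measurement(MeteredTime::new("example_bin"));
    targets = benches
);
criterion_main!(benchmarks);
```

Dropping the guard explicitly, as the commit does in the benchmark closures, keeps per-iteration teardown out of the recorded latency and sidesteps the drop-order caveat mentioned in the `BenchmarkTimer::start()` docs.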