diff --git a/Cargo.lock b/Cargo.lock index 64faa652855238..ba1db628299ba4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1529,6 +1529,9 @@ dependencies = [ "aptos-infallible", "aptos-metrics-core", "bitvec 1.0.1", + "criterion", + "itertools 0.12.1", + "jemallocator", "once_cell", "proptest", ] diff --git a/experimental/storage/layered-map/Cargo.toml b/experimental/storage/layered-map/Cargo.toml index 64592bc7902f29..6780c94d66d35c 100644 --- a/experimental/storage/layered-map/Cargo.toml +++ b/experimental/storage/layered-map/Cargo.toml @@ -21,4 +21,11 @@ bitvec = "1.0.1" once_cell = { workspace = true } [dev-dependencies] +criterion = { workspace = true } +itertools = { workspace = true } proptest = { workspace = true } +jemallocator = { workspace = true } + +[[bench]] +name = "sorting" +harness = false diff --git a/experimental/storage/layered-map/benches/sorting.rs b/experimental/storage/layered-map/benches/sorting.rs new file mode 100644 index 00000000000000..1dbb6f7fc0734a --- /dev/null +++ b/experimental/storage/layered-map/benches/sorting.rs @@ -0,0 +1,138 @@ +// Copyright (c) Aptos Foundation +// SPDX-License-Identifier: Apache-2.0 + +use aptos_crypto::HashValue; +use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; +use itertools::Itertools; + +#[cfg(unix)] +#[global_allocator] +static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; + +fn recursive_bin_search(sorted_data: &[HashValue], depth: usize) { + if sorted_data.len() <= 1 { + return; + } + + let pivot = sorted_data.partition_point(|key| !key.bit(depth)); + recursive_bin_search(&sorted_data[..pivot], depth + 1); + recursive_bin_search(&sorted_data[pivot..], depth + 1); +} + +fn partition(data: &mut [HashValue], depth: usize) -> usize { + if data.is_empty() { + return 0; + } + + let mut zero_cur = 0; + let mut one_cur = data.len() - 1; + + while zero_cur < one_cur { + while zero_cur < one_cur && !data[zero_cur].bit(depth) { + zero_cur += 1; + } + while one_cur > zero_cur && 
data[one_cur].bit(depth) { + one_cur -= 1; + } + if zero_cur < one_cur { + data.swap(zero_cur, one_cur); + zero_cur += 1; + one_cur -= 1; + } + } + + if data[zero_cur].bit(depth) { + zero_cur + } else { + zero_cur + 1 + } +} + +fn recursive_partition(data: &mut [HashValue], depth: usize) { + if data.len() <= 1 { + return; + } + + let pivot = partition(data, depth); + recursive_partition(&mut data[..pivot], depth + 1); + recursive_partition(&mut data[pivot..], depth + 1); +} + +fn compare_sorting(c: &mut Criterion) { + let mut group = c.benchmark_group("sorting"); + + const SET_SIZE: usize = 100000; + + let data = std::iter::repeat_with(HashValue::random) + .take(SET_SIZE) + .collect_vec(); + group.throughput(criterion::Throughput::Elements(SET_SIZE as u64)); + + group.bench_function("sort_then_bin_search", |b| { + b.iter_batched( + || data.clone(), + |mut data| { + data.sort(); + recursive_bin_search(&data, 0); + data + }, + BatchSize::SmallInput, + ); + }); + + group.bench_function("recursive_partition", |b| { + b.iter_batched( + || data.clone(), + |mut data| { + recursive_partition(&mut data, 0); + data + }, + BatchSize::SmallInput, + ) + }); + + let mut data = data.clone(); + data.sort(); + + group.bench_function("sort_then_bin_search_pre_sorted", |b| { + b.iter_batched( + || data.clone(), + |mut data| { + data.sort(); + recursive_bin_search(&data, 0); + data + }, + BatchSize::SmallInput, + ); + }); + + group.bench_function("bin_search_pre_sorted", |b| { + b.iter_batched( + || data.clone(), + |data| { + recursive_bin_search(&data, 0); + data + }, + BatchSize::SmallInput, + ); + }); + + group.bench_function("recursive_partition_pre_sorted", |b| { + b.iter_batched( + || data.clone(), + |mut data| { + recursive_partition(&mut data, 0); + data + }, + BatchSize::SmallInput, + ) + }); +} + +criterion_group!( + name = sorting; + config = Criterion::default(); + targets = compare_sorting +); + +criterion_main!(sorting); diff --git 
a/experimental/storage/layered-map/src/key.rs b/experimental/storage/layered-map/src/key.rs new file mode 100644 index 00000000000000..48d804c2af133f --- /dev/null +++ b/experimental/storage/layered-map/src/key.rs @@ -0,0 +1,24 @@ +// Copyright (c) Aptos Foundation +// SPDX-License-Identifier: Apache-2.0 + +use aptos_crypto::HashValue; +use bitvec::prelude::*; + +/// When recursively creating a new `MapLayer` (a crit bit tree overlay), passing down `Vec<(K, Option<V>)>` +/// That's why we require `Key: Clone` and clone the key and value only when the leaf node is +/// created. +pub trait Key: Clone + Eq { + fn iter_bits(&self) -> impl Iterator<Item = bool>; + + fn bit(&self, depth: usize) -> bool; +} + +impl Key for HashValue { + fn iter_bits(&self) -> impl Iterator<Item = bool> { + self.iter_bits() + } + + fn bit(&self, depth: usize) -> bool { + *self.as_slice().view_bits::<Msb0>().get(depth).unwrap() + } +} diff --git a/experimental/storage/layered-map/src/lib.rs b/experimental/storage/layered-map/src/lib.rs index e6d2d054b66dc0..d59a812dd5587f 100644 --- a/experimental/storage/layered-map/src/lib.rs +++ b/experimental/storage/layered-map/src/lib.rs @@ -10,6 +10,7 @@ use aptos_crypto::HashValue; use aptos_drop_helper::ArcAsyncDrop; use aptos_infallible::Mutex; use aptos_metrics_core::{IntGaugeHelper, TimerHelper}; +pub use key::Key; use std::sync::Arc; mod dropper; @@ -17,18 +18,10 @@ mod metrics; mod node; pub(crate) mod r#ref; +mod key; #[cfg(test)] mod tests; -/// When recursively creating a new `MapLayer` (a crit bit tree overlay), passing down `Vec<(K, Option<V>)>` -/// That's why we require `Key: Clone` and clone the key and value only when the leaf node is -/// created. -pub trait Key: Clone + Eq { - fn iter_bits(&self) -> impl Iterator<Item = bool>; - - fn bit(&self, depth: usize) -> bool; -} - /// Similar to `Key`, we require `Value: Clone`, another reason being it's tricky to figure out the /// lifetime if `get()` returns a reference to the value -- we simply clone the value. pub trait Value: Clone {}