Add basic performance and memory benchmarks
shepmaster committed Jan 25, 2021
1 parent 98b3a4d commit 8bf847e
Showing 5 changed files with 328 additions and 0 deletions.
2 changes: 2 additions & 0 deletions benchmarks/.gitignore
@@ -0,0 +1,2 @@
/target
Cargo.lock
21 changes: 21 additions & 0 deletions benchmarks/Cargo.toml
@@ -0,0 +1,21 @@
[package]
name = "benchmarks"
version = "0.1.0"
authors = ["Jake Goulding <[email protected]>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
sxd-string-slab = { path = ".." }

backtrace = "0.3.56"
criterion = "0.3.3"
hashbrown = { version = "0.9.1", default-features = false, features = ["ahash", "inline-more"] }
itertools = "0.10.0"
once_cell = "1.5.2"
rand = "0.7.3"

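# Criterion supplies its own `main` through `criterion_main!`, so the default
# libtest bench harness is disabled for this target.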
[[bench]]
name = "benchmark"
harness = false
62 changes: 62 additions & 0 deletions benchmarks/benches/benchmark.rs
@@ -0,0 +1,62 @@
use benchmarks::{DUPLICATES, DUPLICATES_STRING, NO_DUPLICATES, NO_DUPLICATES_STRING};
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use hashbrown::HashSet;
use once_cell::sync::Lazy;
use std::convert::TryInto;
use sxd_string_slab::StringArena;

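// Compares interning throughput of `StringArena` against a plain `HashSet` over
// the same pre-generated corpora; forcing the `Lazy` statics up front keeps
// corpus generation out of the timed iterations.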
fn criterion_benchmark(c: &mut Criterion) {
    Lazy::force(&NO_DUPLICATES_STRING);
    Lazy::force(&DUPLICATES_STRING);

    {
        let mut group = c.benchmark_group("no duplicates");
        group.throughput(Throughput::Elements(
            NO_DUPLICATES.len().try_into().unwrap(),
        ));

        group.bench_function("sxd_string_slab::StringArena", |b| {
            b.iter(|| {
                let mut arena = StringArena::new();
                for s in NO_DUPLICATES_STRING.lines() {
                    arena.intern(s);
                }
            })
        });

        group.bench_function("hashbrown::HashSet", |b| {
            b.iter(|| {
                let mut arena = HashSet::new();
                for s in NO_DUPLICATES_STRING.lines() {
                    arena.get_or_insert_owned(s);
                }
            })
        });
    }

    {
        let mut group = c.benchmark_group("duplicates");
        group.throughput(Throughput::Elements(DUPLICATES.len().try_into().unwrap()));

        group.bench_function("sxd_string_slab::StringArena", |b| {
            b.iter(|| {
                let mut arena = StringArena::new();
                for s in DUPLICATES_STRING.lines() {
                    arena.intern(s);
                }
            })
        });

        group.bench_function("hashbrown::HashSet", |b| {
            b.iter(|| {
                let mut arena = HashSet::new();
                for s in DUPLICATES_STRING.lines() {
                    arena.get_or_insert_owned(s);
                }
            })
        });
    }
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
53 changes: 53 additions & 0 deletions benchmarks/src/bin/memory_usage.rs
@@ -0,0 +1,53 @@
use benchmarks::{alloc::TrackingAllocator, env_or, DUPLICATES_STRING, NO_DUPLICATES_STRING};
use hashbrown::HashSet;
use once_cell::sync::Lazy;
use std::convert::TryFrom;
use sxd_string_slab::StringArena;

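// Install the tracking allocator for the whole binary so that
// `track_allocations` sees every allocation made by the code under test.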
#[global_allocator]
static A: TrackingAllocator = TrackingAllocator;

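// Interns the duplicate-heavy corpus with each strategy and reports the net
// bytes allocated as a percentage of the raw string data, along with the
// number of allocations. Set SHOW_MAP=true to dump the per-call-site map.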
fn main() {
    let show_map = env_or("SHOW_MAP", false);
    Lazy::force(&NO_DUPLICATES_STRING);
    Lazy::force(&DUPLICATES_STRING);

    let total_length: usize = DUPLICATES_STRING.lines().map(str::len).sum();
    let total_length_f64 = f64::from(u32::try_from(total_length).unwrap());

    eprintln!("String data of {} bytes", total_length);

    let (_arena, alloc_size, alloc_count, alloc_map) = TrackingAllocator::track_allocations(|| {
        let mut arena = StringArena::new();
        for s in DUPLICATES_STRING.lines() {
            arena.intern(s);
        }
        arena
    });

    let percent = f64::from(u32::try_from(alloc_size).unwrap()) / total_length_f64 * 100.0;
    eprintln!(
        "sxd_string_slab::StringArena: {} bytes ({:.2}%) in {} allocations",
        alloc_size, percent, alloc_count
    );
    if show_map {
        eprintln!("{:?}", alloc_map);
    }

    let (_arena, alloc_size, alloc_count, alloc_map) = TrackingAllocator::track_allocations(|| {
        let mut arena = HashSet::new();
        for s in DUPLICATES_STRING.lines() {
            arena.get_or_insert_owned(s);
        }
        arena
    });

    let percent = f64::from(u32::try_from(alloc_size).unwrap()) / total_length_f64 * 100.0;
    eprintln!(
        "hashbrown::HashSet: {} bytes ({:.2}%) in {} allocations",
        alloc_size, percent, alloc_count
    );
    if show_map {
        eprintln!("{:?}", alloc_map);
    }
}
190 changes: 190 additions & 0 deletions benchmarks/src/lib.rs
@@ -0,0 +1,190 @@
use hashbrown::HashSet;
use itertools::Itertools;
use once_cell::sync::Lazy;
use rand::{distributions::Alphanumeric, rngs::StdRng, seq::SliceRandom, Rng, SeedableRng};
use std::{env, iter, str::FromStr};

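// A `GlobalAlloc` wrapper that counts live bytes and allocation events and
// records a backtrace for each allocation, skipping the allocations it makes
// itself while recording.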
pub mod alloc {
    use backtrace::Backtrace;
    use hashbrown::HashMap;
    use once_cell::sync::Lazy;
    use std::{
        alloc::{GlobalAlloc, Layout, System},
        hash::{Hash, Hasher},
        mem,
        sync::{
            atomic::{AtomicBool, AtomicUsize, Ordering},
            Mutex,
        },
    };

    pub struct TrackingAllocator;

    static RECURSIVE: AtomicBool = AtomicBool::new(false);
    static MEMORY_IN_USE: AtomicUsize = AtomicUsize::new(0);
    static N_ALLOCATIONS: AtomicUsize = AtomicUsize::new(0);
    static ALLOCATION_MAP: Lazy<Mutex<AllocMap>> = Lazy::new(Default::default);

    #[derive(Debug, Default)]
    pub struct AllocMap(HashMap<Trace, Vec<usize>>);

    #[derive(Debug)]
    struct Trace(Backtrace);

    impl Trace {
        fn new() -> Self {
            Self(Backtrace::new_unresolved())
        }
    }

    impl Hash for Trace {
        fn hash<H>(&self, h: &mut H)
        where
            H: Hasher,
        {
            for f in self.0.frames() {
                f.ip().hash(h);
            }
        }
    }

    impl PartialEq for Trace {
        fn eq(&self, other: &Self) -> bool {
            self.0
                .frames()
                .iter()
                .map(|f| f.ip())
                .eq(other.0.frames().iter().map(|f| f.ip()))
        }
    }

    unsafe impl GlobalAlloc for TrackingAllocator {
        unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
            let inside_alloc = RECURSIVE.fetch_or(true, Ordering::SeqCst);

            if !inside_alloc {
                MEMORY_IN_USE.fetch_add(layout.size(), Ordering::SeqCst);
                N_ALLOCATIONS.fetch_add(1, Ordering::SeqCst);
                ALLOCATION_MAP
                    .lock()
                    .expect("Mutex Poisoned")
                    .0
                    .entry(Trace::new())
                    .or_insert_with(Vec::new)
                    .push(layout.size());
            }

            RECURSIVE.store(inside_alloc, Ordering::SeqCst);

            System.alloc(layout)
        }

        unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
            let inside_alloc = RECURSIVE.fetch_or(true, Ordering::SeqCst);

            if !inside_alloc {
                MEMORY_IN_USE.fetch_sub(layout.size(), Ordering::SeqCst);
            }
            RECURSIVE.store(inside_alloc, Ordering::SeqCst);

            System.dealloc(ptr, layout)
        }
    }

    impl Eq for Trace {}

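    // Runs `f` and reports what it allocated: the closure's result, the net
    // bytes still allocated afterwards, the number of allocations performed,
    // and a map from resolved backtrace to the sizes allocated at that site.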
    impl TrackingAllocator {
        pub fn track_allocations<R>(f: impl FnOnce() -> R) -> (R, usize, usize, AllocMap) {
            let start_size = MEMORY_IN_USE.load(Ordering::SeqCst);
            let start_allocations = N_ALLOCATIONS.load(Ordering::SeqCst);
            let start_map = mem::take(&mut *ALLOCATION_MAP.lock().expect("Mutex Poisoned"));

            let r = f();

            let end_size = MEMORY_IN_USE.load(Ordering::SeqCst);
            let end_allocations = N_ALLOCATIONS.load(Ordering::SeqCst);
            let end_map = mem::replace(
                &mut *ALLOCATION_MAP.lock().expect("Mutex Poisoned"),
                start_map,
            );

            let end_map = AllocMap(
                end_map
                    .0
                    .into_iter()
                    .map(|(mut k, v)| {
                        k.0.resolve();
                        (k, v)
                    })
                    .collect(),
            );

            (
                r,
                end_size - start_size,
                end_allocations - start_allocations,
                end_map,
            )
        }
    }
}

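// Reads a value of type `T` from the named environment variable, falling back
// to the default when the variable is unset or fails to parse.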
pub fn env_or<T>(name: &str, default: T) -> T
where
    T: FromStr,
{
    env_or_else(name, || default)
}

pub fn env_or_else<T>(name: &str, default: impl FnOnce() -> T) -> T
where
    T: FromStr,
{
    env::var(name)
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or_else(default)
}

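// A deterministic RNG: the seed is read from BENCHMARK_SEED (or generated once
// and printed), so every call returns an identically seeded `StdRng`.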
pub fn rng() -> StdRng {
    static SEED: Lazy<u64> = Lazy::new(|| {
        let seed = env_or_else("BENCHMARK_SEED", || rand::thread_rng().gen());
        eprintln!("Using random seed {} (can be set via BENCHMARK_SEED)", seed);
        seed
    });

    StdRng::seed_from_u64(*SEED)
}

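// An endless stream of random alphanumeric strings, each 0 to 2047 characters long.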
pub fn string_iter(rng: &mut impl Rng) -> impl Iterator<Item = String> + '_ {
    iter::from_fn(move || {
        let string_len = rng.gen_range(0, 2048);
        Some(rng.sample_iter(Alphanumeric).take(string_len).collect())
    })
}

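// Roughly N_NO_DUPLICATES distinct strings; collecting into a `HashSet`
// removes any accidental repeats.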
pub static NO_DUPLICATES: Lazy<HashSet<String>> = Lazy::new(|| {
    let rng = &mut rng();

    let n_items = env_or("N_NO_DUPLICATES", 10_000);
    string_iter(rng).take(n_items).collect()
});

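// The same corpus joined with newlines so benchmarks can iterate it with `.lines()`.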
pub static NO_DUPLICATES_STRING: Lazy<String> = Lazy::new(|| NO_DUPLICATES.iter().join("\n"));

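// A shuffled mix of distinct strings plus a random number of repeats drawn
// from among them.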
pub static DUPLICATES: Lazy<Vec<String>> = Lazy::new(|| {
    let rng = &mut rng();

    let n_items = env_or("N_DUPLICATES", 10_000);
    let no_dupes: HashSet<_> = string_iter(rng).take(n_items).collect();
    let mut no_dupes: Vec<_> = no_dupes.into_iter().collect();

    let n_dupes = rng.gen_range(0, no_dupes.len());
    let dupes: Vec<_> = no_dupes.choose_multiple(rng, n_dupes).cloned().collect();
    no_dupes.extend(dupes);
    no_dupes.shuffle(rng);

    no_dupes
});

pub static DUPLICATES_STRING: Lazy<String> = Lazy::new(|| DUPLICATES.iter().join("\n"));
