Skip to content

Commit

Permalink
Implement generic search sorted using scalar_at (#167)
Browse files Browse the repository at this point in the history
  • Loading branch information
robert3005 authored Mar 29, 2024
1 parent 4fbea5f commit 497263f
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 49 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 1 addition & 10 deletions vortex-alp/benches/alp_compress.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
use vortex::array::primitive::PrimitiveArray;
use vortex::array::ArrayRef;
use vortex_alp::{ALPArray, ALPFloat, Exponents};
use vortex_alp::{ALPFloat, Exponents};

fn main() {
divan::main();
Expand All @@ -11,10 +9,3 @@ fn alp_compress<T: ALPFloat>(n: usize) -> (Exponents, Vec<T::ALPInt>, Vec<u64>,
let values: Vec<T> = vec![T::from(1.234).unwrap(); n];
T::encode(values.as_slice(), None)
}

// TODO(ngates): remove this
/// Benchmarks `ALPArray::encode` on a `PrimitiveArray` holding `n` copies of `1.234f64`.
///
/// Panics if encoding returns an error.
#[divan::bench(args = [100_000, 10_000_000])]
fn alp_compress_array(n: usize) -> ArrayRef {
    ALPArray::encode(&PrimitiveArray::from(vec![1.234f64; n])).unwrap()
}
7 changes: 7 additions & 0 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,10 @@ thiserror = { workspace = true }
vortex-alloc = { path = "../vortex-alloc" }
vortex-error = { path = "../vortex-error" }
vortex-schema = { path = "../vortex-schema" }

[dev-dependencies]
criterion = { workspace = true }

[[bench]]
name = "search_sorted"
harness = false
24 changes: 24 additions & 0 deletions vortex-array/benches/search_sorted.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::distributions::Uniform;
use rand::{thread_rng, Rng};

use vortex::compute::search_sorted::{SearchSorted, SearchSortedSide};

/// Benchmarks the standard library's `binary_search` against vortex's
/// `SearchSorted::search_sorted` on the same sorted `Vec<i32>`.
///
/// The input is 10,000,000 values sampled from `Uniform::new(0, 100_000_000)` and then sorted.
/// The needle is drawn from the same distribution, so it may or may not occur in the data.
fn search_sorted(c: &mut Criterion) {
    let mut group = c.benchmark_group("search_sorted");

    let mut rng = thread_rng();
    let range = Uniform::new(0, 100_000_000);
    let mut data: Vec<i32> = (0..10_000_000).map(|_| rng.sample(range)).collect();
    // Equal integers are indistinguishable, so stability buys nothing here; the unstable sort
    // avoids the temporary allocation of the stable one.
    data.sort_unstable();
    let needle = rng.sample(range);

    group.bench_function("std", |b| b.iter(|| black_box(data.binary_search(&needle))));

    group.bench_function("vortex", |b| {
        b.iter(|| black_box(data.search_sorted(&needle, SearchSortedSide::Left)))
    });

    // Finish the group explicitly instead of relying on the implicit finish when it is dropped.
    group.finish();
}

criterion_group!(benches, search_sorted);
criterion_main!(benches);
3 changes: 2 additions & 1 deletion vortex-array/src/array/primitive/compute/search_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@ impl SearchSortedFn for PrimitiveArray {

#[cfg(test)]
mod test {
use super::*;
use crate::array::IntoArray;
use crate::compute::search_sorted::search_sorted;

use super::*;

#[test]
fn test_searchsorted_primitive() {
let values = vec![1u16, 2, 3].into_array();
Expand Down
148 changes: 110 additions & 38 deletions vortex-array/src/compute/search_sorted.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use std::cmp::Ordering;
use std::cmp::Ordering::{Equal, Greater, Less};

use vortex_error::{VortexError, VortexResult};

use crate::array::Array;
use crate::compute::flatten::flatten;
use crate::compute::ArrayCompute;
use crate::compute::scalar_at::scalar_at;
use crate::scalar::Scalar;
use log::info;
use std::cmp::Ordering;

#[derive(Debug, Copy, Clone)]
pub enum SearchSortedSide {
Left,
Right,
Expand All @@ -26,45 +27,116 @@ pub fn search_sorted<T: Into<Scalar>>(
return search_sorted.search_sorted(&scalar, side);
}

// Otherwise, flatten and try again.
info!("SearchSorted not implemented for {}, flattening", array);
flatten(array)?
.into_array()
.search_sorted()
.map(|f| f.search_sorted(&scalar, side))
.unwrap_or_else(|| {
Err(VortexError::NotImplemented(
"search_sorted",
array.encoding().id().name(),
))
})
if array.scalar_at().is_some() {
return Ok(SearchSorted::search_sorted(&array, &scalar, side));
}

Err(VortexError::NotImplemented(
"search_sorted",
array.encoding().id().name(),
))
}

pub trait SearchSorted<T> {
fn search_sorted(&self, value: &T, side: SearchSortedSide) -> usize;
/// Compares the element stored at a given position of `self` against an external value.
///
/// Only [`IndexOrd::index_cmp`] has to be implemented; the boolean helpers are derived from it.
/// Every helper returns `false` when `index_cmp` yields `None`.
pub trait IndexOrd<V> {
    /// Returns how the element at `idx` orders relative to `elem`, or `None` when no ordering
    /// can be produced.
    fn index_cmp(&self, idx: usize, elem: &V) -> Option<Ordering>;

    /// `true` only when the element at `idx` is strictly less than `elem`.
    fn index_lt(&self, idx: usize, elem: &V) -> bool {
        self.index_cmp(idx, elem) == Some(Less)
    }

    /// `true` only when the element at `idx` is less than or equal to `elem`.
    fn index_le(&self, idx: usize, elem: &V) -> bool {
        self.index_cmp(idx, elem).is_some_and(Ordering::is_le)
    }

    /// `true` only when the element at `idx` is strictly greater than `elem`.
    fn index_gt(&self, idx: usize, elem: &V) -> bool {
        self.index_cmp(idx, elem) == Some(Greater)
    }

    /// `true` only when the element at `idx` is greater than or equal to `elem`.
    fn index_ge(&self, idx: usize, elem: &V) -> bool {
        self.index_cmp(idx, elem).is_some_and(Ordering::is_ge)
    }
}

impl<T: PartialOrd> SearchSorted<T> for &[T] {
fn search_sorted(&self, value: &T, side: SearchSortedSide) -> usize {
/// Types that can report how many elements they contain.
///
/// Used as a bound by the blanket `SearchSorted` implementation, which reads `len()` to set the
/// initial upper bound of its binary search.
// Only `len` is required by that search, so no `is_empty` companion is declared and the
// corresponding clippy lint is silenced.
#[allow(clippy::len_without_is_empty)]
pub trait Len {
/// Returns the number of elements in `self`.
fn len(&self) -> usize;
}

/// Binary search over an indexable, sorted container.
///
/// Implementors supply [`SearchSorted::search_sorted_by`]; [`SearchSorted::search_sorted`] is
/// derived from it for any container whose elements can be compared against `T` by index.
pub trait SearchSorted<T> {
    /// Returns the index at which `value` could be inserted while keeping `self` sorted.
    ///
    /// * `SearchSortedSide::Left` treats only elements strictly less than `value` as lying
    ///   before the insertion point.
    /// * `SearchSortedSide::Right` additionally treats elements equal to `value` as lying
    ///   before the insertion point.
    ///
    /// The comparator handed to `search_sorted_by` never reports `Equal`. An element whose
    /// comparison yields `None` is reported as `Greater`, because `index_lt` / `index_le` are
    /// `false` in that case.
    fn search_sorted(&self, value: &T, side: SearchSortedSide) -> usize
    where
        Self: IndexOrd<T>,
    {
        match side {
            SearchSortedSide::Left => self.search_sorted_by(|idx| {
                if self.index_lt(idx, value) {
                    Less
                } else {
                    Greater
                }
            }),
            SearchSortedSide::Right => self.search_sorted_by(|idx| {
                if self.index_le(idx, value) {
                    Less
                } else {
                    Greater
                }
            }),
        }
    }

    /// Searches by index with a caller-supplied comparator and returns the resulting index.
    ///
    /// `f` receives an index into `self` and reports how the element at that index orders
    /// relative to the target being searched for.
    fn search_sorted_by<F: FnMut(usize) -> Ordering>(&self, f: F) -> usize;
}

/// Blanket binary search for anything that knows its length and can compare by index.
impl<S: IndexOrd<T> + Len + ?Sized, T> SearchSorted<T> for S {
    // Adapted from the Rust standard library's `slice::binary_search_by`.
    fn search_sorted_by<F: FnMut(usize) -> Ordering>(&self, mut f: F) -> usize {
        // Invariants maintained by the loop:
        // - 0 <= lo <= hi <= self.len()
        // - `f` returned `Less` for every probed index below `lo`
        // - `f` returned `Greater` for every probed index at or above `hi`
        let mut lo = 0;
        let mut hi = self.len();

        while lo < hi {
            let mid = lo + (hi - lo) / 2;
            match f(mid) {
                Less => lo = mid + 1,
                Greater => hi = mid,
                // Exact hit: report the probed index straight away.
                Equal => return mid,
            }
        }

        lo
    }
}

/// Index-based comparison for dynamically-typed arrays, going through `scalar_at`.
impl IndexOrd<Scalar> for &dyn Array {
    /// Fetches the scalar at `idx` and compares it with `elem`.
    ///
    /// Returns `None` when `scalar_at` fails or when the two scalars have no ordering.
    fn index_cmp(&self, idx: usize, elem: &Scalar) -> Option<Ordering> {
        match scalar_at(*self, idx) {
            Ok(found) => found.partial_cmp(elem),
            Err(_) => None,
        }
    }
}

/// Index-based comparison for plain slices.
impl<T: PartialOrd> IndexOrd<T> for [T] {
    /// Compares the element at `idx` with `elem` using `PartialOrd`.
    ///
    /// Returns `None` when the two values are incomparable (for example a floating-point NaN).
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds for the slice.
    fn index_cmp(&self, idx: usize, elem: &T) -> Option<Ordering> {
        // `IndexOrd` is a public, safe trait, so `idx` may come from arbitrary safe code and not
        // only from the in-bounds probes of `search_sorted_by`. Checked indexing keeps this
        // method sound; an unchecked read here would be undefined behaviour for a bad index.
        self[idx].partial_cmp(elem)
    }
}

/// Length of a dynamically-typed array, delegated to the `Array` trait.
impl Len for &dyn Array {
/// Returns the length reported by the underlying array.
fn len(&self) -> usize {
// The call is written in fully-qualified form so that it names `Array::len` explicitly
// instead of depending on method resolution between `Array::len` and this `Len::len`.
Array::len(*self)
}
}

/// Length of a plain slice.
impl<T> Len for [T] {
    /// Returns the number of elements in the slice.
    fn len(&self) -> usize {
        // Name the inherent slice method explicitly so the call cannot be read as recursive.
        <[T]>::len(self)
    }
}

0 comments on commit 497263f

Please sign in to comment.