Skip to content

Commit

Permalink
more testing
Browse files Browse the repository at this point in the history
  • Loading branch information
lwwmanning committed Apr 2, 2024
1 parent eba5456 commit 936015d
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 30 deletions.
13 changes: 9 additions & 4 deletions fastlanez/src/bitpack.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::mem::{MaybeUninit, size_of};

use arrayref::{array_mut_ref, array_ref};
use num_traits::{PrimInt, Unsigned};
use seq_macro::seq;
use uninit::prelude::VecCapacity;

Expand Down Expand Up @@ -39,7 +40,7 @@ pub struct UnsupportedBitWidth;
/// Try to bitpack into a runtime-known bit width.
pub trait TryBitPack
where
Self: Sized,
Self: Sized + Unsigned + PrimInt,
{
fn try_pack<'a>(
input: &[Self; 1024],
Expand Down Expand Up @@ -77,7 +78,11 @@ where
Ok(())
}

fn try_unpack_single(input: &[u8], width: usize, index: usize) -> Result<Self, UnsupportedBitWidth>;
fn try_unpack_single(
input: &[u8],
width: usize,
index: usize,
) -> Result<Self, UnsupportedBitWidth>;
}

macro_rules! bitpack_impl {
Expand Down Expand Up @@ -209,7 +214,7 @@ mod test {

#[test]
fn test_bitpack_roundtrip() {
let input = (0u32..1024).into_iter().collect::<Vec<_>>();
let input = (0u32..1024).collect::<Vec<_>>();
let mut output = Vec::new();
TryBitPack::try_pack_into(array_ref![input, 0, 1024], 10, &mut output).unwrap();
assert_eq!(output.len(), 1280);
Expand All @@ -221,7 +226,7 @@ mod test {

#[test]
fn test_unpack_single() {
let input = (0u32..1024).into_iter().collect::<Vec<_>>();
let input = (0u32..1024).collect::<Vec<_>>();
let mut output = Vec::new();
TryBitPack::try_pack_into(array_ref![input, 0, 1024], 10, &mut output).unwrap();
assert_eq!(output.len(), 1280);
Expand Down
99 changes: 76 additions & 23 deletions vortex-fastlanes/src/bitpacking/compress.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use arrayref::array_ref;
use num_traits::{PrimInt, Unsigned};

use fastlanez::TryBitPack;
use vortex::array::{Array, ArrayRef};
Expand All @@ -14,10 +13,10 @@ use vortex::compute::patch::patch;
use vortex::match_each_integer_ptype;
use vortex::ptype::{NativePType, PType};
use vortex::ptype::PType::{I16, I32, I64, I8, U16, U32, U64, U8};
use vortex::scalar::ListScalarVec;
use vortex::scalar::{ListScalarVec, Scalar};
use vortex::stats::Stat;
use vortex::validity::ArrayValidity;
use vortex_error::{vortex_err, VortexError, VortexResult};
use vortex_error::{vortex_bail, vortex_err, VortexResult};

use crate::{BitPackedArray, BitPackedEncoding};
use crate::downcast::DowncastFastlanes;
Expand Down Expand Up @@ -164,27 +163,27 @@ fn bitpack_patches(
})
}

pub fn bitunpack(array: &BitPackedArray) -> VortexResult<PrimitiveArray> {
pub fn unpack(array: &BitPackedArray) -> VortexResult<PrimitiveArray> {
let bit_width = array.bit_width();
let length = array.len();
let encoded = flatten_primitive(cast(array.encoded(), PType::U8.into())?.as_ref())?;
let ptype: PType = array.dtype().try_into()?;

let mut unpacked = match ptype {
I8 | U8 => PrimitiveArray::from_nullable(
bitunpack_primitive::<u8>(encoded.typed_data::<u8>(), bit_width, length),
unpack_primitive::<u8>(encoded.typed_data::<u8>(), bit_width, length),
array.validity(),
),
I16 | U16 => PrimitiveArray::from_nullable(
bitunpack_primitive::<u16>(encoded.typed_data::<u8>(), bit_width, length),
unpack_primitive::<u16>(encoded.typed_data::<u8>(), bit_width, length),
array.validity(),
),
I32 | U32 => PrimitiveArray::from_nullable(
bitunpack_primitive::<u32>(encoded.typed_data::<u8>(), bit_width, length),
unpack_primitive::<u32>(encoded.typed_data::<u8>(), bit_width, length),
array.validity(),
),
I64 | U64 => PrimitiveArray::from_nullable(
bitunpack_primitive::<u64>(encoded.typed_data::<u8>(), bit_width, length),
unpack_primitive::<u64>(encoded.typed_data::<u8>(), bit_width, length),
array.validity(),
),
_ => panic!("Unsupported ptype {:?}", ptype),
Expand All @@ -204,7 +203,7 @@ pub fn bitunpack(array: &BitPackedArray) -> VortexResult<PrimitiveArray> {
flatten_primitive(&unpacked)
}

fn bitunpack_primitive<T: NativePType + TryBitPack>(
fn unpack_primitive<T: NativePType + TryBitPack>(
packed: &[u8],
bit_width: usize,
length: usize,
Expand Down Expand Up @@ -242,25 +241,66 @@ fn bitunpack_primitive<T: NativePType + TryBitPack>(
output
}

#[allow(dead_code)]
pub fn bitunpack_single<T: TryBitPack + Unsigned + PrimInt>(
pub fn unpack_single(array: &BitPackedArray, index: usize) -> VortexResult<Scalar> {
let bit_width = array.bit_width();
let length = array.len();
let encoded = flatten_primitive(cast(array.encoded(), PType::U8.into())?.as_ref())?;
let ptype: PType = array.dtype().try_into()?;

let scalar: Scalar = match ptype {
I8 | U8 => {
unpack_single_primitive::<u8>(encoded.typed_data::<u8>(), bit_width, length, index)
.map(|v| v.into())
}
I16 | U16 => {
unpack_single_primitive::<u16>(encoded.typed_data::<u8>(), bit_width, length, index)
.map(|v| v.into())
}
I32 | U32 => {
unpack_single_primitive::<u32>(encoded.typed_data::<u8>(), bit_width, length, index)
.map(|v| v.into())
}
I64 | U64 => {
unpack_single_primitive::<u64>(encoded.typed_data::<u8>(), bit_width, length, index)
.map(|v| v.into())
}
_ => vortex_bail!("Unsupported ptype {:?}", ptype),
}?;

// Cast to signed if necessary
if ptype.is_signed_int() {
scalar.cast(&ptype.into())
} else {
Ok(scalar)
}
}

pub fn unpack_single_primitive<T: NativePType + TryBitPack>(
packed: &[u8],
bit_width: usize,
length: usize,
index_to_decode: usize,
) -> VortexResult<T> {
if index_to_decode >= length {
return Err(vortex_err!(
"Index out of bounds: {} >= {}",
index_to_decode,
length
));
return Err(vortex_err!(OutOfBounds:index_to_decode, 0, length));
}
if bit_width == 0 {
return Ok(T::default());
}
if cfg!(target_endian = "big") {
return Err(vortex_err!("bitunpack_single only supports little-endian"));
if bit_width > 64 {
return Err(vortex_err!("Unsupported bit width {}", bit_width));
}

let bytes_per_tranche = 128 * bit_width;
let expected_packed_size = ((length + 1023) / 1024) * bytes_per_tranche;
if packed.len() != expected_packed_size {
return Err(vortex_err!(
"Expected {} packed bytes, got {}",
expected_packed_size,
packed.len()
));
}

let tranche_index = index_to_decode / 1024;
let tranche_bytes = &packed[tranche_index * bytes_per_tranche..][0..bytes_per_tranche];
let index_in_tranche = index_to_decode % 1024;
Expand Down Expand Up @@ -346,11 +386,24 @@ mod test {
let cfg = CompressConfig::new().with_enabled([&BitPackedEncoding as EncodingRef]);
let ctx = CompressCtx::new(Arc::new(cfg));

let values = PrimitiveArray::from(Vec::from_iter((0..n).map(|i| (i % 63) as u8)));
let values = PrimitiveArray::from(Vec::from_iter((0..n).map(|i| (i % 2047) as u16)));
let compressed = ctx.compress(&values, None).unwrap();
assert_eq!(compressed.encoding().id(), BitPackedEncoding.id());

let decompressed = flatten_primitive(compressed.as_ref()).unwrap();
assert_eq!(decompressed.typed_data::<u8>(), values.typed_data::<u8>());
let compressed = compressed.as_bitpacked();
let decompressed = flatten_primitive(compressed).unwrap();
assert_eq!(decompressed.typed_data::<u16>(), values.typed_data::<u16>());

values
.typed_data::<u16>()
.iter()
.enumerate()
.for_each(|(i, v)| {
let scalar_at: u16 =
if let Scalar::Primitive(pscalar) = unpack_single(compressed, i).unwrap() {
pscalar.value().unwrap().try_into().unwrap()
} else {
panic!("expected u8 scalar")
};
assert_eq!(scalar_at, *v);
});
}
}
14 changes: 11 additions & 3 deletions vortex-fastlanes/src/bitpacking/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ use vortex::compute::as_contiguous::as_contiguous;
use vortex::compute::flatten::{flatten_primitive, FlattenFn, FlattenedArray};
use vortex::compute::take::{take, TakeFn};
use vortex::compute::ArrayCompute;
use vortex::compute::scalar_at::ScalarAtFn;
use vortex::match_each_integer_ptype;
use vortex::scalar::Scalar;
use vortex_error::VortexResult;

use crate::bitpacking::compress::bitunpack;
use crate::bitpacking::compress::{unpack, unpack_single};
use crate::downcast::DowncastFastlanes;
use crate::BitPackedArray;

Expand All @@ -25,7 +27,13 @@ impl ArrayCompute for BitPackedArray {

impl FlattenFn for BitPackedArray {
fn flatten(&self) -> VortexResult<FlattenedArray> {
bitunpack(self).map(FlattenedArray::Primitive)
unpack(self).map(FlattenedArray::Primitive)
}
}

// Single-element access for bit-packed arrays.
impl ScalarAtFn for BitPackedArray {
/// Returns the value at `index` as a `Scalar` by delegating to `unpack_single`;
/// any error from `unpack_single` is returned unchanged.
///
/// NOTE(review): `unpack_single` does not read validity or patches — confirm
/// whether this entry point needs to handle them.
fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
unpack_single(self, index)
}
}

Expand All @@ -50,7 +58,7 @@ impl TakeFn for BitPackedArray {
let sliced = self.slice(chunk * 1024, (chunk + 1) * 1024)?;

take(
&bitunpack(sliced.as_bitpacked())?,
&unpack(sliced.as_bitpacked())?,
&PrimitiveArray::from(offsets),
)
})
Expand Down

0 comments on commit 936015d

Please sign in to comment.