From a9e616d2630be08b1db55e57446faea5757dc131 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 12 Aug 2024 09:25:08 -0400 Subject: [PATCH 01/39] wip on unwrap_used --- Cargo.toml | 2 +- vortex-array/src/array/struct_/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1fea1ab2e..ba225de41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -155,5 +155,5 @@ panic_in_result_fn = { level = "deny" } same_name_method = { level = "deny" } tests_outside_test_module = { level = "deny" } unwrap_in_result = { level = "deny" } -#unwrap_used = { level = "deny" } +unwrap_used = { level = "deny" } use_debug = { level = "deny" } diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index 9a453c12f..7c4c19e7d 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -79,7 +79,7 @@ impl StructArray { .map(|(name, _)| FieldName::from(name.as_ref())) .collect(); let fields: Vec = items.iter().map(|(_, array)| array.clone()).collect(); - let len = fields.first().unwrap().len(); + let len = fields.first().map(|f| f.len()).unwrap_or(0); Self::try_new(FieldNames::from(names), fields, len, Validity::NonNullable) .expect("building StructArray with helper") From e843a33f0fe69a21a5ffea78a4b09d8d9020be94 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 12 Aug 2024 10:21:14 -0400 Subject: [PATCH 02/39] more pedantic --- Cargo.toml | 16 ++++++++++++++++ bench-vortex/src/reader.rs | 6 +++--- bench-vortex/src/tpch/dbgen.rs | 2 +- bench-vortex/src/tpch/mod.rs | 2 +- encodings/alp/src/array.rs | 2 +- encodings/byte-bool/src/lib.rs | 5 ++++- encodings/datetime-parts/src/compute.rs | 4 +++- encodings/dict/src/compute.rs | 7 ++++--- .../fastlanes/src/bitpacking/compute/slice.rs | 4 ++-- encodings/fastlanes/src/bitpacking/mod.rs | 2 +- encodings/fastlanes/src/for/compute.rs | 6 +++--- encodings/roaring/src/boolean/compute.rs | 4 ++-- encodings/roaring/src/boolean/mod.rs 
| 2 +- encodings/zigzag/src/zigzag.rs | 2 +- vortex-datafusion/src/persistent/opener.rs | 4 +++- vortex-datafusion/src/persistent/provider.rs | 2 +- vortex-dtype/src/dtype.rs | 18 +++++------------- vortex-flatbuffers/src/generated/array.rs | 4 ++-- vortex-flatbuffers/src/generated/dtype.rs | 10 +++++----- vortex-flatbuffers/src/lib.rs | 5 +++++ .../src/compressors/for.rs | 5 +++-- .../src/compressors/runend.rs | 2 +- vortex-sampling-compressor/src/lib.rs | 12 +++++++++--- vortex-scalar/src/list.rs | 2 +- vortex-serde/src/chunked_reader/take_rows.rs | 2 +- vortex-serde/src/layouts/reader/batch.rs | 2 +- vortex-serde/src/layouts/reader/buffered.rs | 2 +- vortex-serde/src/message_reader.rs | 2 +- 28 files changed, 82 insertions(+), 54 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0d1b2ce63..aea71f69b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -148,10 +148,26 @@ unsafe_op_in_unsafe_fn = "deny" [workspace.lints.clippy] all = { level = "deny", priority = -1 } +#cargo = { level = "deny", priority = -1 } +as_ptr_cast_mut = { level = "deny" } +borrow_as_ptr = { level = "deny" } +collection_is_never_read = { level = "deny" } +cognitive_complexity = { level = "deny" } +debug_assert_with_mut_call = { level = "deny" } +default_numeric_fallback = { level = "deny" } +derive_partial_eq_without_eq = { level = "deny" } +expect_used = { level = "deny" } +equatable_if_let = { level = "deny" } +fallible_impl_from = { level = "deny" } +get_unwrap = { level = "deny" } +host_endian_bytes = { level = "deny" } if_then_some_else_none = { level = "deny" } +inconsistent_struct_constructor = { level = "deny" } +manual_is_variant_and = { level = "deny" } mem_forget = { level = "deny" } or_fun_call = "deny" panic_in_result_fn = { level = "deny" } +redundant_closure_for_method_calls = { level = "deny" } same_name_method = { level = "deny" } tests_outside_test_module = { level = "deny" } unwrap_in_result = { level = "deny" } diff --git a/bench-vortex/src/reader.rs 
b/bench-vortex/src/reader.rs index bef82d4e5..a7766c34d 100644 --- a/bench-vortex/src/reader.rs +++ b/bench-vortex/src/reader.rs @@ -55,7 +55,7 @@ pub async fn open_vortex(path: &Path) -> VortexResult { .unwrap() .collect_chunked() .await - .map(|a| a.into_array()) + .map(vortex::IntoArray::into_array) } pub async fn rewrite_parquet_as_vortex( @@ -217,7 +217,7 @@ async fn parquet_take_from_stream( .metadata() .row_groups() .iter() - .map(|rg| rg.num_rows()) + .map(parquet::file::metadata::RowGroupMetaData::num_rows) .scan(0i64, |acc, x| { *acc += x; Some(*acc) @@ -236,7 +236,7 @@ async fn parquet_take_from_stream( let row_group_indices = row_groups .keys() .sorted() - .map(|i| row_groups.get(i).unwrap().clone()) + .map(|i| row_groups[i].clone()) .collect_vec(); let reader = builder diff --git a/bench-vortex/src/tpch/dbgen.rs b/bench-vortex/src/tpch/dbgen.rs index edd5a0ead..fdbef554a 100644 --- a/bench-vortex/src/tpch/dbgen.rs +++ b/bench-vortex/src/tpch/dbgen.rs @@ -175,7 +175,7 @@ fn get_or_cache_toolchain( zip_file .url() .path_segments() - .and_then(|segments| segments.last()) + .and_then(std::iter::Iterator::last) .unwrap(), ); diff --git a/bench-vortex/src/tpch/mod.rs b/bench-vortex/src/tpch/mod.rs index 60c9f9522..c3caf856a 100644 --- a/bench-vortex/src/tpch/mod.rs +++ b/bench-vortex/src/tpch/mod.rs @@ -248,7 +248,7 @@ async fn register_vortex_file( .iter() .map(|field| { let name: Arc = field.name().as_str().into(); - let dtype = types_map.get(&name).unwrap().clone(); + let dtype = types_map[&name].clone(); let chunks = arrays_map.remove(&name).unwrap(); ( diff --git a/encodings/alp/src/array.rs b/encodings/alp/src/array.rs index 54da44fb6..f66219ea6 100644 --- a/encodings/alp/src/array.rs +++ b/encodings/alp/src/array.rs @@ -41,7 +41,7 @@ impl ALPArray { let length = encoded.len(); let patches_dtype = patches.as_ref().map(|a| a.dtype().as_nullable()); - let patches_len = patches.as_ref().map(|a| a.len()).unwrap_or(0); + let patches_len = 
patches.as_ref().map(vortex::Array::len).unwrap_or(0); let mut children = Vec::with_capacity(2); children.push(encoded); if let Some(patch) = patches { diff --git a/encodings/byte-bool/src/lib.rs b/encodings/byte-bool/src/lib.rs index 05e6c6e8b..42a2ec556 100644 --- a/encodings/byte-bool/src/lib.rs +++ b/encodings/byte-bool/src/lib.rs @@ -102,7 +102,10 @@ impl From>> for ByteBoolArray { let validity = Validity::from_iter(value.iter()); // This doesn't reallocate, and the compiler even vectorizes it - let data = value.into_iter().map(|b| b.unwrap_or_default()).collect(); + let data = value + .into_iter() + .map(std::option::Option::unwrap_or_default) + .collect(); Self::try_from_vec(data, validity).unwrap() } diff --git a/encodings/datetime-parts/src/compute.rs b/encodings/datetime-parts/src/compute.rs index fd60e2718..75a0c023a 100644 --- a/encodings/datetime-parts/src/compute.rs +++ b/encodings/datetime-parts/src/compute.rs @@ -114,7 +114,9 @@ pub fn decode_to_temporal(array: &DateTimePartsArray) -> VortexResult VortexResult { - Self::try_new(slice(&self.codes(), start, stop)?, self.values()).map(|a| a.into_array()) + Self::try_new(slice(&self.codes(), start, stop)?, self.values()) + .map(vortex::IntoArray::into_array) } } diff --git a/encodings/fastlanes/src/bitpacking/compute/slice.rs b/encodings/fastlanes/src/bitpacking/compute/slice.rs index fdb11e62b..d2a870878 100644 --- a/encodings/fastlanes/src/bitpacking/compute/slice.rs +++ b/encodings/fastlanes/src/bitpacking/compute/slice.rs @@ -1,7 +1,7 @@ use std::cmp::max; use vortex::compute::{slice, SliceFn}; -use vortex::{Array, IntoArray}; +use vortex::Array; use vortex_error::VortexResult; use crate::BitPackedArray; @@ -22,7 +22,7 @@ impl SliceFn for BitPackedArray { stop - start, offset, ) - .map(|a| a.into_array()) + .map(vortex::IntoArray::into_array) } } diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index da1e4c895..e17c50879 100644 --- 
a/encodings/fastlanes/src/bitpacking/mod.rs +++ b/encodings/fastlanes/src/bitpacking/mod.rs @@ -71,7 +71,7 @@ impl BitPackedArray { let metadata = BitPackedMetadata { validity: validity.to_metadata(length)?, - patches_len: patches.as_ref().map(|a| a.len()).unwrap_or(0), + patches_len: patches.as_ref().map(vortex::Array::len).unwrap_or(0), offset, length, bit_width, diff --git a/encodings/fastlanes/src/for/compute.rs b/encodings/fastlanes/src/for/compute.rs index 6b146b3f5..8b3b550c7 100644 --- a/encodings/fastlanes/src/for/compute.rs +++ b/encodings/fastlanes/src/for/compute.rs @@ -3,7 +3,7 @@ use vortex::compute::{ search_sorted, slice, take, ArrayCompute, SearchResult, SearchSortedFn, SearchSortedSide, SliceFn, TakeFn, }; -use vortex::{Array, ArrayDType, IntoArray}; +use vortex::{Array, ArrayDType}; use vortex_dtype::match_each_integer_ptype; use vortex_error::{vortex_bail, VortexResult}; use vortex_scalar::{PrimitiveScalar, Scalar, ScalarValue}; @@ -35,7 +35,7 @@ impl TakeFn for FoRArray { self.reference().clone(), self.shift(), ) - .map(|a| a.into_array()) + .map(vortex::IntoArray::into_array) } } @@ -65,7 +65,7 @@ impl SliceFn for FoRArray { self.reference().clone(), self.shift(), ) - .map(|a| a.into_array()) + .map(vortex::IntoArray::into_array) } } diff --git a/encodings/roaring/src/boolean/compute.rs b/encodings/roaring/src/boolean/compute.rs index a6dd9a83b..db928a527 100644 --- a/encodings/roaring/src/boolean/compute.rs +++ b/encodings/roaring/src/boolean/compute.rs @@ -1,7 +1,7 @@ use croaring::Bitmap; use vortex::compute::unary::ScalarAtFn; use vortex::compute::{ArrayCompute, SliceFn}; -use vortex::{Array, IntoArray}; +use vortex::Array; use vortex_error::VortexResult; use vortex_scalar::Scalar; @@ -28,6 +28,6 @@ impl SliceFn for RoaringBoolArray { let slice_bitmap = Bitmap::from_range(start as u32..stop as u32); let bitmap = self.bitmap().and(&slice_bitmap).add_offset(-(start as i64)); - Self::try_new(bitmap, stop - start).map(|a| a.into_array()) + 
Self::try_new(bitmap, stop - start).map(vortex::IntoArray::into_array) } } diff --git a/encodings/roaring/src/boolean/mod.rs b/encodings/roaring/src/boolean/mod.rs index 6023e432a..b6b608575 100644 --- a/encodings/roaring/src/boolean/mod.rs +++ b/encodings/roaring/src/boolean/mod.rs @@ -58,7 +58,7 @@ impl RoaringBoolArray { pub fn encode(array: Array) -> VortexResult { if array.encoding().id() == Bool::ID { - roaring_bool_encode(BoolArray::try_from(array)?).map(|a| a.into_array()) + roaring_bool_encode(BoolArray::try_from(array)?).map(vortex::IntoArray::into_array) } else { Err(vortex_err!("RoaringInt can only encode boolean arrays")) } diff --git a/encodings/zigzag/src/zigzag.rs b/encodings/zigzag/src/zigzag.rs index 2fc4d7b8d..3bae6748c 100644 --- a/encodings/zigzag/src/zigzag.rs +++ b/encodings/zigzag/src/zigzag.rs @@ -43,7 +43,7 @@ impl ZigZagArray { PrimitiveArray::try_from(array) .map_err(|_| vortex_err!("ZigZag can only encoding primitive arrays")) .map(|parray| zigzag_encode(&parray))? 
- .map(|encoded| encoded.into_array()) + .map(vortex::IntoArray::into_array) } pub fn encoded(&self) -> Array { diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index d3e275c42..f6b67e56f 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -44,7 +44,9 @@ impl FileOpener for VortexFileOpener { Ok(async move { let reader = builder.build().await?; - let stream = reader.map_ok(RecordBatch::from).map_err(|e| e.into()); + let stream = reader + .map_ok(RecordBatch::from) + .map_err(std::convert::Into::into); Ok(Box::pin(stream) as _) } .boxed()) diff --git a/vortex-datafusion/src/persistent/provider.rs b/vortex-datafusion/src/persistent/provider.rs index baeefd4f3..6540c0b66 100644 --- a/vortex-datafusion/src/persistent/provider.rs +++ b/vortex-datafusion/src/persistent/provider.rs @@ -79,7 +79,7 @@ impl TableProvider for VortexFileTableProvider { .data_files .iter() .cloned() - .map(|f| f.into()) + .map(std::convert::Into::into) .collect(), ) .with_projection(projection.cloned()); diff --git a/vortex-dtype/src/dtype.rs b/vortex-dtype/src/dtype.rs index 68aa9e95d..0ea7b650b 100644 --- a/vortex-dtype/src/dtype.rs +++ b/vortex-dtype/src/dtype.rs @@ -50,7 +50,7 @@ impl DType { Primitive(_, n) => matches!(n, Nullable), Utf8(n) => matches!(n, Nullable), Binary(n) => matches!(n, Nullable), - Struct(st, _) => st.dtypes().iter().all(|f| f.is_nullable()), + Struct(st, _) => st.dtypes().iter().all(DType::is_nullable), List(_, n) => matches!(n, Nullable), Extension(_, n) => matches!(n, Nullable), } @@ -86,27 +86,19 @@ impl DType { } pub fn is_unsigned_int(&self) -> bool { - PType::try_from(self) - .map(|ptype| ptype.is_unsigned_int()) - .unwrap_or_default() + PType::try_from(self).is_ok_and(super::ptype::PType::is_unsigned_int) } pub fn is_signed_int(&self) -> bool { - PType::try_from(self) - .map(|ptype| ptype.is_signed_int()) - .unwrap_or_default() + 
PType::try_from(self).is_ok_and(super::ptype::PType::is_signed_int) } pub fn is_int(&self) -> bool { - PType::try_from(self) - .map(|ptype| ptype.is_int()) - .unwrap_or_default() + PType::try_from(self).is_ok_and(super::ptype::PType::is_int) } pub fn is_float(&self) -> bool { - PType::try_from(self) - .map(|ptype| ptype.is_float()) - .unwrap_or_default() + PType::try_from(self).is_ok_and(super::ptype::PType::is_float) } pub fn is_boolean(&self) -> bool { diff --git a/vortex-flatbuffers/src/generated/array.rs b/vortex-flatbuffers/src/generated/array.rs index 857cc2aac..c755341aa 100644 --- a/vortex-flatbuffers/src/generated/array.rs +++ b/vortex-flatbuffers/src/generated/array.rs @@ -93,7 +93,7 @@ impl<'a> flatbuffers::Verifiable for Version { impl flatbuffers::SimpleToVerifyInSlice for Version {} pub enum ArrayOffset {} -#[derive(Copy, Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq, Eq)] pub struct Array<'a> { pub _tab: flatbuffers::Table<'a>, @@ -275,7 +275,7 @@ impl core::fmt::Debug for Array<'_> { } } pub enum ArrayStatsOffset {} -#[derive(Copy, Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq, Eq)] pub struct ArrayStats<'a> { pub _tab: flatbuffers::Table<'a>, diff --git a/vortex-flatbuffers/src/generated/dtype.rs b/vortex-flatbuffers/src/generated/dtype.rs index b86459ec7..7c2c24b92 100644 --- a/vortex-flatbuffers/src/generated/dtype.rs +++ b/vortex-flatbuffers/src/generated/dtype.rs @@ -250,7 +250,7 @@ impl flatbuffers::SimpleToVerifyInSlice for Type {} pub struct TypeUnionTableOffset {} pub enum NullOffset {} -#[derive(Copy, Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq, Eq)] pub struct Null<'a> { pub _tab: flatbuffers::Table<'a>, @@ -329,7 +329,7 @@ impl core::fmt::Debug for Null<'_> { } } pub enum BoolOffset {} -#[derive(Copy, Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq, Eq)] pub struct Bool<'a> { pub _tab: flatbuffers::Table<'a>, @@ -540,7 +540,7 @@ impl core::fmt::Debug for Primitive<'_> { } } pub enum DecimalOffset {} 
-#[derive(Copy, Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq, Eq)] pub struct Decimal<'a> { pub _tab: flatbuffers::Table<'a>, @@ -673,7 +673,7 @@ impl core::fmt::Debug for Decimal<'_> { } } pub enum Utf8Offset {} -#[derive(Copy, Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq, Eq)] pub struct Utf8<'a> { pub _tab: flatbuffers::Table<'a>, @@ -770,7 +770,7 @@ impl core::fmt::Debug for Utf8<'_> { } } pub enum BinaryOffset {} -#[derive(Copy, Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq, Eq)] pub struct Binary<'a> { pub _tab: flatbuffers::Table<'a>, diff --git a/vortex-flatbuffers/src/lib.rs b/vortex-flatbuffers/src/lib.rs index 53f571a09..dcb6f740a 100644 --- a/vortex-flatbuffers/src/lib.rs +++ b/vortex-flatbuffers/src/lib.rs @@ -1,5 +1,6 @@ #[cfg(feature = "array")] #[allow(clippy::all)] +#[allow(clippy::derive_partial_eq_without_eq)] #[allow(clippy::unwrap_used)] #[allow(dead_code)] #[allow(non_snake_case)] @@ -12,6 +13,7 @@ pub mod array; #[cfg(feature = "dtype")] #[allow(clippy::all)] +#[allow(clippy::derive_partial_eq_without_eq)] #[allow(clippy::unwrap_used)] #[allow(dead_code)] #[allow(non_snake_case)] @@ -24,6 +26,7 @@ pub mod dtype; #[cfg(feature = "scalar")] #[allow(clippy::all)] +#[allow(clippy::derive_partial_eq_without_eq)] #[allow(clippy::unwrap_used)] #[allow(dead_code)] #[allow(non_snake_case)] @@ -36,6 +39,7 @@ pub mod scalar; #[cfg(feature = "file")] #[allow(clippy::all)] +#[allow(clippy::derive_partial_eq_without_eq)] #[allow(clippy::unwrap_used)] #[allow(dead_code)] #[allow(non_snake_case)] @@ -48,6 +52,7 @@ pub mod footer; #[cfg(feature = "file")] #[allow(clippy::all)] +#[allow(clippy::derive_partial_eq_without_eq)] #[allow(clippy::unwrap_used)] #[allow(dead_code)] #[allow(non_snake_case)] diff --git a/vortex-sampling-compressor/src/compressors/for.rs b/vortex-sampling-compressor/src/compressors/for.rs index fff5353c5..8ef04599e 100644 --- a/vortex-sampling-compressor/src/compressors/for.rs +++ 
b/vortex-sampling-compressor/src/compressors/for.rs @@ -4,7 +4,7 @@ use vortex::array::PrimitiveArray; use vortex::encoding::EncodingRef; use vortex::stats::{trailing_zeros, ArrayStatistics}; use vortex::validity::ArrayValidity; -use vortex::{Array, ArrayDef, IntoArray}; +use vortex::{Array, ArrayDef}; use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexResult; use vortex_fastlanes::{for_compress, FoR, FoRArray, FoREncoding}; @@ -59,7 +59,8 @@ impl EncodingCompressor for FoRCompressor { .excluding(self) .compress(&child, like.as_ref().and_then(|l| l.child(0)))?; Ok(CompressedArray::new( - FoRArray::try_new(compressed_child.array, min, shift).map(|a| a.into_array())?, + FoRArray::try_new(compressed_child.array, min, shift) + .map(vortex::IntoArray::into_array)?, Some(CompressionTree::new(self, vec![compressed_child.path])), )) } diff --git a/vortex-sampling-compressor/src/compressors/runend.rs b/vortex-sampling-compressor/src/compressors/runend.rs index 4d97f3d36..e15a0c92d 100644 --- a/vortex-sampling-compressor/src/compressors/runend.rs +++ b/vortex-sampling-compressor/src/compressors/runend.rs @@ -67,7 +67,7 @@ impl EncodingCompressor for RunEndCompressor { compressed_values.array, ctx.compress_validity(primitive_array.validity())?, ) - .map(|a| a.into_array())?, + .map(vortex::IntoArray::into_array)?, Some(CompressionTree::new( self, vec![compressed_ends.path, compressed_values.path], diff --git a/vortex-sampling-compressor/src/lib.rs b/vortex-sampling-compressor/src/lib.rs index f96323a24..62426ece5 100644 --- a/vortex-sampling-compressor/src/lib.rs +++ b/vortex-sampling-compressor/src/lib.rs @@ -70,7 +70,7 @@ impl Display for SamplingCompressor<'_> { impl CompressionStrategy for SamplingCompressor<'_> { #[allow(clippy::same_name_method)] fn compress(&self, array: &Array) -> VortexResult { - Self::compress(self, array, None).map(|c| c.into_array()) + Self::compress(self, array, None).map(compressors::CompressedArray::into_array) } fn 
used_encodings(&self) -> HashSet { @@ -198,7 +198,10 @@ impl<'a> SamplingCompressor<'a> { let chunked = ChunkedArray::try_from(arr)?; let compressed_chunks = chunked .chunks() - .map(|chunk| self.compress_array(&chunk).map(|a| a.into_array())) + .map(|chunk| { + self.compress_array(&chunk) + .map(compressors::CompressedArray::into_array) + }) .collect::>>()?; Ok(CompressedArray::uncompressed( ChunkedArray::try_new(compressed_chunks, chunked.dtype().clone())?.into_array(), @@ -213,7 +216,10 @@ impl<'a> SamplingCompressor<'a> { let strct = StructArray::try_from(arr)?; let compressed_fields = strct .children() - .map(|field| self.compress_array(&field).map(|a| a.into_array())) + .map(|field| { + self.compress_array(&field) + .map(compressors::CompressedArray::into_array) + }) .collect::>>()?; let validity = self.compress_validity(strct.validity())?; Ok(CompressedArray::uncompressed( diff --git a/vortex-scalar/src/list.rs b/vortex-scalar/src/list.rs index 14942057b..6f6b02d19 100644 --- a/vortex-scalar/src/list.rs +++ b/vortex-scalar/src/list.rs @@ -53,7 +53,7 @@ impl<'a> ListScalar<'a> { pub fn elements(&self) -> impl Iterator + '_ { self.elements .as_ref() - .map(|e| e.as_ref()) + .map(std::convert::AsRef::as_ref) .unwrap_or_else(|| &[] as &[ScalarValue]) .iter() .map(|e| Scalar { diff --git a/vortex-serde/src/chunked_reader/take_rows.rs b/vortex-serde/src/chunked_reader/take_rows.rs index 1f83ccfce..35923d659 100644 --- a/vortex-serde/src/chunked_reader/take_rows.rs +++ b/vortex-serde/src/chunked_reader/take_rows.rs @@ -186,7 +186,7 @@ fn find_chunks(row_offsets: &Array, indices: &Array) -> VortexResult { debug_assert!( - self.arrays.iter().all(|a| a.is_none()), + self.arrays.iter().all(std::option::Option::is_none), "Expected layout to produce an array but it was empty" ); return Ok(None); diff --git a/vortex-serde/src/layouts/reader/buffered.rs b/vortex-serde/src/layouts/reader/buffered.rs index 883d4673a..cac5f3aff 100644 --- 
a/vortex-serde/src/layouts/reader/buffered.rs +++ b/vortex-serde/src/layouts/reader/buffered.rs @@ -28,7 +28,7 @@ impl BufferedReader { } fn buffered_row_count(&self) -> usize { - self.arrays.iter().map(|arr| arr.len()).sum() + self.arrays.iter().map(vortex::Array::len).sum() } fn buffer(&mut self) -> VortexResult> { diff --git a/vortex-serde/src/message_reader.rs b/vortex-serde/src/message_reader.rs index deb7c225d..adcd20ce3 100644 --- a/vortex-serde/src/message_reader.rs +++ b/vortex-serde/src/message_reader.rs @@ -286,7 +286,7 @@ impl ArrayBufferReader { .zip( ipc_buffers .iter() - .map(|b| b.offset()) + .map(vortex_flatbuffers::message::Buffer::offset) .skip(1) .chain([all_buffers_size]), ) From dc8f9c449224e880cbbde4e5f9712b7c9fc6d807 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Mon, 12 Aug 2024 14:42:17 +0100 Subject: [PATCH 03/39] Use then vs then_some for values that have to be lazy (#599) --- encodings/fastlanes/src/bitpacking/compress.rs | 3 +-- encodings/fastlanes/src/bitpacking/mod.rs | 12 +++++++----- vortex-array/src/array/bool/accessors.rs | 8 +------- .../src/compressors/bitpacked.rs | 12 ++++++++---- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/encodings/fastlanes/src/bitpacking/compress.rs b/encodings/fastlanes/src/bitpacking/compress.rs index e13eabad6..9755fe7f2 100644 --- a/encodings/fastlanes/src/bitpacking/compress.rs +++ b/encodings/fastlanes/src/bitpacking/compress.rs @@ -28,8 +28,7 @@ pub fn bitpack_encode(array: PrimitiveArray, bit_width: usize) -> VortexResult 0).then_some(bitpack_patches(&array, bit_width, num_exceptions)); + let patches = (num_exceptions > 0).then(|| bitpack_patches(&array, bit_width, num_exceptions)); BitPackedArray::try_new(packed, array.validity(), patches, bit_width, array.len()) } diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index e17c50879..4a547da62 100644 --- a/encodings/fastlanes/src/bitpacking/mod.rs +++ 
b/encodings/fastlanes/src/bitpacking/mod.rs @@ -113,11 +113,13 @@ impl BitPackedArray { #[inline] pub fn patches(&self) -> Option { (self.metadata().patches_len > 0) - .then_some(self.array().child( - 1, - &self.dtype().with_nullability(Nullability::Nullable), - self.metadata().patches_len, - )) + .then(|| { + self.array().child( + 1, + &self.dtype().with_nullability(Nullability::Nullable), + self.metadata().patches_len, + ) + }) .flatten() } diff --git a/vortex-array/src/array/bool/accessors.rs b/vortex-array/src/array/bool/accessors.rs index e867fbf1a..06f81a7a0 100644 --- a/vortex-array/src/array/bool/accessors.rs +++ b/vortex-array/src/array/bool/accessors.rs @@ -22,13 +22,7 @@ impl ArrayAccessor for BoolArray { Validity::Array(valid) => { let valids = valid.into_bool()?.boolean_buffer(); let mut iter = valids.iter().zip(bools.iter()).map(|(is_valid, value)| { - is_valid.then_some({ - if value { - &TRUE - } else { - &FALSE - } - }) + is_valid.then_some(if value { &TRUE } else { &FALSE }) }); Ok(f(&mut iter)) diff --git a/vortex-sampling-compressor/src/compressors/bitpacked.rs b/vortex-sampling-compressor/src/compressors/bitpacked.rs index 3a6c68003..0e895820c 100644 --- a/vortex-sampling-compressor/src/compressors/bitpacked.rs +++ b/vortex-sampling-compressor/src/compressors/bitpacked.rs @@ -63,10 +63,14 @@ impl EncodingCompressor for BitPackedCompressor { let validity = ctx.compress_validity(parray.validity())?; let packed = bitpack(&parray, bit_width)?; - let patches = (num_exceptions > 0).then_some(ctx.auxiliary("patches").compress( - &bitpack_patches(&parray, bit_width, num_exceptions), - like.as_ref().and_then(|l| l.child(0)), - )?); + let patches = (num_exceptions > 0) + .then(|| { + ctx.auxiliary("patches").compress( + &bitpack_patches(&parray, bit_width, num_exceptions), + like.as_ref().and_then(|l| l.child(0)), + ) + }) + .transpose()?; Ok(CompressedArray::new( BitPackedArray::try_new( From 938f89a82e412d1e7b8f79ca590ef2948fcce29f Mon Sep 17 00:00:00 
2001 From: Adam Gutglick Date: Mon, 12 Aug 2024 15:05:32 +0100 Subject: [PATCH 04/39] Vortex physical expressions support for on-disk data (#581) Adds initial support for vortex-specific physical expression evaluation, mostly for filtering. Falls back to arrow/datafusion if there's any issue. --- bench-vortex/src/bin/tpch_benchmark.rs | 32 +++- .../src/array/bool/compute/boolean.rs | 2 +- vortex-array/src/array/bool/mod.rs | 15 ++ vortex-array/src/compute/filter.rs | 2 +- vortex-array/src/lib.rs | 72 +++++---- vortex-datafusion/src/eval.rs | 6 +- vortex-datafusion/src/expr.rs | 140 +++++++++++++++++- vortex-datafusion/src/lib.rs | 3 +- vortex-datafusion/src/persistent/execution.rs | 4 + vortex-datafusion/src/persistent/opener.rs | 63 +++++++- vortex-datafusion/src/scalar.rs | 38 +++++ 11 files changed, 321 insertions(+), 56 deletions(-) create mode 100644 vortex-datafusion/src/scalar.rs diff --git a/bench-vortex/src/bin/tpch_benchmark.rs b/bench-vortex/src/bin/tpch_benchmark.rs index f8bf19377..7f421032a 100644 --- a/bench-vortex/src/bin/tpch_benchmark.rs +++ b/bench-vortex/src/bin/tpch_benchmark.rs @@ -15,15 +15,35 @@ use prettytable::{Cell, Row, Table}; struct Args { #[arg(short, long, value_delimiter = ',')] queries: Option>, + #[arg(short, long)] + threads: Option, } -#[tokio::main(flavor = "multi_thread", worker_threads = 8)] -async fn main() { +fn main() { + let args = Args::parse(); + + let runtime = match args.threads { + Some(0) => panic!("Can't use 0 threads for runtime"), + Some(1) => tokio::runtime::Builder::new_current_thread() + .enable_all() + .build(), + Some(n) => tokio::runtime::Builder::new_multi_thread() + .worker_threads(n) + .enable_all() + .build(), + None => tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build(), + } + .expect("Failed building the Runtime"); + + runtime.block_on(bench_main(args.queries)); +} + +async fn bench_main(queries: Option>) { // uncomment the below to enable trace logging of datafusion execution // 
setup_logger(LevelFilter::Trace); - let args = Args::parse(); - // Run TPC-H data gen. let data_dir = DBGen::new(DBGenOptions::default()).generate().unwrap(); @@ -55,7 +75,7 @@ async fn main() { table.add_row(Row::new(cells)); } - let query_count = args.queries.as_ref().map_or(21, |c| c.len()); + let query_count = queries.as_ref().map_or(21, |c| c.len()); // Setup a progress bar let progress = ProgressBar::new((query_count * formats.len()) as u64); @@ -63,7 +83,7 @@ async fn main() { // Send back a channel with the results of Row. let (rows_tx, rows_rx) = sync::mpsc::channel(); for (q, query) in tpch_queries() { - if let Some(queries) = args.queries.as_ref() { + if let Some(queries) = queries.as_ref() { if !queries.contains(&q) { continue; } diff --git a/vortex-array/src/array/bool/compute/boolean.rs b/vortex-array/src/array/bool/compute/boolean.rs index 9cdce9842..091fa72c5 100644 --- a/vortex-array/src/array/bool/compute/boolean.rs +++ b/vortex-array/src/array/bool/compute/boolean.rs @@ -3,7 +3,7 @@ use arrow_array::cast::AsArray as _; use vortex_error::VortexResult; use crate::array::BoolArray; -use crate::arrow::FromArrowArray; +use crate::arrow::FromArrowArray as _; use crate::compute::{AndFn, OrFn}; use crate::{Array, IntoCanonical}; diff --git a/vortex-array/src/array/bool/mod.rs b/vortex-array/src/array/bool/mod.rs index 36b308a9d..26065bb8b 100644 --- a/vortex-array/src/array/bool/mod.rs +++ b/vortex-array/src/array/bool/mod.rs @@ -159,6 +159,7 @@ mod tests { use crate::array::BoolArray; use crate::compute::unary::scalar_at; + use crate::validity::Validity; use crate::variants::BoolArrayTrait; use crate::IntoArray; @@ -169,6 +170,20 @@ mod tests { assert!(scalar); } + #[test] + fn test_all_some_iter() { + let arr = BoolArray::from_iter([Some(true), Some(false)]); + + assert!(matches!(arr.validity(), Validity::AllValid)); + + let arr = arr.into_array(); + + let scalar = bool::try_from(&scalar_at(&arr, 0).unwrap()).unwrap(); + assert!(scalar); + let scalar = 
bool::try_from(&scalar_at(&arr, 1).unwrap()).unwrap(); + assert!(!scalar); + } + #[test] fn test_bool_from_iter() { let arr = diff --git a/vortex-array/src/compute/filter.rs b/vortex-array/src/compute/filter.rs index c7be6a5b5..3ec0178a7 100644 --- a/vortex-array/src/compute/filter.rs +++ b/vortex-array/src/compute/filter.rs @@ -24,7 +24,7 @@ pub fn filter(array: &Array, predicate: &Array) -> VortexResult { if predicate.dtype() != &DType::Bool(Nullability::NonNullable) { vortex_bail!( "predicate must be non-nullable bool, has dtype {}", - predicate.dtype() + predicate.dtype(), ); } if predicate.len() != array.len() { diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index 74c9f24f4..d48f23ff8 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -161,6 +161,41 @@ impl Array { futures_util::stream::once(ready(Ok(self))), ) } + + #[inline] + pub fn with_dyn(&self, mut f: F) -> R + where + F: FnMut(&dyn ArrayTrait) -> R, + { + let mut result = None; + + self.encoding() + .with_dyn(self, &mut |array| { + // Sanity check that the encoding implements the correct array trait + debug_assert!( + match array.dtype() { + DType::Null => array.as_null_array().is_some(), + DType::Bool(_) => array.as_bool_array().is_some(), + DType::Primitive(..) => array.as_primitive_array().is_some(), + DType::Utf8(_) => array.as_utf8_array().is_some(), + DType::Binary(_) => array.as_binary_array().is_some(), + DType::Struct(..) => array.as_struct_array().is_some(), + DType::List(..) => array.as_list_array().is_some(), + DType::Extension(..) => array.as_extension_array().is_some(), + }, + "Encoding {} does not implement the variant trait for {}", + self.encoding().id(), + array.dtype() + ); + + result = Some(f(array)); + Ok(()) + }) + .unwrap(); + + // Now we unwrap the optional, which we know to be populated by the closure. + result.unwrap() + } } /// A depth-first pre-order iterator over a ArrayData. 
@@ -243,43 +278,6 @@ impl ArrayVisitor for NBytesVisitor { } } -impl Array { - #[inline] - pub fn with_dyn(&self, mut f: F) -> R - where - F: FnMut(&dyn ArrayTrait) -> R, - { - let mut result = None; - - self.encoding() - .with_dyn(self, &mut |array| { - // Sanity check that the encoding implements the correct array trait - debug_assert!( - match array.dtype() { - DType::Null => array.as_null_array().is_some(), - DType::Bool(_) => array.as_bool_array().is_some(), - DType::Primitive(..) => array.as_primitive_array().is_some(), - DType::Utf8(_) => array.as_utf8_array().is_some(), - DType::Binary(_) => array.as_binary_array().is_some(), - DType::Struct(..) => array.as_struct_array().is_some(), - DType::List(..) => array.as_list_array().is_some(), - DType::Extension(..) => array.as_extension_array().is_some(), - }, - "Encoding {} does not implement the variant trait for {}", - self.encoding().id(), - array.dtype() - ); - - result = Some(f(array)); - Ok(()) - }) - .unwrap(); - - // Now we unwrap the optional, which we know to be populated by the closure. 
- result.unwrap() - } -} - impl Display for Array { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let prefix = match self { diff --git a/vortex-datafusion/src/eval.rs b/vortex-datafusion/src/eval.rs index 49034f92b..cd7ccbdd7 100644 --- a/vortex-datafusion/src/eval.rs +++ b/vortex-datafusion/src/eval.rs @@ -6,6 +6,7 @@ use vortex_error::{vortex_bail, vortex_err, VortexResult}; use vortex_expr::Operator; use crate::can_be_pushed_down; +use crate::scalar::dfvalue_to_scalar; pub struct ExpressionEvaluator; @@ -36,7 +37,10 @@ impl ExpressionEvaluator { .and_then(|a| a.field_by_name(name)) .ok_or(vortex_err!("Missing field {name} in struct array")) }), - Expr::Literal(lit) => Ok(ConstantArray::new(lit.clone(), array.len()).into_array()), + Expr::Literal(lit) => { + let lit = dfvalue_to_scalar(lit.clone()); + Ok(ConstantArray::new(lit, array.len()).into_array()) + } _ => unreachable!(), } } diff --git a/vortex-datafusion/src/expr.rs b/vortex-datafusion/src/expr.rs index 62e348498..342d22b0f 100644 --- a/vortex-datafusion/src/expr.rs +++ b/vortex-datafusion/src/expr.rs @@ -1,9 +1,23 @@ -use arrow_schema::SchemaRef; +#![allow(dead_code)] + +use std::sync::Arc; + +use arrow_schema::{Schema, SchemaRef}; use datafusion::optimizer::simplify_expressions::ExprSimplifier; use datafusion_common::{Result as DFResult, ToDFSchema}; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; -use datafusion_expr::{and, lit, Expr}; +use datafusion_expr::{and, lit, Expr, Operator as DFOperator}; +use datafusion_physical_expr::PhysicalExpr; +use vortex::array::{ConstantArray, StructArray}; +use vortex::compute::compare; +use vortex::variants::StructArrayTrait; +use vortex::{Array, IntoArray}; +use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_expr::Operator; +use vortex_scalar::Scalar; + +use crate::scalar::dfvalue_to_scalar; /// Convert a set of expressions into a single AND expression. 
/// @@ -32,6 +46,128 @@ pub(crate) fn simplify_expr(expr: &Expr, schema: SchemaRef) -> DFResult { simplifier.simplify(expr.clone()) } +pub trait VortexPhysicalExpr: Send + Sync { + fn evaluate(&self, array: &Array) -> VortexResult; +} + +pub struct NoOp; + +pub struct BinaryExpr { + left: Arc, + right: Arc, + operator: DFOperator, +} + +pub struct Column { + name: String, + index: usize, +} + +impl VortexPhysicalExpr for Column { + fn evaluate(&self, array: &Array) -> VortexResult { + let s = StructArray::try_from(array)?; + + let column = s.field_by_name(&self.name).ok_or(vortex_err!( + "Array doesn't contain child array of name {}", + self.name + ))?; + + Ok(column) + } +} + +pub struct Literal { + scalar_value: Scalar, +} + +impl VortexPhysicalExpr for Literal { + fn evaluate(&self, array: &Array) -> VortexResult { + Ok(ConstantArray::new(self.scalar_value.clone(), array.len()).into_array()) + } +} + +impl VortexPhysicalExpr for BinaryExpr { + fn evaluate(&self, array: &Array) -> VortexResult { + let lhs = self.left.evaluate(array)?; + let rhs = self.right.evaluate(array)?; + + let array = match self.operator { + DFOperator::Eq => compare(&lhs, &rhs, Operator::Eq)?, + DFOperator::NotEq => compare(&lhs, &rhs, Operator::NotEq)?, + DFOperator::Lt => compare(&lhs, &rhs, Operator::Lt)?, + DFOperator::LtEq => compare(&lhs, &rhs, Operator::Lte)?, + DFOperator::Gt => compare(&lhs, &rhs, Operator::Gt)?, + DFOperator::GtEq => compare(&lhs, &rhs, Operator::Gte)?, + DFOperator::And => vortex::compute::and(&lhs, &rhs)?, + DFOperator::Or => vortex::compute::or(&lhs, &rhs)?, + _ => vortex_bail!("{} is not a supported DF operator in Vortex", self.operator), + }; + + Ok(array) + } +} + +impl VortexPhysicalExpr for NoOp { + fn evaluate(&self, _array: &Array) -> VortexResult { + vortex_bail!("NoOp::evaluate() should not be called") + } +} + +pub fn convert_expr_to_vortex( + physical_expr: Arc, + input_schema: &Schema, +) -> VortexResult> { + if 
physical_expr.data_type(input_schema).unwrap().is_temporal() { + vortex_bail!("Doesn't support evaluating operations over temporal values"); + } + if let Some(binary_expr) = physical_expr + .as_any() + .downcast_ref::() + { + let left = convert_expr_to_vortex(binary_expr.left().clone(), input_schema)?; + let right = convert_expr_to_vortex(binary_expr.right().clone(), input_schema)?; + let operator = *binary_expr.op(); + + return Ok(Arc::new(BinaryExpr { + left, + right, + operator, + }) as _); + } + + if let Some(col_expr) = physical_expr + .as_any() + .downcast_ref::() + { + let expr = Column { + name: col_expr.name().to_owned(), + index: col_expr.index(), + }; + + return Ok(Arc::new(expr) as _); + } + + if let Some(lit) = physical_expr + .as_any() + .downcast_ref::() + { + let value = dfvalue_to_scalar(lit.value().clone()); + return Ok(Arc::new(Literal { + scalar_value: value, + }) as _); + } + + if physical_expr + .as_any() + .downcast_ref::() + .is_some() + { + return Ok(Arc::new(NoOp)); + } + + vortex_bail!("Couldn't convert DataFusion physical expression to a vortex expression") +} + #[cfg(test)] mod test { use std::sync::Arc; diff --git a/vortex-datafusion/src/lib.rs b/vortex-datafusion/src/lib.rs index a72d1bbea..8c75b126c 100644 --- a/vortex-datafusion/src/lib.rs +++ b/vortex-datafusion/src/lib.rs @@ -27,12 +27,13 @@ use vortex::array::ChunkedArray; use vortex::{Array, ArrayDType, IntoArrayVariant}; use vortex_error::vortex_err; +pub mod expr; pub mod memory; pub mod persistent; +pub mod scalar; mod datatype; mod eval; -mod expr; mod plans; const SUPPORTED_BINARY_OPS: &[Operator] = &[ diff --git a/vortex-datafusion/src/persistent/execution.rs b/vortex-datafusion/src/persistent/execution.rs index 792cb6845..43ff1a7a4 100644 --- a/vortex-datafusion/src/persistent/execution.rs +++ b/vortex-datafusion/src/persistent/execution.rs @@ -91,12 +91,16 @@ impl ExecutionPlan for VortexExec { let object_store = context .runtime_env() 
.object_store(&self.file_scan_config.object_store_url)?; + + let arrow_schema = self.file_scan_config.file_schema.clone(); + let opener = VortexFileOpener { ctx: self.ctx.clone(), object_store, projection: self.file_scan_config.projection.clone(), batch_size: None, predicate: self.predicate.clone(), + arrow_schema, }; let stream = FileStream::new(&self.file_scan_config, partition, opener, &self.metrics)?; diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index f6b67e56f..79fd0768e 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -1,23 +1,31 @@ use std::sync::Arc; -use arrow_array::RecordBatch; +use arrow_array::{Array as _, BooleanArray, RecordBatch}; +use arrow_schema::SchemaRef; +use datafusion::arrow::buffer::{buffer_bin_and_not, BooleanBuffer}; use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener}; use datafusion_common::Result as DFResult; use datafusion_physical_expr::PhysicalExpr; use futures::{FutureExt as _, TryStreamExt}; use object_store::ObjectStore; -use vortex::Context; +use vortex::array::BoolArray; +use vortex::arrow::FromArrowArray; +use vortex::{Array, Context, IntoArrayVariant as _}; +use vortex_error::VortexResult; use vortex_serde::io::ObjectStoreReadAt; use vortex_serde::layouts::reader::builder::VortexLayoutReaderBuilder; use vortex_serde::layouts::reader::context::{LayoutContext, LayoutDeserializer}; use vortex_serde::layouts::reader::projections::Projection; +use crate::expr::convert_expr_to_vortex; + pub struct VortexFileOpener { pub ctx: Arc, pub object_store: Arc, pub batch_size: Option, pub projection: Option>, pub predicate: Option>, + pub arrow_schema: SchemaRef, } impl FileOpener for VortexFileOpener { @@ -34,9 +42,11 @@ impl FileOpener for VortexFileOpener { builder = builder.with_batch_size(batch_size); } - if let Some(_predicate) = self.predicate.as_ref() { - log::warn!("Missing logic to turn a 
physical expression into a RowFilter"); - } + let predicate = self + .predicate + .clone() + .map(|predicate| convert_expr_to_vortex(predicate, self.arrow_schema.as_ref())) + .transpose()?; if let Some(projection) = self.projection.as_ref() { builder = builder.with_projection(Projection::new(projection)) @@ -44,11 +54,50 @@ impl FileOpener for VortexFileOpener { Ok(async move { let reader = builder.build().await?; + let stream = reader - .map_ok(RecordBatch::from) - .map_err(std::convert::Into::into); + .and_then(move |array| { + let predicate = predicate.clone(); + async move { + let array = if let Some(predicate) = predicate.as_ref() { + let predicate_result = predicate.evaluate(&array)?; + + let filter_array = null_as_false(&predicate_result.into_bool()?)?; + vortex::compute::filter(&array, &filter_array)? + } else { + array + }; + + VortexResult::Ok(RecordBatch::from(array)) + } + }) + .map_err(|e| e.into()); Ok(Box::pin(stream) as _) } .boxed()) } } + +/// Mask all null values of a Arrow boolean array to false +fn null_as_false(array: &BoolArray) -> VortexResult { + let array = BooleanArray::from(array.boolean_buffer()); + + let boolean_array = match array.nulls() { + None => array, + Some(nulls) => { + let inner_bool_buffer = array.values(); + let buff = buffer_bin_and_not( + inner_bool_buffer.inner(), + inner_bool_buffer.offset(), + nulls.buffer(), + nulls.offset(), + inner_bool_buffer.len(), + ); + let bool_buffer = + BooleanBuffer::new(buff, inner_bool_buffer.offset(), inner_bool_buffer.len()); + BooleanArray::from(bool_buffer) + } + }; + + Ok(Array::from_arrow(&boolean_array, false)) +} diff --git a/vortex-datafusion/src/scalar.rs b/vortex-datafusion/src/scalar.rs new file mode 100644 index 000000000..856a5699a --- /dev/null +++ b/vortex-datafusion/src/scalar.rs @@ -0,0 +1,38 @@ +use datafusion_common::ScalarValue; +use vortex::array::make_temporal_ext_dtype; +use vortex_dtype::{DType, Nullability}; +use vortex_scalar::{PValue, Scalar}; + +pub fn 
dfvalue_to_scalar(value: ScalarValue) -> Scalar { + match value { + ScalarValue::Null => Some(Scalar::null(DType::Null)), + ScalarValue::Boolean(b) => b.map(Scalar::from), + ScalarValue::Float16(f) => f.map(Scalar::from), + ScalarValue::Float32(f) => f.map(Scalar::from), + ScalarValue::Float64(f) => f.map(Scalar::from), + ScalarValue::Int8(i) => i.map(Scalar::from), + ScalarValue::Int16(i) => i.map(Scalar::from), + ScalarValue::Int32(i) => i.map(Scalar::from), + ScalarValue::Int64(i) => i.map(Scalar::from), + ScalarValue::UInt8(i) => i.map(Scalar::from), + ScalarValue::UInt16(i) => i.map(Scalar::from), + ScalarValue::UInt32(i) => i.map(Scalar::from), + ScalarValue::UInt64(i) => i.map(Scalar::from), + ScalarValue::Utf8(s) => s.as_ref().map(|s| Scalar::from(s.as_str())), + ScalarValue::Utf8View(s) => s.as_ref().map(|s| Scalar::from(s.as_str())), + ScalarValue::LargeUtf8(s) => s.as_ref().map(|s| Scalar::from(s.as_str())), + ScalarValue::Binary(b) => b.as_ref().map(|b| Scalar::from(b.clone())), + ScalarValue::BinaryView(b) => b.as_ref().map(|b| Scalar::from(b.clone())), + ScalarValue::LargeBinary(b) => b.as_ref().map(|b| Scalar::from(b.clone())), + ScalarValue::FixedSizeBinary(_, b) => b.map(|b| Scalar::from(b.clone())), + ScalarValue::Date32(v) => v.map(|i| { + let ext_dtype = make_temporal_ext_dtype(&value.data_type()); + Scalar::new( + DType::Extension(ext_dtype, Nullability::Nullable), + vortex_scalar::ScalarValue::Primitive(PValue::I32(i)), + ) + }), + _ => unimplemented!("Can't convert {value:?} value to a Vortex scalar"), + } + .unwrap_or(Scalar::null(DType::Null)) +} From f57eb3d2632b8910b452d4c2fbb8d2bbe28a7b5b Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 12 Aug 2024 13:05:58 -0400 Subject: [PATCH 05/39] wip --- Cargo.toml | 3 +- clippy.toml | 1 + vortex-array/src/array/bool/mod.rs | 2 +- vortex-array/src/array/chunked/canonical.rs | 2 +- .../src/array/chunked/compute/take.rs | 6 +-- vortex-array/src/array/chunked/mod.rs | 4 +- 
vortex-array/src/array/constant/compute.rs | 4 +- .../src/array/datetime/temporal/from.rs | 5 +- vortex-array/src/array/extension/mod.rs | 4 +- vortex-array/src/array/null/mod.rs | 2 +- vortex-array/src/array/primitive/mod.rs | 10 ++-- vortex-array/src/array/sparse/mod.rs | 4 +- vortex-array/src/array/struct_/mod.rs | 2 +- vortex-array/src/array/varbin/builder.rs | 12 ++++- .../src/array/varbin/compute/filter.rs | 49 +++++++++++++------ vortex-array/src/array/varbin/compute/take.rs | 14 +++--- vortex-array/src/array/varbin/mod.rs | 10 ++-- vortex-array/src/array/varbinview/mod.rs | 43 +++++++++++----- vortex-array/src/arrow/array.rs | 23 +++++---- vortex-array/src/arrow/dtype.rs | 18 ++++--- vortex-array/src/arrow/recordbatch.rs | 33 +++++++------ vortex-array/src/canonical.rs | 31 ++++++------ vortex-array/src/encoding.rs | 3 +- vortex-scalar/src/arrow.rs | 38 ++++++++------ vortex-scalar/src/datafusion.rs | 18 +++++-- vortex-scalar/src/list.rs | 4 +- vortex-scalar/src/serde/flatbuffers.rs | 3 +- 27 files changed, 209 insertions(+), 139 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index aea71f69b..d0c44d338 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -154,7 +154,7 @@ borrow_as_ptr = { level = "deny" } collection_is_never_read = { level = "deny" } cognitive_complexity = { level = "deny" } debug_assert_with_mut_call = { level = "deny" } -default_numeric_fallback = { level = "deny" } +#default_numeric_fallback = { level = "deny" } derive_partial_eq_without_eq = { level = "deny" } expect_used = { level = "deny" } equatable_if_let = { level = "deny" } @@ -167,7 +167,6 @@ manual_is_variant_and = { level = "deny" } mem_forget = { level = "deny" } or_fun_call = "deny" panic_in_result_fn = { level = "deny" } -redundant_closure_for_method_calls = { level = "deny" } same_name_method = { level = "deny" } tests_outside_test_module = { level = "deny" } unwrap_in_result = { level = "deny" } diff --git a/clippy.toml b/clippy.toml index 154626ef4..59cb72d11 100644 --- 
a/clippy.toml +++ b/clippy.toml @@ -1 +1,2 @@ +allow-expect-in-tests = true allow-unwrap-in-tests = true diff --git a/vortex-array/src/array/bool/mod.rs b/vortex-array/src/array/bool/mod.rs index 26065bb8b..4db0f8f4a 100644 --- a/vortex-array/src/array/bool/mod.rs +++ b/vortex-array/src/array/bool/mod.rs @@ -27,7 +27,7 @@ pub struct BoolMetadata { impl BoolArray { pub fn buffer(&self) -> &Buffer { - self.array().buffer().expect("missing buffer") + self.array().buffer().unwrap_or_else(|| panic!("Missing buffer in BoolArray")) } pub fn boolean_buffer(&self) -> BooleanBuffer { diff --git a/vortex-array/src/array/chunked/canonical.rs b/vortex-array/src/array/chunked/canonical.rs index abd35a96e..7417b2a81 100644 --- a/vortex-array/src/array/chunked/canonical.rs +++ b/vortex-array/src/array/chunked/canonical.rs @@ -149,7 +149,7 @@ fn swizzle_struct_chunks( field_chunks.push( chunk .field(field_idx) - .expect("all chunks must have same dtype"), + .ok_or_else(|| vortex_err!("All chunks must have same dtype; missing field at index {}, current chunk dtype: {}", field_idx, chunk.dtype()))?, ); } let field_array = ChunkedArray::try_new(field_chunks, field_dtype.clone())?; diff --git a/vortex-array/src/array/chunked/compute/take.rs b/vortex-array/src/array/chunked/compute/take.rs index 09e4f0146..8bdc455ca 100644 --- a/vortex-array/src/array/chunked/compute/take.rs +++ b/vortex-array/src/array/chunked/compute/take.rs @@ -97,9 +97,7 @@ fn take_strict_sorted(chunked: &ChunkedArray, indices: &Array) -> VortexResult VortexResult Array { self.array() .child(0, &Self::ENDS_DTYPE, self.nchunks() + 1) - .expect("missing chunk ends") + .unwrap_or_else(|| panic!("Missing chunk ends in ChunkedArray")) } pub fn find_chunk_idx(&self, index: usize) -> (usize, usize) { @@ -139,7 +139,7 @@ impl FromIterator for ChunkedArray { let dtype = chunks .first() .map(|c| c.dtype().clone()) - .expect("Cannot create a chunked array from an empty iterator"); + .unwrap_or_else(|| panic!("Cannot infer 
DType from an empty iterator")); Self::try_new(chunks, dtype).unwrap_or_else(|err| { panic!("Failed to create chunked array from iterator: {}", err); }) diff --git a/vortex-array/src/array/constant/compute.rs b/vortex-array/src/array/constant/compute.rs index 807f25ef4..cd3f180f5 100644 --- a/vortex-array/src/array/constant/compute.rs +++ b/vortex-array/src/array/constant/compute.rs @@ -97,7 +97,7 @@ impl SearchSortedFn for ConstantArray { impl CompareFn for ConstantArray { fn compare(&self, rhs: &Array, operator: Operator) -> VortexResult { - if let Some(true) = rhs.statistics().get_as::(Stat::IsConstant) { + if rhs.statistics().get_as::(Stat::IsConstant) == Some(true) { let lhs = self.scalar(); let rhs = scalar_at(rhs, 0)?; @@ -152,7 +152,7 @@ fn constant_array_bool_impl( fallback_fn: impl Fn(&Array, &Array) -> Option>, ) -> VortexResult { // If the right side is constant - if let Some(true) = other.statistics().get_as::(Stat::IsConstant) { + if other.statistics().get_as::(Stat::IsConstant) == Some(true) { let lhs = constant_array.scalar().value().as_bool()?; let rhs = scalar_at(other, 0)?.value().as_bool()?; diff --git a/vortex-array/src/array/datetime/temporal/from.rs b/vortex-array/src/array/datetime/temporal/from.rs index b3cccd90b..f2bc642eb 100644 --- a/vortex-array/src/array/datetime/temporal/from.rs +++ b/vortex-array/src/array/datetime/temporal/from.rs @@ -113,12 +113,11 @@ impl From for ExtMetadata { None => meta.extend_from_slice(0u16.to_le_bytes().as_slice()), Some(tz) => { let tz_bytes = tz.as_bytes(); - let tz_len = u16::try_from(tz_bytes.len()).expect("tz did not fit in u16"); + let tz_len = u16::try_from(tz_bytes.len()).unwrap_or_else(|err| panic!("tz did not fit in u16: {err}")); meta.extend_from_slice(tz_len.to_le_bytes().as_slice()); meta.extend_from_slice(tz_bytes); } - }; - + } ExtMetadata::from(meta.as_slice()) } } diff --git a/vortex-array/src/array/extension/mod.rs b/vortex-array/src/array/extension/mod.rs index 8f3f11e0d..9c1b12302 100644 
--- a/vortex-array/src/array/extension/mod.rs +++ b/vortex-array/src/array/extension/mod.rs @@ -28,13 +28,13 @@ impl ExtensionArray { [storage].into(), Default::default(), ) - .expect("Invalid ExtensionArray") + .unwrap_or_else(|err| panic!("Invalid ExtensionArray: {err}")) } pub fn storage(&self) -> Array { self.array() .child(0, &self.metadata().storage_dtype, self.len()) - .expect("Missing storage array") + .unwrap_or_else(|| panic!("Missing storage array for ExtensionArray")) } #[allow(dead_code)] diff --git a/vortex-array/src/array/null/mod.rs b/vortex-array/src/array/null/mod.rs index 7bb406dce..7215971c6 100644 --- a/vortex-array/src/array/null/mod.rs +++ b/vortex-array/src/array/null/mod.rs @@ -28,7 +28,7 @@ impl NullArray { Arc::new([]), StatsSet::nulls(len, &DType::Null), ) - .expect("NullArray::new cannot fail") + .unwrap_or_else(|err| panic!("NullArray::new should never fail! Got: {}", err)) } } diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 9da0ab070..18cd004f4 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -43,13 +43,13 @@ impl PrimitiveArray { DType::from(ptype).with_nullability(validity.nullability()), length, PrimitiveMetadata { - validity: validity.to_metadata(length).expect("invalid validity"), + validity: validity.to_metadata(length).unwrap_or_else(|err| panic!("Invalid validity: {err}")), }, Some(buffer), validity.into_array().into_iter().collect_vec().into(), StatsSet::new(), ) - .expect("should be valid"), + .unwrap_or_else(|err| panic!("PrimitiveArray::new should never fail! 
Got: {err}")), } } @@ -90,7 +90,7 @@ impl PrimitiveArray { } pub fn buffer(&self) -> &Buffer { - self.array().buffer().expect("missing buffer") + self.array().buffer().unwrap_or_else(|| panic!("Missing buffer in PrimitiveArray")) } pub fn maybe_null_slice(&self) -> &[T] { @@ -166,7 +166,7 @@ impl PrimitiveArray { pub fn into_buffer(self) -> Buffer { self.into_array() .into_buffer() - .expect("PrimitiveArray must have a buffer") + .unwrap_or_else(|| panic!("PrimitiveArray must have a buffer")) } } @@ -217,6 +217,6 @@ impl AcceptArrayVisitor for PrimitiveArray { impl Array { pub fn as_primitive(&self) -> PrimitiveArray { - PrimitiveArray::try_from(self).expect("expected primitive array") + PrimitiveArray::try_from(self).unwrap_or_else(|err| panic!("Expected primitive array: {err}")) } } diff --git a/vortex-array/src/array/sparse/mod.rs b/vortex-array/src/array/sparse/mod.rs index a54b436cc..b4a3d56df 100644 --- a/vortex-array/src/array/sparse/mod.rs +++ b/vortex-array/src/array/sparse/mod.rs @@ -96,7 +96,7 @@ impl SparseArray { pub fn values(&self) -> Array { self.array() .child(1, self.dtype(), self.metadata().indices_len) - .expect("missing child array") + .unwrap_or_else(|| panic!("Missing child array in SparseArray")) } #[inline] @@ -107,7 +107,7 @@ impl SparseArray { &self.metadata().indices_dtype, self.metadata().indices_len, ) - .expect("missing indices array") + .unwrap_or_else(|| panic!("Missing indices array in SparseArray")) } #[inline] diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index 7c4c19e7d..9ecf7b86b 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -82,7 +82,7 @@ impl StructArray { let len = fields.first().map(|f| f.len()).unwrap_or(0); Self::try_new(FieldNames::from(names), fields, len, Validity::NonNullable) - .expect("building StructArray with helper") + .unwrap_or_else(|err| panic!("Unexpected error while building StructArray from fields: {err}")) } // 
TODO(aduffy): Add equivalent function to support field masks for nested column access. diff --git a/vortex-array/src/array/varbin/builder.rs b/vortex-array/src/array/varbin/builder.rs index 121f4db87..2f7b71314 100644 --- a/vortex-array/src/array/varbin/builder.rs +++ b/vortex-array/src/array/varbin/builder.rs @@ -36,7 +36,14 @@ impl VarBinBuilder { #[inline] pub fn push_value(&mut self, value: &[u8]) { self.offsets - .push(O::from(self.data.len() + value.len()).unwrap()); + .push(O::from(self.data.len() + value.len()).unwrap_or_else(|| { + panic!( + "Failed to convert sum of {} and {} to offset of type {}", + self.data.len(), + value.len(), + std::any::type_name::() + ) + })); self.data.extend_from_slice(value); self.validity.append_non_null(); } @@ -71,7 +78,8 @@ impl VarBinBuilder { Validity::NonNullable }; - VarBinArray::try_new(offsets.into_array(), data.into_array(), dtype, validity).unwrap() + VarBinArray::try_new(offsets.into_array(), data.into_array(), dtype, validity) + .unwrap_or_else(|err| panic!("Unexpected error while building VarBinArray: {err}")) } } diff --git a/vortex-array/src/array/varbin/compute/filter.rs b/vortex-array/src/array/varbin/compute/filter.rs index d0db9c85a..fcd697eaf 100644 --- a/vortex-array/src/array/varbin/compute/filter.rs +++ b/vortex-array/src/array/varbin/compute/filter.rs @@ -67,24 +67,32 @@ where if let Some(val) = logical_validity.to_null_buffer()? 
{ let mut builder = VarBinBuilder::::with_capacity(selection_count); - predicate.maybe_null_slices_iter().for_each(|(start, end)| { + for (start, end) in predicate.maybe_null_slices_iter() { let null_sl = val.slice(start, end - start); if null_sl.null_count() == 0 { update_non_nullable_slice(data, offsets, &mut builder, start, end) } else { - null_sl.iter().enumerate().for_each(|(idx, valid)| { + for (idx, valid) in null_sl.iter().enumerate() { if valid { - let (s, e) = ( - offsets[idx + start].to_usize().unwrap(), - offsets[idx + start + 1].to_usize().unwrap(), - ); + let s = offsets[idx + start].to_usize().ok_or_else(|| { + vortex_err!( + "Failed to convert offset to usize: {}", + offsets[idx + start] + ) + })?; + let e = offsets[idx + start + 1].to_usize().ok_or_else(|| { + vortex_err!( + "Failed to convert offset to usize: {}", + offsets[idx + start + 1] + ) + })?; builder.push_value(&data[s..e]) } else { builder.push_null() } - }) + } } - }); + } return Ok(builder.finish(dtype)); } @@ -108,11 +116,18 @@ fn update_non_nullable_slice( O: NativePType + 'static + Zero + Copy, usize: AsPrimitive, { - let (offset_start, offset_end) = (&offsets[start], &offsets[end]); - let new_data = &data[offset_start.to_usize().unwrap()..offset_end.to_usize().unwrap()]; + let new_data = { + let offset_start = offsets[start] + .to_usize() + .unwrap_or_else(|| panic!("Failed to convert offset to usize: {}", offsets[start])); + let offset_end = offsets[end] + .to_usize() + .unwrap_or_else(|| panic!("Failed to convert offset to usize: {}", offsets[end])); + &data[offset_start..offset_end] + }; let new_offsets = offsets[start..end + 1] .iter() - .map(|o| *o - *offset_start) + .map(|o| *o - offsets[start]) .dropping(1); builder.push_values(new_data, new_offsets, end - start) } @@ -144,17 +159,21 @@ fn filter_select_var_bin_by_index_primitive_offset( selection_count: usize, ) -> VortexResult { let mut builder = VarBinBuilder::::with_capacity(selection_count); - 
predicate.maybe_null_indices_iter().for_each(|idx| { + for idx in predicate.maybe_null_indices_iter() { if validity.is_valid(idx) { let (start, end) = ( - offsets[idx].to_usize().unwrap(), - offsets[idx + 1].to_usize().unwrap(), + offsets[idx].to_usize().ok_or_else(|| { + vortex_err!("Failed to convert offset to usize: {}", offsets[idx]) + })?, + offsets[idx + 1].to_usize().ok_or_else(|| { + vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1]) + })?, ); builder.push(Some(&data[start..end])) } else { builder.push_null() } - }); + } Ok(builder.finish(dtype)) } diff --git a/vortex-array/src/array/varbin/compute/take.rs b/vortex-array/src/array/varbin/compute/take.rs index df0032bab..510fd5bb1 100644 --- a/vortex-array/src/array/varbin/compute/take.rs +++ b/vortex-array/src/array/varbin/compute/take.rs @@ -1,6 +1,6 @@ use arrow_buffer::NullBuffer; use vortex_dtype::{match_each_integer_ptype, DType, NativePType}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexResult}; use crate::array::varbin::builder::VarBinBuilder; use crate::array::varbin::VarBinArray; @@ -49,9 +49,9 @@ fn take( let mut builder = VarBinBuilder::::with_capacity(indices.len()); for &idx in indices { - let idx = idx.to_usize().unwrap(); - let start = offsets[idx].to_usize().unwrap(); - let stop = offsets[idx + 1].to_usize().unwrap(); + let idx = idx.to_usize().ok_or_else(|| vortex_err!("Failed to convert index to usize: {}", idx))?; + let start = offsets[idx].to_usize().ok_or_else(|| vortex_err!("Failed to convert offset to usize: {}", offsets[idx]))?; + let stop = offsets[idx + 1].to_usize().ok_or_else(|| vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1]))?; builder.push(Some(&data[start..stop])); } Ok(builder.finish(dtype)) @@ -66,10 +66,10 @@ fn take_nullable( ) -> VarBinArray { let mut builder = VarBinBuilder::::with_capacity(indices.len()); for &idx in indices { - let idx = idx.to_usize().unwrap(); + let idx = 
idx.to_usize().unwrap_or_else(|| panic!("Failed to convert index to usize: {}", idx)); if null_buffer.is_valid(idx) { - let start = offsets[idx].to_usize().unwrap(); - let stop = offsets[idx + 1].to_usize().unwrap(); + let start = offsets[idx].to_usize().unwrap_or_else(|| panic!("Failed to convert offset to usize: {}", offsets[idx])); + let stop = offsets[idx + 1].to_usize().unwrap_or_else(|| panic!("Failed to convert offset to usize: {}", offsets[idx + 1])); builder.push(Some(&data[start..stop])); } else { builder.push(None); diff --git a/vortex-array/src/array/varbin/mod.rs b/vortex-array/src/array/varbin/mod.rs index 000def8c7..4c6dce8ff 100644 --- a/vortex-array/src/array/varbin/mod.rs +++ b/vortex-array/src/array/varbin/mod.rs @@ -75,7 +75,7 @@ impl VarBinArray { pub fn offsets(&self) -> Array { self.array() .child(0, &self.metadata().offsets_dtype, self.len() + 1) - .expect("missing offsets") + .unwrap_or_else(|| panic!("Missing offsets in VarBinArray")) } pub fn first_offset TryFrom<&'a Scalar, Error = VortexError>>( @@ -91,7 +91,7 @@ impl VarBinArray { pub fn bytes(&self) -> Array { self.array() .child(1, &DType::BYTES, self.metadata().bytes_len) - .expect("missing bytes") + .unwrap_or_else(|| panic!("Missing bytes in VarBinArray")) } pub fn validity(&self) -> Validity { @@ -152,10 +152,10 @@ impl VarBinArray { }) .unwrap_or_else(|| { scalar_at(&self.offsets(), index) - .unwrap() + .unwrap_or_else(|err| panic!("Failed to get offset at index: {}: {}", index, err)) .as_ref() .try_into() - .unwrap() + .unwrap_or_else(|err| panic!("Failed to convert offset to usize: {}", err)) }) } @@ -219,7 +219,7 @@ impl<'a> FromIterator> for VarBinArray { pub fn varbin_scalar(value: Buffer, dtype: &DType) -> Scalar { if matches!(dtype, DType::Utf8(_)) { - Scalar::try_utf8(value, dtype.nullability()).unwrap() + Scalar::try_utf8(value, dtype.nullability()).unwrap_or_else(|err| panic!("Failed to create scalar from utf8 buffer: {}", err)) } else { Scalar::binary(value, 
dtype.nullability()) } diff --git a/vortex-array/src/array/varbinview/mod.rs b/vortex-array/src/array/varbinview/mod.rs index aa15466fc..7f7bb80a3 100644 --- a/vortex-array/src/array/varbinview/mod.rs +++ b/vortex-array/src/array/varbinview/mod.rs @@ -12,7 +12,6 @@ use itertools::Itertools; use vortex_dtype::{DType, PType}; use vortex_error::{vortex_bail, VortexError, VortexResult}; -use crate::array::primitive::PrimitiveArray; use crate::array::varbin::VarBinArray; use crate::arrow::FromArrowArray; use crate::compute::slice; @@ -159,8 +158,8 @@ impl VarBinViewArray { fn view_slice(&self) -> &[BinaryView] { unsafe { slice::from_raw_parts( - PrimitiveArray::try_from(self.views()) - .expect("Views must be a primitive array") + self.views().into_primitive() + .unwrap_or_else(|err| panic!("Views must be a primitive array: {}", err)) .maybe_null_slice::() .as_ptr() as _, self.views().len() / VIEW_SIZE, @@ -176,14 +175,14 @@ impl VarBinViewArray { pub fn views(&self) -> Array { self.array() .child(0, &DType::BYTES, self.len() * VIEW_SIZE) - .expect("missing views") + .unwrap_or_else(|| panic!("Missing views")) } #[inline] pub fn bytes(&self, idx: usize) -> Array { self.array() .child(idx + 1, &DType::BYTES, self.metadata().data_lens[idx]) - .expect("Missing data buffer") + .unwrap_or_else(|| panic!("Missing data buffer")) } pub fn validity(&self) -> Validity { @@ -201,7 +200,12 @@ impl VarBinViewArray { builder.append_value(s); } let array = Array::from_arrow(&builder.finish(), false); - VarBinViewArray::try_from(array).expect("should be var bin view array") + VarBinViewArray::try_from(array).unwrap_or_else(|err| { + panic!( + "Failed to convert iterator of nullable strings to VarBinViewArray: {}", + err + ) + }) } pub fn from_iter_nullable_str, I: IntoIterator>>( @@ -212,7 +216,12 @@ impl VarBinViewArray { builder.extend(iter); let array = Array::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array).expect("should be var bin view array") + 
VarBinViewArray::try_from(array).unwrap_or_else(|err| { + panic!( + "Failed to convert iterator of nullable strings to VarBinViewArray: {}", + err + ) + }) } pub fn from_iter_bin, I: IntoIterator>(iter: I) -> Self { @@ -222,7 +231,12 @@ impl VarBinViewArray { builder.append_value(b); } let array = Array::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array).expect("should be var bin view array") + VarBinViewArray::try_from(array).unwrap_or_else(|err| { + panic!( + "Failed to convert iterator of bytes to VarBinViewArray: {}", + err + ) + }) } pub fn from_iter_nullable_bin, I: IntoIterator>>( @@ -232,7 +246,12 @@ impl VarBinViewArray { let mut builder = BinaryViewBuilder::with_capacity(iter.size_hint().0); builder.extend(iter); let array = Array::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array).expect("should be var bin view array") + VarBinViewArray::try_from(array).unwrap_or_else(|err| { + panic!( + "Failed to convert iterator of nullable bytes to VarBinViewArray: {}", + err + ) + }) } pub fn bytes_at(&self, index: usize) -> VortexResult> { @@ -272,17 +291,17 @@ fn as_arrow(var_bin_view: VarBinViewArray) -> ArrayRef { let views = var_bin_view .views() .into_primitive() - .expect("views must be primitive"); + .unwrap_or_else(|err| panic!("Views must be a primitive array: {}", err)); assert_eq!(views.ptype(), PType::U8); let nulls = var_bin_view .logical_validity() .to_null_buffer() - .expect("null buffer"); + .unwrap_or_else(|err| panic!("Failed to convert logical validity to null buffer: {}", err)); let data = (0..var_bin_view.metadata().data_lens.len()) .map(|i| var_bin_view.bytes(i).into_primitive()) .collect::>>() - .expect("bytes arrays must be primitive"); + .unwrap_or_else(|err| panic!("VarBinView byte arrays must be primitive arrays: {}", err)); if !data.is_empty() { assert_eq!(data[0].ptype(), PType::U8); assert!(data.iter().map(|d| d.ptype()).all_equal()); diff --git a/vortex-array/src/arrow/array.rs 
b/vortex-array/src/arrow/array.rs index 0b9ab8986..95828d2c1 100644 --- a/vortex-array/src/arrow/array.rs +++ b/vortex-array/src/arrow/array.rs @@ -37,7 +37,7 @@ impl From for ArrayData { impl From for ArrayData { fn from(value: NullBuffer) -> Self { BoolArray::try_new(value.into_inner(), Validity::NonNullable) - .unwrap() + .unwrap_or_else(|err| panic!("Failed to convert null buffer to BoolArray: {}", err)) .into() } } @@ -98,7 +98,7 @@ where DataType::Date64 => TemporalArray::new_date(arr.into(), TimeUnit::Ms).into(), DataType::Duration(_) => unimplemented!(), DataType::Interval(_) => unimplemented!(), - _ => panic!("Invalid data type for PrimitiveArray"), + _ => panic!("Invalid data type for PrimitiveArray: {}", T::DATA_TYPE), } } } @@ -119,7 +119,7 @@ where dtype, nulls(value.nulls(), nullable), ) - .unwrap() + .unwrap_or_else(|err| panic!("Failed to convert Arrow GenericByteArray to Vortex VarBinArray: {}", err)) .into() } } @@ -141,7 +141,7 @@ impl FromArrowArray<&GenericByteViewArray> for Array { dtype, nulls(value.nulls(), nullable), ) - .unwrap() + .unwrap_or_else(|err| panic!("Failed to convert Arrow GenericByteViewArray to Vortex VarBinViewArray: {}", err)) .into() } } @@ -149,7 +149,7 @@ impl FromArrowArray<&GenericByteViewArray> for Array { impl FromArrowArray<&ArrowBooleanArray> for Array { fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> Self { BoolArray::try_new(value.values().clone(), nulls(value.nulls(), nullable)) - .unwrap() + .unwrap_or_else(|err| panic!("Failed to convert Arrow BooleanArray to Vortex BoolArray: {}", err)) .into() } } @@ -174,7 +174,7 @@ impl FromArrowArray<&ArrowStructArray> for Array { value.len(), nulls(value.nulls(), nullable), ) - .unwrap() + .unwrap_or_else(|err| panic!("Failed to convert Arrow StructArray to Vortex StructArray: {}", err)) .into() } } @@ -223,14 +223,17 @@ impl FromArrowArray for Array { DataType::Binary => Self::from_arrow(array.as_binary::(), nullable), DataType::LargeBinary => 
Self::from_arrow(array.as_binary::(), nullable), DataType::BinaryView => Self::from_arrow( - array.as_any().downcast_ref::().unwrap(), + array.as_any().downcast_ref::().unwrap_or_else(|| panic!("Expected Arrow BinaryViewArray for DataType::BinaryView")), nullable, ), DataType::Utf8View => Self::from_arrow( - array.as_any().downcast_ref::().unwrap(), + array.as_any().downcast_ref::().unwrap_or_else(|| panic!("Expected Arrow StringViewArray for DataType::Utf8View")), + nullable, + ), + DataType::Struct(_) => Self::from_arrow( + array.as_struct(), nullable, ), - DataType::Struct(_) => Self::from_arrow(array.as_struct(), nullable), DataType::Null => Self::from_arrow(as_null_array(&array), nullable), DataType::Timestamp(u, _) => match u { ArrowTimeUnit::Second => { @@ -281,7 +284,7 @@ impl FromArrowArray for Array { } }, _ => panic!( - "TODO(robert): Missing array encoding for dtype {}", + "TODO(robert): Missing array encoding for Arrow data type {}", array.data_type().clone() ), } diff --git a/vortex-array/src/arrow/dtype.rs b/vortex-array/src/arrow/dtype.rs index 554ec59a1..98268a7db 100644 --- a/vortex-array/src/arrow/dtype.rs +++ b/vortex-array/src/arrow/dtype.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use arrow_schema::{DataType, Field, SchemaRef, TimeUnit as ArrowTimeUnit}; use itertools::Itertools; use vortex_dtype::{DType, Nullability, PType, StructDType}; -use vortex_error::{vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; use crate::array::{make_temporal_ext_dtype, TimeUnit}; use crate::arrow::{FromArrowType, TryFromArrowType}; @@ -103,14 +103,16 @@ impl From<&ArrowTimeUnit> for TimeUnit { } } -impl From for ArrowTimeUnit { - fn from(value: TimeUnit) -> Self { +impl TryFrom for ArrowTimeUnit { + type Error = VortexError; + + fn try_from(value: TimeUnit) -> VortexResult { match value { - TimeUnit::S => Self::Second, - TimeUnit::Ms => Self::Millisecond, - TimeUnit::Us => Self::Microsecond, - TimeUnit::Ns => 
Self::Nanosecond, - _ => panic!("cannot convert {value} to Arrow TimeUnit"), + TimeUnit::S => Ok(Self::Second), + TimeUnit::Ms => Ok(Self::Millisecond), + TimeUnit::Us => Ok(Self::Microsecond), + TimeUnit::Ns => Ok(Self::Nanosecond), + _ => vortex_bail!("cannot convert {value} to Arrow TimeUnit"), } } } diff --git a/vortex-array/src/arrow/recordbatch.rs b/vortex-array/src/arrow/recordbatch.rs index 7a23d6f91..f44ad656c 100644 --- a/vortex-array/src/arrow/recordbatch.rs +++ b/vortex-array/src/arrow/recordbatch.rs @@ -1,15 +1,18 @@ use arrow_array::cast::as_struct_array; use arrow_array::RecordBatch; use itertools::Itertools; +use vortex_error::{VortexError, VortexResult}; use crate::array::StructArray; use crate::arrow::FromArrowArray; use crate::validity::Validity; use crate::{Array, IntoArray, IntoCanonical}; -impl From for Array { - fn from(value: RecordBatch) -> Self { - StructArray::try_new( +impl TryFrom for Array { + type Error = VortexError; + + fn try_from(value: RecordBatch) -> VortexResult { + Ok(StructArray::try_new( value .schema() .fields() @@ -25,25 +28,27 @@ impl From for Array { .collect(), value.num_rows(), Validity::AllValid, - ) - .unwrap() - .into() + )? + .into()) } } -impl From for RecordBatch { - fn from(value: Array) -> Self { +impl TryFrom for RecordBatch { + type Error = VortexError; + + fn try_from(value: Array) -> VortexResult { let array_ref = value - .into_canonical() - .expect("struct arrays must canonicalize") + .into_canonical()? 
.into_arrow(); let struct_array = as_struct_array(array_ref.as_ref()); - RecordBatch::from(struct_array) + Ok(RecordBatch::from(struct_array)) } } -impl From for RecordBatch { - fn from(value: StructArray) -> Self { - RecordBatch::from(value.into_array()) +impl TryFrom for RecordBatch { + type Error = VortexError; + + fn try_from(value: StructArray) -> VortexResult { + RecordBatch::try_from(value.into_array()) } } diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 6a9f11bb7..ec97996a9 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -85,7 +85,7 @@ impl Canonical { temporal_to_arrow( TemporalArray::try_from(&a.into_array()) - .expect("array must be known temporal array ext type"), + .unwrap_or_else(|err| panic!("array must be known temporal array ext type: {err}")), ) } } @@ -97,42 +97,42 @@ impl Canonical { pub fn into_null(self) -> VortexResult { match self { Canonical::Null(a) => Ok(a), - _ => vortex_bail!(InvalidArgument: "cannot unwrap NullArray from {:?}", &self), + _ => vortex_bail!("Cannot unwrap NullArray from {:?}", &self), } } pub fn into_bool(self) -> VortexResult { match self { Canonical::Bool(a) => Ok(a), - _ => vortex_bail!(InvalidArgument: "cannot unwrap BoolArray from {:?}", &self), + _ => vortex_bail!("Cannot unwrap BoolArray from {:?}", &self), } } pub fn into_primitive(self) -> VortexResult { match self { Canonical::Primitive(a) => Ok(a), - _ => vortex_bail!(InvalidArgument: "cannot unwrap PrimitiveArray from {:?}", &self), + _ => vortex_bail!("Cannot unwrap PrimitiveArray from {:?}", &self), } } pub fn into_struct(self) -> VortexResult { match self { Canonical::Struct(a) => Ok(a), - _ => vortex_bail!(InvalidArgument: "cannot unwrap StructArray from {:?}", &self), + _ => vortex_bail!("Cannot unwrap StructArray from {:?}", &self), } } pub fn into_varbin(self) -> VortexResult { match self { Canonical::VarBin(a) => Ok(a), - _ => vortex_bail!(InvalidArgument: "cannot unwrap VarBinArray 
from {:?}", &self), + _ => vortex_bail!("Cannot unwrap VarBinArray from {:?}", &self), } } pub fn into_extension(self) -> VortexResult { match self { Canonical::Extension(a) => Ok(a), - _ => vortex_bail!(InvalidArgument: "cannot unwrap ExtensionArray from {:?}", &self), + _ => vortex_bail!("Cannot unwrap ExtensionArray from {:?}", &self), } } } @@ -147,7 +147,7 @@ fn bool_to_arrow(bool_array: BoolArray) -> ArrayRef { bool_array .logical_validity() .to_null_buffer() - .expect("null buffer"), + .unwrap_or_else(|err| panic!("Failed to get null buffer from logical validity: {err}")), )) } @@ -160,7 +160,7 @@ fn primitive_to_arrow(primitive_array: PrimitiveArray) -> ArrayRef { array .logical_validity() .to_null_buffer() - .expect("null buffer"), + .unwrap_or_else(|err| panic!("Failed to get null buffer from logical validity: {err}")), ) } @@ -183,7 +183,7 @@ fn struct_to_arrow(struct_array: StructArray) -> ArrayRef { let field_arrays: Vec = struct_array .children() .map(|f| { - let canonical = f.into_canonical().unwrap(); + let canonical = f.into_canonical().unwrap_or_else(|err| panic!("Failed to canonicalize field: {err}")); match canonical { // visit nested structs recursively Canonical::Struct(a) => struct_to_arrow(a), @@ -214,7 +214,7 @@ fn varbin_to_arrow(varbin_array: VarBinArray) -> ArrayRef { let offsets = varbin_array .offsets() .into_primitive() - .expect("flatten_primitive"); + .unwrap_or_else(|err| panic!("Failed to canon offsets: {err}")); let offsets = match offsets.ptype() { PType::I32 | PType::I64 => offsets, // Unless it's u64, everything else can be converted into an i32. 
@@ -222,19 +222,18 @@ fn varbin_to_arrow(varbin_array: VarBinArray) -> ArrayRef { PType::U64 => offsets.reinterpret_cast(PType::I64), PType::U32 => offsets.reinterpret_cast(PType::I32), _ => try_cast(&offsets.to_array(), PType::I32.into()) - .expect("cast to i32") - .into_primitive() - .expect("flatten_primitive"), + .and_then(|a| a.into_primitive()) + .unwrap_or_else(|err| panic!("Failed to cast offsets to PrimitiveArray of i32: {err}")), }; let nulls = varbin_array .logical_validity() .to_null_buffer() - .expect("null buffer"); + .unwrap_or_else(|err| panic!("Failed to get null buffer from logical validity: {err}")); let data = varbin_array .bytes() .into_primitive() - .expect("flatten_primitive"); + .unwrap_or_else(|err| panic!("Failed to canonicalize bytes: {err}")); assert_eq!(data.ptype(), PType::U8); let data = data.buffer(); diff --git a/vortex-array/src/encoding.rs b/vortex-array/src/encoding.rs index e537a5865..d17a10a1d 100644 --- a/vortex-array/src/encoding.rs +++ b/vortex-array/src/encoding.rs @@ -81,7 +81,8 @@ pub trait ArrayEncodingExt { F: for<'b> FnMut(&'b (dyn ArrayTrait + 'b)) -> R, { let typed = - <::Array as TryFrom>::try_from(array.clone()).unwrap(); + <::Array as TryFrom>::try_from(array.clone()) + .unwrap_or_else(|err| panic!("Failed to convert array to {}: {err}", std::any::type_name::<::Array>())); f(&typed) } } diff --git a/vortex-scalar/src/arrow.rs b/vortex-scalar/src/arrow.rs index 836e8181e..594f36086 100644 --- a/vortex-scalar/src/arrow.rs +++ b/vortex-scalar/src/arrow.rs @@ -9,12 +9,19 @@ impl From<&Scalar> for Arc { fn from(value: &Scalar) -> Arc { match value.dtype { DType::Null => Arc::new(NullArray::new(1)), - DType::Bool(_) => match value.value.as_bool().expect("should be bool") { - Some(b) => Arc::new(BooleanArray::new_scalar(b)), - None => Arc::new(BooleanArray::new_null(1)), + DType::Bool(_) => { + let maybe_bool = value.value.as_bool().unwrap_or_else(|err| { + panic!("Expected a bool scalar: {}", err) + }); + match 
maybe_bool { + Some(b) => Arc::new(BooleanArray::new_scalar(b)), + None => Arc::new(BooleanArray::new_null(1)), + } }, DType::Primitive(ptype, _) => { - let pvalue = value.value.as_pvalue().expect("should be pvalue"); + let pvalue = value.value.as_pvalue().unwrap_or_else(|err| { + panic!("Expected a pvalue scalar: {}", err) + }); match pvalue { None => match ptype { PType::U8 => Arc::new(UInt8Array::new_null(1)), @@ -45,22 +52,23 @@ impl From<&Scalar> for Arc { } } DType::Utf8(_) => { - match value - .value - .as_buffer_string() - .expect("should be buffer string") - { + let maybe_string = value.value.as_buffer_string().unwrap_or_else(|err| { + panic!("Expected a string scalar: {}", err) + }); + match maybe_string { Some(s) => Arc::new(StringArray::new_scalar(s.as_str())), None => Arc::new(StringArray::new_null(1)), - } + } } DType::Binary(_) => { - match value + let maybe_buffer = value .value - .as_buffer_string() - .expect("should be buffer string") - { - Some(s) => Arc::new(BinaryArray::new_scalar(s.as_bytes())), + .as_buffer() + .unwrap_or_else(|err| { + panic!("Expected a binary buffer: {}", err) + }); + match maybe_buffer { + Some(s) => Arc::new(BinaryArray::new_scalar(s)), None => Arc::new(BinaryArray::new_null(1)), } } diff --git a/vortex-scalar/src/datafusion.rs b/vortex-scalar/src/datafusion.rs index 21d401224..3e12dda8f 100644 --- a/vortex-scalar/src/datafusion.rs +++ b/vortex-scalar/src/datafusion.rs @@ -8,9 +8,13 @@ impl From for ScalarValue { fn from(value: Scalar) -> Self { match value.dtype { DType::Null => ScalarValue::Null, - DType::Bool(_) => ScalarValue::Boolean(value.value.as_bool().expect("should be bool")), + DType::Bool(_) => ScalarValue::Boolean(value.value.as_bool().unwrap_or_else(|err| { + panic!("Expected a bool scalar: {}", err) + })), DType::Primitive(ptype, _) => { - let pvalue = value.value.as_pvalue().expect("should be pvalue"); + let pvalue = value.value.as_pvalue().unwrap_or_else(|err| { + panic!("Expected a pvalue scalar: {}", 
err) + }); match pvalue { None => match ptype { PType::U8 => ScalarValue::UInt8(None), @@ -44,15 +48,19 @@ impl From for ScalarValue { value .value .as_buffer_string() - .expect("should be buffer string") + .unwrap_or_else(|err| { + panic!("Expected a buffer string: {}", err) + }) .map(|b| b.as_str().to_string()), ), DType::Binary(_) => ScalarValue::Binary( value .value .as_buffer() - .expect("should be buffer") - .map(|b| b.as_slice().to_vec()), + .unwrap_or_else(|err| { + panic!("Expected a buffer: {}", err) + }) + .map(|b| b.into_vec().unwrap_or_else(|buf| buf.as_slice().to_vec())), ), DType::Struct(..) => { todo!("struct scalar conversion") diff --git a/vortex-scalar/src/list.rs b/vortex-scalar/src/list.rs index 6f6b02d19..50004a40a 100644 --- a/vortex-scalar/src/list.rs +++ b/vortex-scalar/src/list.rs @@ -106,11 +106,11 @@ impl<'a, T: for<'b> TryFrom<&'b Scalar, Error = VortexError>> TryFrom<&'a Scalar impl From> for Scalar where - Self: From, + Self: From { fn from(value: Vec) -> Self { let scalars = value.into_iter().map(|v| Self::from(v)).collect_vec(); - let element_dtype = scalars.first().expect("Empty list").dtype().clone(); + let element_dtype = scalars.first().unwrap_or_else(|| panic!("Empty list, could not determine element dtype")).dtype().clone(); let dtype = DType::List(Arc::new(element_dtype), NonNullable); Self { dtype, diff --git a/vortex-scalar/src/serde/flatbuffers.rs b/vortex-scalar/src/serde/flatbuffers.rs index a137f9912..6b9321aaf 100644 --- a/vortex-scalar/src/serde/flatbuffers.rs +++ b/vortex-scalar/src/serde/flatbuffers.rs @@ -54,7 +54,8 @@ impl WriteFlatBuffer for ScalarValue { ) -> WIPOffset> { let mut value_se = flexbuffers::FlexbufferSerializer::new(); self.serialize(&mut value_se) - .expect("Failed to serialize ScalarValue"); + .map_err(VortexError::FlexBuffersSerError) + .unwrap_or_else(|err| panic!("Failed to serialize ScalarValue: {}", err)); let flex = Some(fbb.create_vector(value_se.view())); fb::ScalarValue::create(fbb, 
&fb::ScalarValueArgs { flex }) } From 47f640977ea9bdada357b31fa154ccb315e8684a Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 12 Aug 2024 13:32:58 -0400 Subject: [PATCH 06/39] wip --- Cargo.toml | 2 +- vortex-array/src/implementation.rs | 4 ++-- vortex-array/src/lib.rs | 6 +++--- vortex-array/src/stats/statsset.rs | 6 +++--- vortex-array/src/stream/ext.rs | 2 +- vortex-array/src/validity.rs | 28 ++++++++++++++++++++++------ 6 files changed, 32 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d0c44d338..e43896582 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -158,7 +158,7 @@ debug_assert_with_mut_call = { level = "deny" } derive_partial_eq_without_eq = { level = "deny" } expect_used = { level = "deny" } equatable_if_let = { level = "deny" } -fallible_impl_from = { level = "deny" } +#fallible_impl_from = { level = "deny" } get_unwrap = { level = "deny" } host_endian_bytes = { level = "deny" } if_then_some_else_none = { level = "deny" } diff --git a/vortex-array/src/implementation.rs b/vortex-array/src/implementation.rs index b24740854..070ea55cd 100644 --- a/vortex-array/src/implementation.rs +++ b/vortex-array/src/implementation.rs @@ -242,7 +242,7 @@ where buffer: None, children: vec![], }; - array.with_dyn(|a| a.accept(&mut visitor).unwrap()); + array.with_dyn(|a| a.accept(&mut visitor).unwrap_or_else(|err| panic!("Error while visiting Array View children: {err}"))); ArrayData::try_new( encoding, array.dtype().clone(), @@ -252,7 +252,7 @@ where visitor.children.into(), stats, ) - .unwrap() + .unwrap_or_else(|err| panic!("Failed to create ArrayData from Array View: {err}")) } } } diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index d48f23ff8..9c01ad421 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -191,10 +191,10 @@ impl Array { result = Some(f(array)); Ok(()) }) - .unwrap(); + .unwrap_or_else(|err| panic!("Failed to convert Array to {}: {err}", std::any::type_name::())); // Now we unwrap the 
optional, which we know to be populated by the closure. - result.unwrap() + result.unwrap_or_else(|| panic!("Failed to get result from Array::with_dyn")) } } @@ -248,7 +248,7 @@ pub trait ArrayTrait: { fn nbytes(&self) -> usize { let mut visitor = NBytesVisitor(0); - self.accept(&mut visitor).unwrap(); + self.accept(&mut visitor).unwrap_or_else(|err| panic!("Failed to get nbytes from Array: {err}")); visitor.0 } } diff --git a/vortex-array/src/stats/statsset.rs b/vortex-array/src/stats/statsset.rs index 4152f7cab..37699bd30 100644 --- a/vortex-array/src/stats/statsset.rs +++ b/vortex-array/src/stats/statsset.rs @@ -184,7 +184,7 @@ impl StatsSet { fn merge_scalar_stat(&mut self, other: &Self, stat: Stat) { if let Entry::Occupied(mut e) = self.values.entry(stat) { if let Some(other_value) = other.get_as::(stat) { - let self_value: usize = e.get().try_into().unwrap(); + let self_value: usize = e.get().try_into().unwrap_or_else(|err| panic!("Failed to get stat {} as usize: {err}", stat)); e.insert((self_value + other_value).into()); } else { e.remove(); @@ -204,7 +204,7 @@ impl StatsSet { if let Entry::Occupied(mut e) = self.values.entry(stat) { if let Some(other_value) = other.get_as::>(stat) { // TODO(robert): Avoid the copy here. 
We could e.get_mut() but need to figure out casting - let self_value: Vec = e.get().try_into().unwrap(); + let self_value: Vec = e.get().try_into().unwrap_or_else(|err| panic!("Failed to get stat {} as Vec: {err}", stat)); e.insert( self_value .iter() @@ -223,7 +223,7 @@ impl StatsSet { fn merge_run_count(&mut self, other: &Self) { if let Entry::Occupied(mut e) = self.values.entry(Stat::RunCount) { if let Some(other_value) = other.get_as::(Stat::RunCount) { - let self_value: usize = e.get().try_into().unwrap(); + let self_value: usize = e.get().try_into().unwrap_or_else(|err| panic!("Failed to get stat {} as usize: {err}", Stat::RunCount)); e.insert((self_value + other_value + 1).into()); } else { e.remove(); diff --git a/vortex-array/src/stream/ext.rs b/vortex-array/src/stream/ext.rs index 7f5008b3d..8771d1800 100644 --- a/vortex-array/src/stream/ext.rs +++ b/vortex-array/src/stream/ext.rs @@ -15,7 +15,7 @@ pub trait ArrayStreamExt: ArrayStream { { async { let dtype = self.dtype().clone(); - let chunks: Vec = self.try_collect().await.unwrap(); + let chunks: Vec = self.try_collect().await.unwrap_or_else(|err| panic!("Failed to collect ArrayStream: {err}")); ChunkedArray::try_new(chunks, dtype) } } diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 05185e38b..9c8be1197 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -92,7 +92,14 @@ impl Validity { match self { Self::NonNullable | Self::AllValid => true, Self::AllInvalid => false, - Self::Array(a) => bool::try_from(&scalar_at(a, index).unwrap()).unwrap(), + Self::Array(a) => scalar_at(a, index) + .and_then(|s| bool::try_from(&s)) + .unwrap_or_else(|err| { + panic!( + "Failed to get bool from Validity Array at index {}: {err}", + index + ) + }), } } @@ -143,8 +150,18 @@ impl PartialEq for Validity { (Self::AllValid, Self::AllValid) => true, (Self::AllInvalid, Self::AllInvalid) => true, (Self::Array(a), Self::Array(b)) => { - 
a.clone().into_bool().unwrap().boolean_buffer() - == b.clone().into_bool().unwrap().boolean_buffer() + a.clone() + .into_bool() + .unwrap_or_else(|err| { + panic!("Failed to get Validity Array as BoolArray: {err}") + }) + .boolean_buffer() + == b.clone() + .into_bool() + .unwrap_or_else(|err| { + panic!("Failed to get Validity Array as BoolArray: {err}") + }) + .boolean_buffer() } _ => false, } @@ -202,13 +219,12 @@ impl FromIterator for Validity { LogicalValidity::AllInvalid(count) => BooleanBuffer::new_unset(count), LogicalValidity::Array(array) => array .into_bool() - .expect("validity must flatten to BoolArray") - .boolean_buffer(), + .unwrap_or_else(|err| panic!("Failed to get Validity Array as BoolArray: {err}")).boolean_buffer(), }; buffer.append_buffer(&present); } let bool_array = BoolArray::try_new(buffer.finish(), Validity::NonNullable) - .expect("BoolArray::try_new from BooleanBuffer should always succeed"); + .unwrap_or_else(|err| panic!("BoolArray::try_new from BooleanBuffer should always succeed: {err}")); Self::Array(bool_array.into_array()) } } From f0ce12803b14233323ef1d46403f0c46f01b715d Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 12 Aug 2024 13:51:43 -0400 Subject: [PATCH 07/39] asdfasdf --- encodings/alp/src/alp.rs | 8 +++++++- encodings/alp/src/array.rs | 4 ++-- vortex-array/src/variants.rs | 16 ++++++++-------- vortex-array/src/view.rs | 2 +- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/encodings/alp/src/alp.rs b/encodings/alp/src/alp.rs index ccf389142..627b7d6c4 100644 --- a/encodings/alp/src/alp.rs +++ b/encodings/alp/src/alp.rs @@ -112,7 +112,13 @@ pub trait ALPFloat: Float + 'static { #[inline] fn decode_single(encoded: Self::ALPInt, exponents: Exponents) -> Self { - let encoded_float: Self = Self::from(encoded).unwrap(); + let encoded_float: Self = Self::from(encoded).unwrap_or_else(|| { + panic!( + "Failed to convert {} to {} in ALPFloat::decode_single", + std::any::type_name::(), + std::any::type_name::() 
+ ) + }); encoded_float * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize] } } diff --git a/encodings/alp/src/array.rs b/encodings/alp/src/array.rs index f66219ea6..f67e49c5a 100644 --- a/encodings/alp/src/array.rs +++ b/encodings/alp/src/array.rs @@ -73,7 +73,7 @@ impl ALPArray { pub fn encoded(&self) -> Array { self.array() .child(0, &self.metadata().encoded_dtype, self.len()) - .expect("Missing encoded array") + .unwrap_or_else(|| panic!("Missing encoded child in ALPArray")) } #[inline] @@ -97,7 +97,7 @@ impl ALPArray { #[inline] pub fn ptype(&self) -> PType { - self.dtype().try_into().unwrap() + self.dtype().try_into().unwrap_or_else(|err| panic!("Failed to convert DType to PType: {err}")) } } diff --git a/vortex-array/src/variants.rs b/vortex-array/src/variants.rs index 710074dd9..712a8e767 100644 --- a/vortex-array/src/variants.rs +++ b/vortex-array/src/variants.rs @@ -12,7 +12,7 @@ pub trait ArrayVariants { } fn as_null_array_unchecked(&self) -> &dyn NullArrayTrait { - self.as_null_array().expect("Expected NullArray") + self.as_null_array().unwrap_or_else(|| panic!("Expected NullArray")) } fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { @@ -20,7 +20,7 @@ pub trait ArrayVariants { } fn as_bool_array_unchecked(&self) -> &dyn BoolArrayTrait { - self.as_bool_array().expect("Expected BoolArray") + self.as_bool_array().unwrap_or_else(|| panic!("Expected BoolArray")) } fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { @@ -28,7 +28,7 @@ pub trait ArrayVariants { } fn as_primitive_array_unchecked(&self) -> &dyn PrimitiveArrayTrait { - self.as_primitive_array().expect("Expected PrimitiveArray") + self.as_primitive_array().unwrap_or_else(|| panic!("Expected PrimitiveArray")) } fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { @@ -36,7 +36,7 @@ pub trait ArrayVariants { } fn as_utf8_array_unchecked(&self) -> &dyn Utf8ArrayTrait { - self.as_utf8_array().expect("Expected Utf8Array") + 
self.as_utf8_array().unwrap_or_else(|| panic!("Expected Utf8Array")) } fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { @@ -44,7 +44,7 @@ pub trait ArrayVariants { } fn as_binary_array_unchecked(&self) -> &dyn BinaryArrayTrait { - self.as_binary_array().expect("Expected BinaryArray") + self.as_binary_array().unwrap_or_else(|| panic!("Expected BinaryArray")) } fn as_struct_array(&self) -> Option<&dyn StructArrayTrait> { @@ -52,7 +52,7 @@ pub trait ArrayVariants { } fn as_struct_array_unchecked(&self) -> &dyn StructArrayTrait { - self.as_struct_array().expect("Expected StructArray") + self.as_struct_array().unwrap_or_else(|| panic!("Expected StructArray")) } fn as_list_array(&self) -> Option<&dyn ListArrayTrait> { @@ -60,7 +60,7 @@ pub trait ArrayVariants { } fn as_list_array_unchecked(&self) -> &dyn ListArrayTrait { - self.as_list_array().expect("Expected ListArray") + self.as_list_array().unwrap_or_else(|| panic!("Expected ListArray")) } fn as_extension_array(&self) -> Option<&dyn ExtensionArrayTrait> { @@ -68,7 +68,7 @@ pub trait ArrayVariants { } fn as_extension_array_unchecked(&self) -> &dyn ExtensionArrayTrait { - self.as_extension_array().expect("Expected ExtensionArray") + self.as_extension_array().unwrap_or_else(|| panic!("Expected ExtensionArray")) } } diff --git a/vortex-array/src/view.rs b/vortex-array/src/view.rs index 0252a3bc3..94a394dd6 100644 --- a/vortex-array/src/view.rs +++ b/vortex-array/src/view.rs @@ -153,7 +153,7 @@ impl ArrayView { let mut collector = ChildrenCollector::default(); Array::View(self.clone()) .with_dyn(|a| a.accept(&mut collector)) - .unwrap(); + .unwrap_or_else(|err| panic!("Failed to get children: {err}")); collector.children } From b8b0622ce121a557d49fd57af9fbdd8ad792b8f8 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 12 Aug 2024 13:35:32 -0400 Subject: [PATCH 08/39] add vortex_panic macro --- vortex-error/src/lib.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vortex-error/src/lib.rs 
b/vortex-error/src/lib.rs index fe1ca3167..a2cd004c4 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -195,6 +195,14 @@ macro_rules! vortex_bail { }; } +#[macro_export] +macro_rules! vortex_panic { + // TODO: this can be fancier, e.g., add backtrace if it's not already included + ($($tt:tt)+) => { + panic!($($tt)+) + }; +} + #[cfg(feature = "datafusion")] impl From for datafusion_common::DataFusionError { fn from(value: VortexError) -> Self { From ee6f42f948b9264ae77dfe975a19f373d37c6dde Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 12 Aug 2024 14:03:52 -0400 Subject: [PATCH 09/39] Revert "add vortex_panic macro" This reverts commit b8b0622ce121a557d49fd57af9fbdd8ad792b8f8. --- vortex-error/src/lib.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index a2cd004c4..fe1ca3167 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -195,14 +195,6 @@ macro_rules! vortex_bail { }; } -#[macro_export] -macro_rules! 
vortex_panic { - // TODO: this can be fancier, e.g., add backtrace if it's not already included - ($($tt:tt)+) => { - panic!($($tt)+) - }; -} - #[cfg(feature = "datafusion")] impl From for datafusion_common::DataFusionError { fn from(value: VortexError) -> Self { From 8943ff2b781c17b7e672912a13d8e4da90a1aeab Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 12 Aug 2024 18:16:29 -0400 Subject: [PATCH 10/39] more wip --- encodings/alp/benches/alp_compress.rs | 1 + encodings/alp/src/compress.rs | 5 ++- encodings/byte-bool/src/lib.rs | 7 ++-- encodings/datetime-parts/src/array.rs | 6 +-- encodings/dict/benches/dict_compress.rs | 2 + encodings/dict/src/compress.rs | 6 +-- encodings/dict/src/dict.rs | 14 ++++--- .../fastlanes/benches/bitpacking_take.rs | 2 + .../src/bitpacking/compute/search_sorted.rs | 4 +- encodings/fastlanes/src/bitpacking/mod.rs | 4 +- encodings/fastlanes/src/delta/mod.rs | 6 +-- encodings/fastlanes/src/for/mod.rs | 4 +- encodings/roaring/src/boolean/mod.rs | 4 +- encodings/roaring/src/integer/compress.rs | 6 +-- encodings/roaring/src/integer/mod.rs | 6 +-- encodings/runend-bool/src/array.rs | 2 +- encodings/runend-bool/src/compress.rs | 8 ++-- encodings/runend/src/compress.rs | 4 +- encodings/runend/src/runend.rs | 4 +- encodings/zigzag/src/zigzag.rs | 8 ++-- pyvortex/src/encode.rs | 6 ++- pyvortex/src/vortex_arrow.rs | 9 ++--- vortex-array/benches/compare.rs | 2 + vortex-array/benches/filter_indices.rs | 2 + vortex-array/benches/scalar_subtract.rs | 2 + .../src/array/bool/compute/boolean.rs | 8 ++-- vortex-array/src/array/constant/compute.rs | 2 +- vortex-array/src/arrow/recordbatch.rs | 2 +- vortex-array/src/canonical.rs | 16 ++++---- vortex-array/src/compute/compare.rs | 4 +- vortex-array/src/compute/filter.rs | 4 +- vortex-datafusion/src/expr.rs | 4 +- vortex-datafusion/src/lib.rs | 39 ++++++++++++------- vortex-datafusion/src/memory.rs | 5 ++- vortex-datafusion/src/persistent/opener.rs | 2 +- vortex-datafusion/src/plans.rs | 33 
+++++++--------- vortex-error/src/lib.rs | 7 ++++ vortex-serde/benches/ipc_array_reader_take.rs | 1 + vortex-serde/benches/ipc_take.rs | 1 + vortex-serde/src/chunked_reader/mod.rs | 11 +++++- vortex-serde/src/chunked_reader/take_rows.rs | 26 +++++-------- vortex-serde/src/io/object_store.rs | 9 ++++- vortex-serde/src/io/tokio.rs | 8 +++- vortex-serde/src/layouts/reader/builder.rs | 4 +- vortex-serde/src/layouts/reader/layouts.rs | 8 +++- vortex-serde/src/layouts/reader/stream.rs | 6 ++- .../src/layouts/writer/layout_writer.rs | 10 ++--- vortex-serde/src/message_reader.rs | 2 +- vortex-serde/src/messages.rs | 7 +++- vortex-serde/src/stream_reader/mod.rs | 4 +- 50 files changed, 201 insertions(+), 146 deletions(-) diff --git a/encodings/alp/benches/alp_compress.rs b/encodings/alp/benches/alp_compress.rs index d26137a40..85a4c2d9f 100644 --- a/encodings/alp/benches/alp_compress.rs +++ b/encodings/alp/benches/alp_compress.rs @@ -4,6 +4,7 @@ fn main() { divan::main(); } +#[allow(clippy::unwrap_used)] #[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])] fn alp_compress(n: usize) -> (Exponents, Vec, Vec, Vec) { let values: Vec = vec![T::from(1.234).unwrap(); n]; diff --git a/encodings/alp/src/compress.rs b/encodings/alp/src/compress.rs index 825d2a11c..fc9bfaaeb 100644 --- a/encodings/alp/src/compress.rs +++ b/encodings/alp/src/compress.rs @@ -44,7 +44,7 @@ where len, Scalar::null(values.dtype().as_nullable()), ) - .unwrap() + .unwrap_or_else(|err| panic!("Failed to create SparseArray for ALP patches: {err}")) .into_array() }), ) @@ -62,7 +62,8 @@ pub fn alp_encode(parray: &PrimitiveArray) -> VortexResult { pub fn decompress(array: ALPArray) -> VortexResult { let encoded = array.encoded().into_primitive()?; - let decoded = match_each_alp_float_ptype!(array.dtype().try_into().unwrap(), |$T| { + let ptype = array.dtype().try_into()?; + let decoded = match_each_alp_float_ptype!(ptype, |$T| { PrimitiveArray::from_vec( 
decompress_primitive::<$T>(encoded.maybe_null_slice(), array.exponents()), encoded.validity(), diff --git a/encodings/byte-bool/src/lib.rs b/encodings/byte-bool/src/lib.rs index 42a2ec556..8c7581388 100644 --- a/encodings/byte-bool/src/lib.rs +++ b/encodings/byte-bool/src/lib.rs @@ -64,7 +64,7 @@ impl ByteBoolArray { } pub fn buffer(&self) -> &Buffer { - self.array().buffer().expect("missing mandatory buffer") + self.array().buffer().unwrap_or_else(|| panic!("ByteBoolArray is missing the underlying buffer")) } fn maybe_null_slice(&self) -> &[bool] { @@ -93,7 +93,7 @@ impl BoolArrayTrait for ByteBoolArray { impl From> for ByteBoolArray { fn from(value: Vec) -> Self { - Self::try_from_vec(value, Validity::AllValid).unwrap() + Self::try_from_vec(value, Validity::AllValid).unwrap_or_else(|err| panic!("Failed to create ByteBoolArray from Vec: {err}")) } } @@ -107,7 +107,8 @@ impl From>> for ByteBoolArray { .map(std::option::Option::unwrap_or_default) .collect(); - Self::try_from_vec(data, validity).unwrap() + Self::try_from_vec(data, validity) + .unwrap_or_else(|err| panic!("Failed to create ByteBoolArray from nullable bools: {err}")) } } diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index 34722985c..fbc83163d 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -65,19 +65,19 @@ impl DateTimePartsArray { pub fn days(&self) -> Array { self.array() .child(0, &self.metadata().days_dtype, self.len()) - .expect("Missing days array") + .unwrap_or_else(|| panic!("DatetimePartsArray missing days array")) } pub fn seconds(&self) -> Array { self.array() .child(1, &self.metadata().seconds_dtype, self.len()) - .expect("Missing seconds array") + .unwrap_or_else(|| panic!("DatetimePartsArray missing seconds array")) } pub fn subsecond(&self) -> Array { self.array() .child(2, &self.metadata().subseconds_dtype, self.len()) - .expect("Missing subsecond array") + .unwrap_or_else(|| 
panic!("DatetimePartsArray missing subsecond array")) } } diff --git a/encodings/dict/benches/dict_compress.rs b/encodings/dict/benches/dict_compress.rs index bb3286e12..adff34684 100644 --- a/encodings/dict/benches/dict_compress.rs +++ b/encodings/dict/benches/dict_compress.rs @@ -1,3 +1,5 @@ +#![allow(clippy::unwrap_used)] + use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::distributions::{Alphanumeric, Uniform}; use rand::prelude::SliceRandom; diff --git a/encodings/dict/src/compress.rs b/encodings/dict/src/compress.rs index 56f2e2117..96408873e 100644 --- a/encodings/dict/src/compress.rs +++ b/encodings/dict/src/compress.rs @@ -65,7 +65,7 @@ pub fn dict_encode_typed_primitive( } } }) - .unwrap(); + .unwrap_or_else(|err| panic!("Failed to iterate over primitive array during dictionary encoding: {err}")); let values_validity = if array.dtype().is_nullable() { let mut validity = vec![true; values.len()]; @@ -86,7 +86,7 @@ pub fn dict_encode_typed_primitive( pub fn dict_encode_varbin(array: &VarBinArray) -> (PrimitiveArray, VarBinArray) { array .with_iterator(|iter| dict_encode_typed_varbin(array.dtype().clone(), iter)) - .unwrap() + .unwrap_or_else(|err| panic!("Failed to iterate over varbin array during dictionary encoding: {err}")) } fn lookup_bytes<'a, T: NativePType + AsPrimitive>( @@ -165,7 +165,7 @@ where dtype, values_validity, ) - .unwrap(), + .unwrap_or_else(|err| panic!("Failed to create VarBinArray dictionary during encoding: {err}")) ) } diff --git a/encodings/dict/src/dict.rs b/encodings/dict/src/dict.rs index 7cda4a494..6b7a159ef 100644 --- a/encodings/dict/src/dict.rs +++ b/encodings/dict/src/dict.rs @@ -44,14 +44,14 @@ impl DictArray { pub fn values(&self) -> Array { self.array() .child(0, self.dtype(), self.metadata().values_len) - .expect("Missing values") + .unwrap_or_else(|| panic!("DictArray missing values")) } #[inline] pub fn codes(&self) -> Array { self.array() .child(1, &self.metadata().codes_dtype, 
self.len()) - .expect("Missing codes") + .unwrap_or_else(|| panic!("DictArray missing codes")) } } @@ -66,16 +66,20 @@ impl IntoCanonical for DictArray { impl ArrayValidity for DictArray { fn is_valid(&self, index: usize) -> bool { let values_index = scalar_at(&self.codes(), index) - .unwrap() + .unwrap_or_else(|err| { + panic!("Failed to get index {} from DictArray codes: {err}", index) + }) .as_ref() .try_into() - .unwrap(); + .unwrap_or_else(|err| panic!("Failed to convert dictionary code to usize: {err}")); self.values().with_dyn(|a| a.is_valid(values_index)) } fn logical_validity(&self) -> LogicalValidity { if self.dtype().is_nullable() { - let primitive_codes = self.codes().into_primitive().unwrap(); + let primitive_codes = self.codes().into_primitive().unwrap_or_else(|err| { + panic!("Failed to convert DictArray codes to primitive array: {err}") + }); match_each_integer_ptype!(primitive_codes.ptype(), |$P| { ArrayAccessor::<$P>::with_iterator(&primitive_codes, |iter| { LogicalValidity::Array( diff --git a/encodings/fastlanes/benches/bitpacking_take.rs b/encodings/fastlanes/benches/bitpacking_take.rs index fba3d28c2..b96b7cbec 100644 --- a/encodings/fastlanes/benches/bitpacking_take.rs +++ b/encodings/fastlanes/benches/bitpacking_take.rs @@ -1,3 +1,5 @@ +#![allow(clippy::unwrap_used)] + use criterion::{black_box, criterion_group, criterion_main, Criterion}; use itertools::Itertools; use rand::distributions::Uniform; diff --git a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs index 772ea0692..35e28999e 100644 --- a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs +++ b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs @@ -43,13 +43,13 @@ struct BitPackedSearch { impl BitPackedSearch { pub fn new(array: &BitPackedArray) -> Self { Self { - packed: array.packed().into_primitive().unwrap(), + packed: array.packed().into_primitive().unwrap_or_else(|err| panic!("Failed to 
get packed bytes as PrimitiveArray: {err}")), offset: array.offset(), length: array.len(), bit_width: array.bit_width(), min_patch_offset: array.patches().map(|p| { SparseArray::try_from(p) - .expect("Only Sparse patches are supported") + .unwrap_or_else(|err| panic!("Only sparse patches are supported: {err}")) .min_index() }), } diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index 4a547da62..a038b6e9d 100644 --- a/encodings/fastlanes/src/bitpacking/mod.rs +++ b/encodings/fastlanes/src/bitpacking/mod.rs @@ -102,7 +102,7 @@ impl BitPackedArray { &self.dtype().with_nullability(Nullability::NonNullable), self.packed_len(), ) - .expect("Missing packed array") + .unwrap_or_else(|| panic!("BitpackedArray is missing packed child bytes array")) } #[inline] @@ -150,7 +150,7 @@ impl BitPackedArray { #[inline] pub fn ptype(&self) -> PType { - self.dtype().try_into().unwrap() + self.dtype().try_into().unwrap_or_else(|err| panic!("Failed to convert BitpackedArray DType to PType: {err}")) } #[inline] diff --git a/encodings/fastlanes/src/delta/mod.rs b/encodings/fastlanes/src/delta/mod.rs index 7b8f7798f..1fb9dc1d3 100644 --- a/encodings/fastlanes/src/delta/mod.rs +++ b/encodings/fastlanes/src/delta/mod.rs @@ -61,19 +61,19 @@ impl DeltaArray { pub fn bases(&self) -> Array { self.array() .child(0, self.dtype(), self.bases_len()) - .expect("Missing bases") + .unwrap_or_else(|| panic!("DeltaArray is missing bases")) } #[inline] pub fn deltas(&self) -> Array { self.array() .child(1, self.dtype(), self.len()) - .expect("Missing deltas") + .unwrap_or_else(|| panic!("DeltaArray is missing deltas")) } #[inline] fn lanes(&self) -> usize { - let ptype = self.dtype().try_into().unwrap(); + let ptype = self.dtype().try_into().unwrap_or_else(|err| panic!("Failed to convert DeltaArray DType to PType: {err}")); match_each_unsigned_integer_ptype!(ptype, |$T| { <$T as fastlanes::FastLanes>::LANES }) diff --git 
a/encodings/fastlanes/src/for/mod.rs b/encodings/fastlanes/src/for/mod.rs index 45bb7f1e8..1ead9e722 100644 --- a/encodings/fastlanes/src/for/mod.rs +++ b/encodings/fastlanes/src/for/mod.rs @@ -50,7 +50,7 @@ impl FoRArray { }; self.array() .child(0, dtype, self.len()) - .expect("Missing FoR child") + .unwrap_or_else(|| panic!("FoRArray is missing encoded child array")) } #[inline] @@ -65,7 +65,7 @@ impl FoRArray { #[inline] pub fn ptype(&self) -> PType { - self.dtype().try_into().unwrap() + self.dtype().try_into().unwrap_or_else(|err| panic!("Failed to convert FoRArray DType to PType: {err}")) } } diff --git a/encodings/roaring/src/boolean/mod.rs b/encodings/roaring/src/boolean/mod.rs index b6b608575..bff86560c 100644 --- a/encodings/roaring/src/boolean/mod.rs +++ b/encodings/roaring/src/boolean/mod.rs @@ -10,7 +10,7 @@ use vortex::validity::{ArrayValidity, LogicalValidity, Validity}; use vortex::variants::{ArrayVariants, BoolArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{ - impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoCanonical, + impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical, TypedArray, }; use vortex_buffer::Buffer; @@ -51,7 +51,7 @@ impl RoaringBoolArray { Bitmap::deserialize::( self.array() .buffer() - .expect("RoaringBoolArray buffer is missing") + .unwrap_or_else(|| panic!("RoaringBoolArray buffer is missing")) .as_ref(), ) } diff --git a/encodings/roaring/src/integer/compress.rs b/encodings/roaring/src/integer/compress.rs index 106079b2f..7d0c92e97 100644 --- a/encodings/roaring/src/integer/compress.rs +++ b/encodings/roaring/src/integer/compress.rs @@ -2,7 +2,7 @@ use croaring::Bitmap; use num_traits::NumCast; use vortex::array::PrimitiveArray; use vortex_dtype::{NativePType, PType}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexResult}; use crate::RoaringIntArray; @@ -12,7 +12,7 @@ pub fn 
roaring_int_encode(parray: PrimitiveArray) -> VortexResult roaring_encode_primitive::(parray.maybe_null_slice()), PType::U32 => roaring_encode_primitive::(parray.maybe_null_slice()), PType::U64 => roaring_encode_primitive::(parray.maybe_null_slice()), - _ => vortex_bail!("Unsupported ptype {}", parray.ptype()), + _ => vortex_bail!("Unsupported PType {}", parray.ptype()), } } @@ -20,7 +20,7 @@ fn roaring_encode_primitive( values: &[T], ) -> VortexResult { let mut bitmap = Bitmap::new(); - bitmap.extend(values.iter().map(|i| i.to_u32().unwrap())); + bitmap.extend(values.iter().map(|i| i.to_u32().ok_or_else(|| vortex_err!("Failed to cast value {} to u32", i))).collect::>>()?); bitmap.run_optimize(); bitmap.shrink_to_fit(); RoaringIntArray::try_new(bitmap, T::PTYPE) diff --git a/encodings/roaring/src/integer/mod.rs b/encodings/roaring/src/integer/mod.rs index 1e5481393..abc23b083 100644 --- a/encodings/roaring/src/integer/mod.rs +++ b/encodings/roaring/src/integer/mod.rs @@ -14,7 +14,7 @@ use vortex::{ use vortex_buffer::Buffer; use vortex_dtype::Nullability::NonNullable; use vortex_dtype::{DType, PType}; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::{vortex_bail, VortexResult}; mod compress; mod compute; @@ -49,7 +49,7 @@ impl RoaringIntArray { Bitmap::deserialize::( self.array() .buffer() - .expect("RoaringBoolArray buffer is missing") + .unwrap_or_else(|| panic!("RoaringBoolArray buffer is missing")) .as_ref(), ) } @@ -62,7 +62,7 @@ impl RoaringIntArray { if array.encoding().id() == Primitive::ID { Ok(roaring_int_encode(PrimitiveArray::try_from(array)?)?.into_array()) } else { - Err(vortex_err!("RoaringInt can only encode primitive arrays")) + vortex_bail!("RoaringInt can only encode primitive arrays") } } } diff --git a/encodings/runend-bool/src/array.rs b/encodings/runend-bool/src/array.rs index 93150ea5a..8166d255c 100644 --- a/encodings/runend-bool/src/array.rs +++ b/encodings/runend-bool/src/array.rs @@ -96,7 +96,7 @@ impl 
RunEndBoolArray { pub fn ends(&self) -> Array { self.array() .child(0, &self.metadata().ends_dtype, self.metadata().num_runs) - .expect("missing ends") + .unwrap_or_else(|| panic!("RunEndBoolArray is missing its run ends")) } } diff --git a/encodings/runend-bool/src/compress.rs b/encodings/runend-bool/src/compress.rs index f98e2ec7c..1a48fb50f 100644 --- a/encodings/runend-bool/src/compress.rs +++ b/encodings/runend-bool/src/compress.rs @@ -28,7 +28,9 @@ pub fn runend_bool_encode_slice(elements: &BooleanBuffer) -> (Vec, bool) { ends.push(s as u64); ends.push(e as u64); } - if *ends.last().unwrap() != elements.len() as u64 { + + let last_end = ends.last().unwrap_or_else(|| panic!("RunEndBoolArray is missing its run ends")); + if *last_end != elements.len() as u64 { ends.push(elements.len() as u64) } @@ -54,8 +56,8 @@ pub fn runend_bool_decode_slice + FromPrimit offset: usize, length: usize, ) -> Vec { - let offset_e = E::from_usize(offset).unwrap(); - let length_e = E::from_usize(length).unwrap(); + let offset_e = E::from_usize(offset).unwrap_or_else(|| panic!("offset {} cannot be converted to {}", offset, std::any::type_name::())); + let length_e = E::from_usize(length).unwrap_or_else(|| panic!("length {} cannot be converted to {}", length, std::any::type_name::())); let trimmed_ends = run_ends .iter() .map(|v| *v - offset_e) diff --git a/encodings/runend/src/compress.rs b/encodings/runend/src/compress.rs index ce5a70d59..55944fde7 100644 --- a/encodings/runend/src/compress.rs +++ b/encodings/runend/src/compress.rs @@ -92,8 +92,8 @@ pub fn runend_decode_primitive< offset: usize, length: usize, ) -> Vec { - let offset_e = E::from_usize(offset).unwrap(); - let length_e = E::from_usize(length).unwrap(); + let offset_e = E::from_usize(offset).unwrap_or_else(|| panic!("offset {} cannot be converted to {}", offset, std::any::type_name::())); + let length_e = E::from_usize(length).unwrap_or_else(|| panic!("length {} cannot be converted to {}", length, 
std::any::type_name::())); let trimmed_ends = run_ends .iter() .map(|v| *v - offset_e) diff --git a/encodings/runend/src/runend.rs b/encodings/runend/src/runend.rs index 8bd374ecc..88a4edd73 100644 --- a/encodings/runend/src/runend.rs +++ b/encodings/runend/src/runend.rs @@ -106,14 +106,14 @@ impl RunEndArray { pub fn ends(&self) -> Array { self.array() .child(0, &self.metadata().ends_dtype, self.metadata().num_runs) - .expect("missing ends") + .unwrap_or_else(|| panic!("RunEndArray is missing its run ends")) } #[inline] pub fn values(&self) -> Array { self.array() .child(1, self.dtype(), self.metadata().num_runs) - .expect("missing values") + .unwrap_or_else(|| panic!("RunEndArray is missing its values")) } } diff --git a/encodings/zigzag/src/zigzag.rs b/encodings/zigzag/src/zigzag.rs index 3bae6748c..2edef3aa4 100644 --- a/encodings/zigzag/src/zigzag.rs +++ b/encodings/zigzag/src/zigzag.rs @@ -5,7 +5,7 @@ use vortex::validity::{ArrayValidity, LogicalValidity}; use vortex::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{ - impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoArrayVariant, + impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArrayVariant, IntoCanonical, }; use vortex_dtype::{DType, PType}; @@ -21,7 +21,7 @@ pub struct ZigZagMetadata; impl ZigZagArray { pub fn new(encoded: Array) -> Self { - Self::try_new(encoded).unwrap() + Self::try_new(encoded).unwrap_or_else(|err| panic!("Failed to construct ZigZagArray: {}", err)) } pub fn try_new(encoded: Array) -> VortexResult { @@ -47,11 +47,11 @@ impl ZigZagArray { } pub fn encoded(&self) -> Array { - let ptype = PType::try_from(self.dtype()).expect("ptype"); + let ptype = PType::try_from(self.dtype()).unwrap_or_else(|err| panic!("Failed to convert DType {} to PType: {}", self.dtype(), err)); let encoded = DType::from(ptype.to_unsigned()).with_nullability(self.dtype().nullability()); 
self.array() .child(0, &encoded, self.len()) - .expect("Missing encoded array") + .unwrap_or_else(|| panic!("ZigZagArray is missing its encoded array")) } } diff --git a/pyvortex/src/encode.rs b/pyvortex/src/encode.rs index 564cabce0..921905d04 100644 --- a/pyvortex/src/encode.rs +++ b/pyvortex/src/encode.rs @@ -9,10 +9,11 @@ use vortex::array::ChunkedArray; use vortex::arrow::{FromArrowArray, FromArrowType}; use vortex::Array; use vortex_dtype::DType; +use vortex_error::VortexError; use crate::array::PyArray; use crate::error::PyVortexError; -use crate::vortex_arrow::map_arrow_err; +use crate::vortex_arrow::map_to_pyerr; /// The main entry point for creating enc arrays from other Python objects. /// @@ -52,7 +53,8 @@ pub fn encode(obj: &Bound) -> PyResult> { let dtype = DType::from_arrow(array_stream.schema()); let chunks = array_stream .into_iter() - .map(|b| b.map(Array::from).map_err(map_arrow_err)) + .map(|b| b.map_err(VortexError::ArrowError)) + .map(|b| b.and_then(Array::try_from).map_err(map_to_pyerr)) .collect::>>()?; PyArray::wrap( obj.py(), diff --git a/pyvortex/src/vortex_arrow.rs b/pyvortex/src/vortex_arrow.rs index d74f04963..969fc481f 100644 --- a/pyvortex/src/vortex_arrow.rs +++ b/pyvortex/src/vortex_arrow.rs @@ -1,5 +1,4 @@ use arrow::array::{Array as ArrowArray, ArrayRef}; -use arrow::error::ArrowError; use arrow::pyarrow::ToPyArrow; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -7,7 +6,7 @@ use pyo3::types::{IntoPyDict, PyList}; use vortex::array::ChunkedArray; use vortex::{Array, IntoCanonical}; -pub fn map_arrow_err(error: ArrowError) -> PyErr { +pub fn map_to_pyerr(error: E) -> PyErr { PyValueError::new_err(error.to_string()) } @@ -17,10 +16,10 @@ pub fn export_array<'py>(py: Python<'py>, array: &Array) -> PyResult = if let Ok(chunked_array) = ChunkedArray::try_from(array) { chunked_array .chunks() - .map(|chunk| chunk.into_canonical().unwrap().into_arrow()) - .collect() + .map(|chunk| chunk.into_canonical().and_then(|c| 
c.into_arrow()).map_err(map_to_pyerr)) + .collect::>>()? } else { - vec![array.clone().into_canonical().unwrap().into_arrow()] + vec![array.clone().into_canonical().and_then(|c| c.into_arrow()).map_err(map_to_pyerr)?] }; if chunks.is_empty() { return Err(PyValueError::new_err("No chunks in array")); diff --git a/vortex-array/benches/compare.rs b/vortex-array/benches/compare.rs index b8d7c3518..fd8fc88a4 100644 --- a/vortex-array/benches/compare.rs +++ b/vortex-array/benches/compare.rs @@ -1,3 +1,5 @@ +#![allow(clippy::unwrap_used)] + use criterion::{black_box, criterion_group, criterion_main, Criterion}; use itertools::Itertools; use rand::distributions::Uniform; diff --git a/vortex-array/benches/filter_indices.rs b/vortex-array/benches/filter_indices.rs index 97d6eafd7..07d9d820f 100644 --- a/vortex-array/benches/filter_indices.rs +++ b/vortex-array/benches/filter_indices.rs @@ -1,3 +1,5 @@ +#![allow(clippy::unwrap_used)] + use criterion::{black_box, criterion_group, criterion_main, Criterion}; use itertools::Itertools; use rand::distributions::Uniform; diff --git a/vortex-array/benches/scalar_subtract.rs b/vortex-array/benches/scalar_subtract.rs index f6608f850..977be839e 100644 --- a/vortex-array/benches/scalar_subtract.rs +++ b/vortex-array/benches/scalar_subtract.rs @@ -1,3 +1,5 @@ +#![allow(clippy::unwrap_used)] + use criterion::{black_box, criterion_group, criterion_main, Criterion}; use itertools::Itertools; use rand::distributions::Uniform; diff --git a/vortex-array/src/array/bool/compute/boolean.rs b/vortex-array/src/array/bool/compute/boolean.rs index 091fa72c5..baf103599 100644 --- a/vortex-array/src/array/bool/compute/boolean.rs +++ b/vortex-array/src/array/bool/compute/boolean.rs @@ -9,10 +9,10 @@ use crate::{Array, IntoCanonical}; impl OrFn for BoolArray { fn or(&self, array: &Array) -> VortexResult { - let lhs = self.clone().into_canonical()?.into_arrow(); + let lhs = self.clone().into_canonical()?.into_arrow()?; let lhs = lhs.as_boolean(); - let 
rhs = array.clone().into_canonical()?.into_arrow(); + let rhs = array.clone().into_canonical()?.into_arrow()?; let rhs = rhs.as_boolean(); let array = boolean::or(lhs, rhs)?; @@ -23,10 +23,10 @@ impl OrFn for BoolArray { impl AndFn for BoolArray { fn and(&self, array: &Array) -> VortexResult { - let lhs = self.clone().into_canonical()?.into_arrow(); + let lhs = self.clone().into_canonical()?.into_arrow()?; let lhs = lhs.as_boolean(); - let rhs = array.clone().into_canonical()?.into_arrow(); + let rhs = array.clone().into_canonical()?.into_arrow()?; let rhs = rhs.as_boolean(); let array = boolean::and(lhs, rhs)?; diff --git a/vortex-array/src/array/constant/compute.rs b/vortex-array/src/array/constant/compute.rs index cd3f180f5..74748d5d9 100644 --- a/vortex-array/src/array/constant/compute.rs +++ b/vortex-array/src/array/constant/compute.rs @@ -106,7 +106,7 @@ impl CompareFn for ConstantArray { Ok(ConstantArray::new(scalar, self.len()).into_array()) } else { let datum = Arc::::from(self.scalar()); - let rhs = rhs.clone().into_canonical()?.into_arrow(); + let rhs = rhs.clone().into_canonical()?.into_arrow()?; let rhs = rhs.as_ref(); let boolean_array = match operator { diff --git a/vortex-array/src/arrow/recordbatch.rs b/vortex-array/src/arrow/recordbatch.rs index f44ad656c..503709505 100644 --- a/vortex-array/src/arrow/recordbatch.rs +++ b/vortex-array/src/arrow/recordbatch.rs @@ -39,7 +39,7 @@ impl TryFrom for RecordBatch { fn try_from(value: Array) -> VortexResult { let array_ref = value .into_canonical()? 
- .into_arrow(); + .into_arrow()?; let struct_array = as_struct_array(array_ref.as_ref()); Ok(RecordBatch::from(struct_array)) } diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index ec97996a9..05679130a 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -71,8 +71,8 @@ impl Canonical { /// Scalar arrays such as Bool and Primitive canonical arrays should convert with /// zero copies, while more complex variants such as Struct may require allocations if its child /// arrays require decompression. - pub fn into_arrow(self) -> ArrayRef { - match self { + pub fn into_arrow(self) -> VortexResult { + Ok(match self { Canonical::Null(a) => null_to_arrow(a), Canonical::Bool(a) => bool_to_arrow(a), Canonical::Primitive(a) => primitive_to_arrow(a), @@ -80,15 +80,14 @@ impl Canonical { Canonical::VarBin(a) => varbin_to_arrow(a), Canonical::Extension(a) => { if !is_temporal_ext_type(a.id()) { - panic!("unsupported extension dtype with ID {}", a.id().as_ref()) + vortex_bail!("unsupported extension dtype with ID {}", a.id().as_ref()) } temporal_to_arrow( - TemporalArray::try_from(&a.into_array()) - .unwrap_or_else(|err| panic!("array must be known temporal array ext type: {err}")), + TemporalArray::try_from(&a.into_array())?, ) } - } + }) } } @@ -187,7 +186,7 @@ fn struct_to_arrow(struct_array: StructArray) -> ArrayRef { match canonical { // visit nested structs recursively Canonical::Struct(a) => struct_to_arrow(a), - _ => canonical.into_arrow(), + _ => canonical.into_arrow().unwrap_or_else(|err| panic!("Failed to convert canonicalized field to arrow: {err}")), } }) .collect(); @@ -441,7 +440,7 @@ impl From for Array { mod test { use arrow_array::types::{Int64Type, UInt64Type}; use arrow_array::{ - Array, PrimitiveArray as ArrowPrimitiveArray, StructArray as ArrowStructArray, + PrimitiveArray as ArrowPrimitiveArray, StructArray as ArrowStructArray, }; use vortex_dtype::Nullability; use vortex_scalar::Scalar; @@ -483,6 
+482,7 @@ mod test { .into_canonical() .unwrap() .into_arrow() + .unwrap() .as_any() .downcast_ref::() .cloned() diff --git a/vortex-array/src/compute/compare.rs b/vortex-array/src/compute/compare.rs index 550ddca30..4967abc0d 100644 --- a/vortex-array/src/compute/compare.rs +++ b/vortex-array/src/compute/compare.rs @@ -35,8 +35,8 @@ pub fn compare(left: &Array, right: &Array, operator: Operator) -> VortexResult< } // Fallback to arrow on canonical types - let lhs = left.clone().into_canonical()?.into_arrow(); - let rhs = right.clone().into_canonical()?.into_arrow(); + let lhs = left.clone().into_canonical()?.into_arrow()?; + let rhs = right.clone().into_canonical()?.into_arrow()?; let array = match operator { Operator::Eq => cmp::eq(&lhs.as_ref(), &rhs.as_ref())?, diff --git a/vortex-array/src/compute/filter.rs b/vortex-array/src/compute/filter.rs index 3ec0178a7..71f5f6b93 100644 --- a/vortex-array/src/compute/filter.rs +++ b/vortex-array/src/compute/filter.rs @@ -40,8 +40,8 @@ pub fn filter(array: &Array, predicate: &Array) -> VortexResult { filter_fn.filter(predicate) } else { // Fallback: implement using Arrow kernels. 
- let array_ref = array.clone().into_canonical()?.into_arrow(); - let predicate_ref = predicate.clone().into_canonical()?.into_arrow(); + let array_ref = array.clone().into_canonical()?.into_arrow()?; + let predicate_ref = predicate.clone().into_canonical()?.into_arrow()?; let filtered = arrow_select::filter::filter(array_ref.as_ref(), predicate_ref.as_boolean())?; diff --git a/vortex-datafusion/src/expr.rs b/vortex-datafusion/src/expr.rs index 342d22b0f..ef86585f2 100644 --- a/vortex-datafusion/src/expr.rs +++ b/vortex-datafusion/src/expr.rs @@ -13,7 +13,7 @@ use vortex::array::{ConstantArray, StructArray}; use vortex::compute::compare; use vortex::variants::StructArrayTrait; use vortex::{Array, IntoArray}; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; use vortex_expr::Operator; use vortex_scalar::Scalar; @@ -117,7 +117,7 @@ pub fn convert_expr_to_vortex( physical_expr: Arc, input_schema: &Schema, ) -> VortexResult> { - if physical_expr.data_type(input_schema).unwrap().is_temporal() { + if physical_expr.data_type(input_schema).map_err(VortexError::from)?.is_temporal() { vortex_bail!("Doesn't support evaluating operations over temporal values"); } if let Some(binary_expr) = physical_expr diff --git a/vortex-datafusion/src/lib.rs b/vortex-datafusion/src/lib.rs index 8c75b126c..a2734c780 100644 --- a/vortex-datafusion/src/lib.rs +++ b/vortex-datafusion/src/lib.rs @@ -19,13 +19,12 @@ use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_expr::{Expr, Operator}; use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}; use futures::Stream; -use itertools::Itertools; use memory::{VortexMemTable, VortexMemTableOptions}; use persistent::config::VortexTableOptions; use persistent::provider::VortexFileTableProvider; use vortex::array::ChunkedArray; use vortex::{Array, ArrayDType, IntoArrayVariant}; -use vortex_error::vortex_err; +use 
vortex_error::{vortex_err, VortexError, VortexResult}; pub mod expr; pub mod memory; @@ -167,21 +166,31 @@ fn can_be_pushed_down(expr: &Expr) -> bool { } } -fn get_filter_projection(exprs: &[Expr], schema: SchemaRef) -> Vec { - let referenced_columns: HashSet = - exprs.iter().flat_map(get_column_references).collect(); - - let projection: Vec = referenced_columns +fn get_filter_projection(exprs: &[Expr], schema: SchemaRef) -> VortexResult> { + let referenced_columns: HashSet = exprs .iter() - .map(|col_name| schema.column_with_name(col_name).unwrap().0) - .sorted() + .map(get_column_references) + .collect::>>()? + .into_iter() + .flatten() .collect(); - projection + let mut projection: Vec = referenced_columns + .iter() + .map(|col_name| { + schema + .column_with_name(col_name) + .ok_or_else(|| vortex_err!("Column not found: {}", col_name)) + .map(|c| c.0) + }) + .collect::>>()?; + projection.sort(); + + Ok(projection) } /// Extract out the columns from our table referenced by the expression. -fn get_column_references(expr: &Expr) -> HashSet { +fn get_column_references(expr: &Expr) -> VortexResult> { let mut references = HashSet::new(); expr.apply(|node| match node { @@ -192,9 +201,9 @@ fn get_column_references(expr: &Expr) -> HashSet { } _ => Ok(TreeNodeRecursion::Continue), }) - .unwrap(); + .map_err(VortexError::from)?; - references + Ok(references) } /// Physical plan node for scans against an in-memory, possibly chunked Vortex Array. 
@@ -246,7 +255,7 @@ impl Stream for VortexRecordBatchStream { let chunk = this .chunks .chunk(this.idx) - .expect("nchunks should match precomputed"); + .ok_or_else(|| vortex_err!("nchunks should match precomputed"))?; this.idx += 1; let struct_array = chunk @@ -261,7 +270,7 @@ impl Stream for VortexRecordBatchStream { exec_datafusion_err!("projection pushdown to Vortex failed: {vortex_err}") })?; - Poll::Ready(Some(Ok(projected_struct.into()))) + Poll::Ready(Some(Ok(projected_struct.try_into()?))) } fn size_hint(&self) -> (usize, Option) { diff --git a/vortex-datafusion/src/memory.rs b/vortex-datafusion/src/memory.rs index b7da3a95d..469bc36bf 100644 --- a/vortex-datafusion/src/memory.rs +++ b/vortex-datafusion/src/memory.rs @@ -6,7 +6,7 @@ use async_trait::async_trait; use datafusion::catalog::Session; use datafusion::datasource::TableProvider; use datafusion::prelude::*; -use datafusion_common::Result as DFResult; +use datafusion_common::{DataFusionError, Result as DFResult}; use datafusion_expr::{TableProviderFilterPushDown, TableType}; use datafusion_physical_expr::EquivalenceProperties; use datafusion_physical_plan::{ExecutionMode, ExecutionPlan, Partitioning, PlanProperties}; @@ -97,7 +97,8 @@ impl TableProvider for VortexMemTable { // using the calculated indices from the filter. 
Some(filter_exprs) => { let filter_projection = - get_filter_projection(filter_exprs, self.schema_ref.clone()); + get_filter_projection(filter_exprs, self.schema_ref.clone()) + .map_err(DataFusionError::from)?; Ok(make_filter_then_take_plan( self.schema_ref.clone(), diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index 79fd0768e..4cdfa3915 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -68,7 +68,7 @@ impl FileOpener for VortexFileOpener { array }; - VortexResult::Ok(RecordBatch::from(array)) + RecordBatch::try_from(array) } }) .map_err(|e| e.into()); diff --git a/vortex-datafusion/src/plans.rs b/vortex-datafusion/src/plans.rs index 7b39a5864..add9a16f8 100644 --- a/vortex-datafusion/src/plans.rs +++ b/vortex-datafusion/src/plans.rs @@ -25,7 +25,7 @@ use vortex::array::ChunkedArray; use vortex::arrow::FromArrowArray; use vortex::compute::take; use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant, IntoCanonical}; -use vortex_error::vortex_err; +use vortex_error::{vortex_err, VortexError}; use crate::datatype::infer_schema; use crate::eval::ExpressionEvaluator; @@ -174,22 +174,20 @@ impl Stream for RowIndicesStream { let next_chunk = this .chunked_array .chunk(this.chunk_idx) - .expect("chunk index in-bounds"); + .ok_or_else(|| vortex_err!("Chunk not found for index {}, nchunks: {}", this.chunk_idx, this.chunked_array.nchunks()))?; this.chunk_idx += 1; // Get the unfiltered record batch. // Since this is a one-shot, we only want to poll the inner future once, to create the // initial batch for us to process. let vortex_struct = next_chunk - .into_struct() - .expect("chunks must be StructArray") - .project(this.filter_projection.as_slice()) - .expect("projection should succeed"); + .into_struct()? 
+ .project(this.filter_projection.as_slice())?; // TODO(adamg): Filter on vortex arrays let array = - ExpressionEvaluator::eval(vortex_struct.into_array(), &this.conjunction_expr).unwrap(); - let selection = array.into_canonical().unwrap().into_arrow(); + ExpressionEvaluator::eval(vortex_struct.into_array(), &this.conjunction_expr)?; + let selection = array.into_canonical()?.into_arrow()?; // Convert the `selection` BooleanArray into a UInt64Array of indices. let selection_indices = selection @@ -235,7 +233,7 @@ impl TakeRowsExec { row_indices: Arc, table: &ChunkedArray, ) -> Self { - let output_schema = Arc::new(schema_ref.project(projection).unwrap()); + let output_schema = Arc::new(schema_ref.project(projection).unwrap_or_else(|err| panic!("Failed to project schema: {}", VortexError::from(err)))); let plan_properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), @@ -365,27 +363,24 @@ where vec![], &opts, ) - .unwrap()))); + .map_err(DataFusionError::from)?))); } let chunk = this .vortex_array .chunk(*this.chunk_idx) - .expect("streamed too many chunks") - .into_struct() - .expect("chunks must be struct-encoded"); + .ok_or_else(|| vortex_err!("Chunk not found for index {}, nchunks: {}", this.chunk_idx, this.vortex_array.nchunks()))? + .into_struct()?; *this.chunk_idx += 1; // TODO(aduffy): this re-decodes the fields from the filter schema, which is wasteful. // We should find a way to avoid decoding the filter columns and only decode the other // columns, then stitch the StructArray back together from those. - let projected_for_output = chunk.project(this.output_projection).unwrap(); - let decoded = take(&projected_for_output.into_array(), &row_indices) - .expect("take") - .into_canonical() - .expect("into_canonical") - .into_arrow(); + let projected_for_output = chunk.project(this.output_projection)?; + let decoded = take(&projected_for_output.into_array(), &row_indices)? + .into_canonical()? 
+ .into_arrow()?; // Send back a single record batch of the decoded data. let output_batch = RecordBatch::from(decoded.as_struct()); diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index fe1ca3167..87dc18426 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -136,6 +136,13 @@ pub enum VortexError { #[backtrace] object_store::Error, ), + #[cfg(feature = "datafusion")] + #[error(transparent)] + DataFusion( + #[from] + #[backtrace] + datafusion_common::DataFusionError, + ), } pub type VortexResult = Result; diff --git a/vortex-serde/benches/ipc_array_reader_take.rs b/vortex-serde/benches/ipc_array_reader_take.rs index 98c359480..d6282ab19 100644 --- a/vortex-serde/benches/ipc_array_reader_take.rs +++ b/vortex-serde/benches/ipc_array_reader_take.rs @@ -1,3 +1,4 @@ +#![allow(clippy::unwrap_used)] use std::sync::Arc; use criterion::async_executor::FuturesExecutor; diff --git a/vortex-serde/benches/ipc_take.rs b/vortex-serde/benches/ipc_take.rs index fc107f394..92bfdfe43 100644 --- a/vortex-serde/benches/ipc_take.rs +++ b/vortex-serde/benches/ipc_take.rs @@ -1,3 +1,4 @@ +#![allow(clippy::unwrap_used)] use std::sync::Arc; use arrow::ipc::reader::StreamReader as ArrowStreamReader; diff --git a/vortex-serde/src/chunked_reader/mod.rs b/vortex-serde/src/chunked_reader/mod.rs index 3d845ae25..83156ff7b 100644 --- a/vortex-serde/src/chunked_reader/mod.rs +++ b/vortex-serde/src/chunked_reader/mod.rs @@ -54,10 +54,17 @@ impl ChunkedArrayReader { pub async fn array_stream(&mut self) -> impl ArrayStream + '_ { let mut cursor = Cursor::new(&self.read); - cursor.set_position(u64::try_from(&scalar_at(&self.byte_offsets, 0).unwrap()).unwrap()); + let byte_offset = scalar_at(&self.byte_offsets, 0) + .and_then(|s| u64::try_from(&s)) + .unwrap_or_else(|err| { + panic!("Failed to convert byte_offset to u64: {err}"); + }); + cursor.set_position(byte_offset); StreamArrayReader::try_new(cursor, self.context.clone()) .await - .unwrap() + .unwrap_or_else(|err| { 
+ panic!("Failed to create stream array reader: {err}"); + }) .with_dtype(self.dtype.clone()) .into_array_stream() } diff --git a/vortex-serde/src/chunked_reader/take_rows.rs b/vortex-serde/src/chunked_reader/take_rows.rs index 35923d659..e85e41d64 100644 --- a/vortex-serde/src/chunked_reader/take_rows.rs +++ b/vortex-serde/src/chunked_reader/take_rows.rs @@ -11,7 +11,7 @@ use vortex::stats::ArrayStatistics; use vortex::stream::{ArrayStream, ArrayStreamExt}; use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant}; use vortex_dtype::PType; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexResult}; use vortex_scalar::Scalar; use crate::chunked_reader::ChunkedArrayReader; @@ -54,24 +54,19 @@ impl ChunkedArrayReader { // Coalesce the chunks that we're going to read from. let coalesced_chunks = self.coalesce_chunks(chunk_idxs.as_ref()); + let mut start_chunks: Vec = Vec::with_capacity(coalesced_chunks.len()); + let mut stop_chunks: Vec = Vec::with_capacity(coalesced_chunks.len()); + for (i, chunks) in coalesced_chunks.iter().enumerate() { + start_chunks.push(chunks.first().ok_or_else(|| vortex_err!("Coalesced chunk {i} cannot be empty"))?.chunk_idx); + stop_chunks.push(chunks.last().ok_or_else(|| vortex_err!("Coalesced chunk {i} cannot be empty"))?.chunk_idx + 1); + } + // Grab the row and byte offsets for each chunk range. 
- let start_chunks = PrimitiveArray::from( - coalesced_chunks - .iter() - .map(|chunks| chunks[0].chunk_idx) - .collect_vec(), - ) - .into_array(); + let start_chunks = PrimitiveArray::from(start_chunks).into_array(); let start_rows = take(&self.row_offsets, &start_chunks)?.into_primitive()?; let start_bytes = take(&self.byte_offsets, &start_chunks)?.into_primitive()?; - let stop_chunks = PrimitiveArray::from( - coalesced_chunks - .iter() - .map(|chunks| chunks.last().unwrap().chunk_idx + 1) - .collect_vec(), - ) - .into_array(); + let stop_chunks = PrimitiveArray::from(stop_chunks).into_array(); let stop_rows = take(&self.row_offsets, &stop_chunks)?.into_primitive()?; let stop_bytes = take(&self.byte_offsets, &stop_chunks)?.into_primitive()?; @@ -106,7 +101,6 @@ impl ChunkedArrayReader { let _hint = self.read.performance_hint(); chunk_idxs .iter() - .cloned() .map(|chunk_idx| vec![chunk_idx.clone()]) .collect_vec() } diff --git a/vortex-serde/src/io/object_store.rs b/vortex-serde/src/io/object_store.rs index 863ce81fa..62cc7ed09 100644 --- a/vortex-serde/src/io/object_store.rs +++ b/vortex-serde/src/io/object_store.rs @@ -11,7 +11,7 @@ use object_store::path::Path; use object_store::{ObjectStore, WriteMultipart}; use vortex_buffer::io_buf::IoBuf; use vortex_buffer::Buffer; -use vortex_error::VortexResult; +use vortex_error::{VortexError, VortexResult}; use crate::io::{VortexRead, VortexReadAt, VortexWrite}; @@ -78,7 +78,12 @@ impl VortexReadAt for ObjectStoreReadAt { } async fn size(&self) -> u64 { - self.object_store.head(&self.location).await.unwrap().size as u64 + self.object_store + .head(&self.location) + .await + .map_err(VortexError::ObjectStore) + .unwrap_or_else(|err| panic!("Failed to get size of object at location {}: {err}", self.location)) + .size as u64 } } diff --git a/vortex-serde/src/io/tokio.rs b/vortex-serde/src/io/tokio.rs index ef0f4fb45..a2149a380 100644 --- a/vortex-serde/src/io/tokio.rs +++ b/vortex-serde/src/io/tokio.rs @@ -7,6 +7,7 @@ use 
bytes::BytesMut; use tokio::fs::File; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use vortex_buffer::io_buf::IoBuf; +use vortex_error::VortexError; use crate::io::{VortexRead, VortexReadAt, VortexWrite}; @@ -27,7 +28,12 @@ impl VortexReadAt for TokioAdapter { } async fn size(&self) -> u64 { - self.0.metadata().await.unwrap().len() + self.0 + .metadata() + .await + .map_err(VortexError::IOError) + .unwrap_or_else(|err| panic!("Failed to get size of file: {err}")) + .len() } } diff --git a/vortex-serde/src/layouts/reader/builder.rs b/vortex-serde/src/layouts/reader/builder.rs index f12f173de..2c130eea4 100644 --- a/vortex-serde/src/layouts/reader/builder.rs +++ b/vortex-serde/src/layouts/reader/builder.rs @@ -133,12 +133,12 @@ impl VortexLayoutReaderBuilder { let footer_offset = u64::from_le_bytes( buf[magic_bytes_loc - 8..magic_bytes_loc] .try_into() - .unwrap(), + .unwrap_or_else(|err| panic!("Failed to slice footer offset: {err}")), ); let schema_offset = u64::from_le_bytes( buf[magic_bytes_loc - 16..magic_bytes_loc - 8] .try_into() - .unwrap(), + .unwrap_or_else(|err| panic!("Failed to slice schema offset: {err}")), ); Ok(Footer { diff --git a/vortex-serde/src/layouts/reader/layouts.rs b/vortex-serde/src/layouts/reader/layouts.rs index 0a0101bee..98e7fa21b 100644 --- a/vortex-serde/src/layouts/reader/layouts.rs +++ b/vortex-serde/src/layouts/reader/layouts.rs @@ -149,7 +149,9 @@ impl ColumnLayout { let tab = flatbuffers::Table::new(&self.fb_bytes, self.fb_loc); fb::Layout::init_from_table(tab) }; - fb_layout.layout_as_nested_layout().expect("must be nested") + fb_layout.layout_as_nested_layout().unwrap_or_else(|| { + panic!("ColumnLayout: Failed to read nested layout from flatbuffer") + }) } fn read_child( @@ -285,7 +287,9 @@ impl ChunkedLayout { let tab = flatbuffers::Table::new(&self.fb_bytes, self.fb_loc); fb::Layout::init_from_table(tab) }; - fb_layout.layout_as_nested_layout().expect("must be nested") + 
fb_layout.layout_as_nested_layout().unwrap_or_else(|| { + panic!("ChunkedLayout: Failed to read nested layout from flatbuffer") + }) } } diff --git a/vortex-serde/src/layouts/reader/stream.rs b/vortex-serde/src/layouts/reader/stream.rs index 7983c3996..cbb100d69 100644 --- a/vortex-serde/src/layouts/reader/stream.rs +++ b/vortex-serde/src/layouts/reader/stream.rs @@ -97,7 +97,7 @@ impl Stream for VortexLayoutBatchStrea match read { ReadResult::GetMsgs(messages) => { let reader = - mem::take(&mut self.reader).expect("Invalid state transition"); + mem::take(&mut self.reader).ok_or_else(|| vortex_err!("Invalid state transition"))?; let read_future = read_ranges(reader, messages).boxed(); self.state = StreamingState::Reading(read_future); } @@ -123,7 +123,9 @@ impl Stream for VortexLayoutBatchStrea } StreamingState::Reading(f) => match ready!(f.poll_unpin(cx)) { Ok((read, buffers)) => { - let mut write_cache = self.messages_cache.write().unwrap(); + let mut write_cache = self.messages_cache.write().unwrap_or_else(|err| { + panic!("Failed to write to message cache: {err}") + }); for (id, buf) in buffers { write_cache.set(id, buf) } diff --git a/vortex-serde/src/layouts/writer/layout_writer.rs b/vortex-serde/src/layouts/writer/layout_writer.rs index f841b2249..2735e7331 100644 --- a/vortex-serde/src/layouts/writer/layout_writer.rs +++ b/vortex-serde/src/layouts/writer/layout_writer.rs @@ -10,7 +10,7 @@ use vortex::validity::Validity; use vortex::{Array, ArrayDType, IntoArray}; use vortex_buffer::io_buf::IoBuf; use vortex_dtype::DType; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexResult}; use vortex_flatbuffers::{footer as fb, WriteFlatBuffer}; use crate::io::VortexWrite; @@ -132,7 +132,7 @@ impl LayoutWriter { row_offsets .last() .map(|off| off + chunk.len() as u64) - .expect("Row offsets should be initialized with a value"), + .ok_or_else(|| vortex_err!("Row offsets should be initialized with a value"))?, ); 
self.msgs.write_batch(chunk).await?; byte_offsets.push(self.msgs.tell()); @@ -154,8 +154,8 @@ impl LayoutWriter { } async fn write_metadata_arrays(&mut self) -> VortexResult { - let DType::Struct(..) = self.dtype.as_ref().expect("Should have written values") else { - unreachable!("Values are a structarray") + let DType::Struct(..) = self.dtype.as_ref().ok_or_else(|| vortex_err!("Should have written values"))? else { + unreachable!("Values are a StructArray") }; let mut column_layouts = VecDeque::with_capacity(self.column_chunks.len()); @@ -229,7 +229,7 @@ impl LayoutWriter { let dtype_len = Self::write_flatbuffer( &mut w, - &IPCSchema(&self.dtype.expect("Needed a schema at this point")), + &IPCSchema(&self.dtype.ok_or_else(|| vortex_err!("Schema should be written by now"))?), ) .await?; let _ = Self::write_flatbuffer(&mut w, &footer).await?; diff --git a/vortex-serde/src/message_reader.rs b/vortex-serde/src/message_reader.rs index adcd20ce3..32d93da7c 100644 --- a/vortex-serde/src/message_reader.rs +++ b/vortex-serde/src/message_reader.rs @@ -95,7 +95,7 @@ impl MessageReader { let buf = self.next().await?; let msg = unsafe { root_unchecked::(&buf) } .header_as_schema() - .expect("Checked earlier in the function"); + .ok_or_else(|| vortex_err!("Expected schema message; this was checked earlier in the function"))?; Ok(IPCDType::read_flatbuffer(&msg)?.0) } diff --git a/vortex-serde/src/messages.rs b/vortex-serde/src/messages.rs index ff3d3415b..51b201e5d 100644 --- a/vortex-serde/src/messages.rs +++ b/vortex-serde/src/messages.rs @@ -135,10 +135,13 @@ impl<'a> WriteFlatBuffer for IPCArray<'a> { .metadata() .try_serialize_metadata() // TODO(ngates): should we serialize externally to here? 
- .unwrap(); + .unwrap_or_else(|err| panic!("Failed to serialize metadata: {}", err)); Some(fbb.create_vector(metadata.as_ref())) } - Array::View(v) => Some(fbb.create_vector(v.metadata().unwrap())), + Array::View(v) => Some(fbb.create_vector(v.metadata().unwrap_or_else(|| { + // TODO(wmanning): should this just return None? why does this panic? + panic!("ArrayView is missing metadata during serialization") + }))), }; let children = column_data diff --git a/vortex-serde/src/stream_reader/mod.rs b/vortex-serde/src/stream_reader/mod.rs index f8568eebd..73663070e 100644 --- a/vortex-serde/src/stream_reader/mod.rs +++ b/vortex-serde/src/stream_reader/mod.rs @@ -41,12 +41,12 @@ impl StreamArrayReader { /// Reads a single array from the stream. pub fn array_stream(&mut self) -> impl ArrayStream + '_ { - let dtype = self.dtype.as_ref().expect("DType not set").deref().clone(); + let dtype = self.dtype.as_ref().unwrap_or_else(|| panic!("DType not set")).deref().clone(); self.msgs.array_stream(self.ctx.clone(), dtype) } pub fn into_array_stream(self) -> impl ArrayStream { - let dtype = self.dtype.as_ref().expect("DType not set").deref().clone(); + let dtype = self.dtype.as_ref().unwrap_or_else(|| panic!("DType not set")).deref().clone(); self.msgs.into_array_stream(self.ctx.clone(), dtype) } From 05b7030d4da844174651e5961165ab106603ca4f Mon Sep 17 00:00:00 2001 From: Will Manning Date: Tue, 13 Aug 2024 17:28:42 -0400 Subject: [PATCH 11/39] format the world --- bench-vortex/Cargo.toml | 9 +++- bench-vortex/benches/datafusion_benchmark.rs | 4 +- bench-vortex/benches/tpch_benchmark.rs | 2 - bench-vortex/src/bin/tpch_benchmark.rs | 2 - bench-vortex/src/data_downloads.rs | 2 +- bench-vortex/src/lib.rs | 7 +-- bench-vortex/src/reader.rs | 2 +- bench-vortex/src/tpch/mod.rs | 4 +- encodings/alp/src/array.rs | 4 +- encodings/byte-bool/src/lib.rs | 12 +++-- encodings/dict/src/compress.rs | 12 +++-- .../src/bitpacking/compute/search_sorted.rs | 4 +- 
encodings/fastlanes/src/bitpacking/mod.rs | 4 +- encodings/fastlanes/src/delta/mod.rs | 5 +- encodings/fastlanes/src/for/mod.rs | 4 +- encodings/roaring/src/boolean/mod.rs | 3 +- encodings/roaring/src/integer/compress.rs | 10 +++- encodings/runend-bool/src/compress.rs | 20 ++++++-- encodings/runend/src/compress.rs | 16 ++++++- encodings/zigzag/src/zigzag.rs | 7 ++- pyvortex/src/vortex_arrow.rs | 13 ++++- vortex-array/src/array/arbitrary.rs | 6 +-- vortex-array/src/array/bool/mod.rs | 4 +- .../src/array/chunked/compute/take.rs | 4 +- .../src/array/datetime/temporal/from.rs | 3 +- vortex-array/src/array/primitive/mod.rs | 11 +++-- vortex-array/src/array/struct_/mod.rs | 5 +- vortex-array/src/array/varbin/compute/take.rs | 24 +++++++--- vortex-array/src/array/varbin/mod.rs | 7 ++- vortex-array/src/array/varbinview/mod.rs | 3 +- vortex-array/src/arrow/array.rs | 47 +++++++++++++++---- vortex-array/src/arrow/recordbatch.rs | 4 +- vortex-array/src/canonical.rs | 20 ++++---- vortex-array/src/encoding.rs | 10 ++-- vortex-array/src/implementation.rs | 6 ++- vortex-array/src/lib.rs | 10 +++- vortex-array/src/stats/statsset.rs | 14 ++++-- vortex-array/src/stream/ext.rs | 5 +- vortex-array/src/validity.rs | 11 +++-- vortex-array/src/variants.rs | 24 ++++++---- vortex-datafusion/examples/table_provider.rs | 5 +- vortex-datafusion/src/expr.rs | 6 ++- vortex-datafusion/src/memory.rs | 14 ++++-- vortex-datafusion/src/persistent/opener.rs | 4 +- vortex-datafusion/src/plans.rs | 27 +++++++---- vortex-scalar/src/arrow.rs | 29 ++++++------ vortex-scalar/src/datafusion.rs | 24 +++++----- vortex-scalar/src/list.rs | 8 +++- vortex-serde/src/chunked_reader/take_rows.rs | 15 +++++- vortex-serde/src/io/object_store.rs | 7 ++- vortex-serde/src/layouts/reader/layouts.rs | 6 +-- vortex-serde/src/layouts/reader/stream.rs | 4 +- .../src/layouts/writer/layout_writer.rs | 12 ++++- vortex-serde/src/message_reader.rs | 4 +- vortex-serde/src/stream_reader/mod.rs | 14 +++++- 55 files changed, 382 
insertions(+), 161 deletions(-) diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index ad1fdc34b..18e7a2730 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -12,8 +12,13 @@ include = { workspace = true } edition = { workspace = true } rust-version = { workspace = true } -[lints] -workspace = true +[lints.rust] +warnings = "deny" +unsafe_op_in_unsafe_fn = "deny" + +[lints.clippy] +all = { level = "deny", priority = -1 } +or_fun_call = "deny" [dependencies] anyhow = { workspace = true } diff --git a/bench-vortex/benches/datafusion_benchmark.rs b/bench-vortex/benches/datafusion_benchmark.rs index f969ebacd..88d316d9d 100644 --- a/bench-vortex/benches/datafusion_benchmark.rs +++ b/bench-vortex/benches/datafusion_benchmark.rs @@ -1,5 +1,3 @@ -#![allow(clippy::use_debug)] - use std::collections::HashSet; use std::sync::Arc; @@ -83,7 +81,7 @@ fn toy_dataset_arrow() -> RecordBatch { } fn toy_dataset_vortex(compress: bool) -> Array { - let uncompressed = toy_dataset_arrow().into(); + let uncompressed = toy_dataset_arrow().try_into().unwrap(); if !compress { return uncompressed; diff --git a/bench-vortex/benches/tpch_benchmark.rs b/bench-vortex/benches/tpch_benchmark.rs index 9862b8765..8054f525a 100644 --- a/bench-vortex/benches/tpch_benchmark.rs +++ b/bench-vortex/benches/tpch_benchmark.rs @@ -1,5 +1,3 @@ -#![allow(clippy::use_debug)] - use bench_vortex::tpch::dbgen::{DBGen, DBGenOptions}; use bench_vortex::tpch::{load_datasets, tpch_queries, Format}; use criterion::{criterion_group, criterion_main, Criterion}; diff --git a/bench-vortex/src/bin/tpch_benchmark.rs b/bench-vortex/src/bin/tpch_benchmark.rs index 7f421032a..8056b27ef 100644 --- a/bench-vortex/src/bin/tpch_benchmark.rs +++ b/bench-vortex/src/bin/tpch_benchmark.rs @@ -1,5 +1,3 @@ -#![allow(clippy::use_debug)] - use std::sync; use std::time::SystemTime; diff --git a/bench-vortex/src/data_downloads.rs b/bench-vortex/src/data_downloads.rs index 2cdeea796..3902763cb 100644 --- 
a/bench-vortex/src/data_downloads.rs +++ b/bench-vortex/src/data_downloads.rs @@ -46,7 +46,7 @@ pub fn data_vortex_uncompressed(fname_out: &str, downloaded_data: PathBuf) -> Pa let array = ChunkedArray::try_new( reader .into_iter() - .map(|batch_result| Array::from(batch_result.unwrap())) + .map(|batch_result| Array::try_from(batch_result.unwrap()).unwrap()) .collect(), dtype, ) diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs index 216ef859b..90346dc05 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -188,7 +188,8 @@ pub fn compress_taxi_data() -> Array { let chunks = reader .into_iter() .map(|batch_result| batch_result.unwrap()) - .map(Array::from) + .map(Array::try_from) + .map(Result::unwrap) .map(|array| { uncompressed_size += array.nbytes(); compressor.compress(&array).unwrap() @@ -286,7 +287,7 @@ mod test { let struct_arrow: ArrowStructArray = record_batch.into(); let arrow_array: ArrowArrayRef = Arc::new(struct_arrow); let vortex_array = Array::from_arrow(arrow_array.clone(), false); - let vortex_as_arrow = vortex_array.into_canonical().unwrap().into_arrow(); + let vortex_as_arrow = vortex_array.into_canonical().unwrap().into_arrow().unwrap(); assert_eq!(vortex_as_arrow.deref(), arrow_array.deref()); } } @@ -307,7 +308,7 @@ mod test { let vortex_array = Array::from_arrow(arrow_array.clone(), false); let compressed = compressor.compress(&vortex_array).unwrap(); - let compressed_as_arrow = compressed.into_canonical().unwrap().into_arrow(); + let compressed_as_arrow = compressed.into_canonical().unwrap().into_arrow().unwrap(); assert_eq!(compressed_as_arrow.deref(), arrow_array.deref()); } } diff --git a/bench-vortex/src/reader.rs b/bench-vortex/src/reader.rs index a7766c34d..b5870826e 100644 --- a/bench-vortex/src/reader.rs +++ b/bench-vortex/src/reader.rs @@ -98,7 +98,7 @@ pub fn compress_parquet_to_vortex(parquet_path: &Path) -> VortexResult>(); let mut arrays_map: HashMap, Vec> = HashMap::default(); diff --git 
a/encodings/alp/src/array.rs b/encodings/alp/src/array.rs index f67e49c5a..d30c3650e 100644 --- a/encodings/alp/src/array.rs +++ b/encodings/alp/src/array.rs @@ -97,7 +97,9 @@ impl ALPArray { #[inline] pub fn ptype(&self) -> PType { - self.dtype().try_into().unwrap_or_else(|err| panic!("Failed to convert DType to PType: {err}")) + self.dtype() + .try_into() + .unwrap_or_else(|err| panic!("Failed to convert DType to PType: {err}")) } } diff --git a/encodings/byte-bool/src/lib.rs b/encodings/byte-bool/src/lib.rs index 8c7581388..884a34afd 100644 --- a/encodings/byte-bool/src/lib.rs +++ b/encodings/byte-bool/src/lib.rs @@ -64,7 +64,9 @@ impl ByteBoolArray { } pub fn buffer(&self) -> &Buffer { - self.array().buffer().unwrap_or_else(|| panic!("ByteBoolArray is missing the underlying buffer")) + self.array() + .buffer() + .unwrap_or_else(|| panic!("ByteBoolArray is missing the underlying buffer")) } fn maybe_null_slice(&self) -> &[bool] { @@ -93,7 +95,8 @@ impl BoolArrayTrait for ByteBoolArray { impl From> for ByteBoolArray { fn from(value: Vec) -> Self { - Self::try_from_vec(value, Validity::AllValid).unwrap_or_else(|err| panic!("Failed to create ByteBoolArray from Vec: {err}")) + Self::try_from_vec(value, Validity::AllValid) + .unwrap_or_else(|err| panic!("Failed to create ByteBoolArray from Vec: {err}")) } } @@ -107,8 +110,9 @@ impl From>> for ByteBoolArray { .map(std::option::Option::unwrap_or_default) .collect(); - Self::try_from_vec(data, validity) - .unwrap_or_else(|err| panic!("Failed to create ByteBoolArray from nullable bools: {err}")) + Self::try_from_vec(data, validity).unwrap_or_else(|err| { + panic!("Failed to create ByteBoolArray from nullable bools: {err}") + }) } } diff --git a/encodings/dict/src/compress.rs b/encodings/dict/src/compress.rs index 96408873e..eae2babec 100644 --- a/encodings/dict/src/compress.rs +++ b/encodings/dict/src/compress.rs @@ -65,7 +65,9 @@ pub fn dict_encode_typed_primitive( } } }) - .unwrap_or_else(|err| panic!("Failed to 
iterate over primitive array during dictionary encoding: {err}")); + .unwrap_or_else(|err| { + panic!("Failed to iterate over primitive array during dictionary encoding: {err}") + }); let values_validity = if array.dtype().is_nullable() { let mut validity = vec![true; values.len()]; @@ -86,7 +88,9 @@ pub fn dict_encode_typed_primitive( pub fn dict_encode_varbin(array: &VarBinArray) -> (PrimitiveArray, VarBinArray) { array .with_iterator(|iter| dict_encode_typed_varbin(array.dtype().clone(), iter)) - .unwrap_or_else(|err| panic!("Failed to iterate over varbin array during dictionary encoding: {err}")) + .unwrap_or_else(|err| { + panic!("Failed to iterate over varbin array during dictionary encoding: {err}") + }) } fn lookup_bytes<'a, T: NativePType + AsPrimitive>( @@ -165,7 +169,9 @@ where dtype, values_validity, ) - .unwrap_or_else(|err| panic!("Failed to create VarBinArray dictionary during encoding: {err}")) + .unwrap_or_else(|err| { + panic!("Failed to create VarBinArray dictionary during encoding: {err}") + }), ) } diff --git a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs index 35e28999e..76a1acfd6 100644 --- a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs +++ b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs @@ -43,7 +43,9 @@ struct BitPackedSearch { impl BitPackedSearch { pub fn new(array: &BitPackedArray) -> Self { Self { - packed: array.packed().into_primitive().unwrap_or_else(|err| panic!("Failed to get packed bytes as PrimitiveArray: {err}")), + packed: array.packed().into_primitive().unwrap_or_else(|err| { + panic!("Failed to get packed bytes as PrimitiveArray: {err}") + }), offset: array.offset(), length: array.len(), bit_width: array.bit_width(), diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index 2a6409696..6ae9b69bc 100644 --- a/encodings/fastlanes/src/bitpacking/mod.rs +++ 
b/encodings/fastlanes/src/bitpacking/mod.rs @@ -152,7 +152,9 @@ impl BitPackedArray { #[inline] pub fn ptype(&self) -> PType { - self.dtype().try_into().unwrap_or_else(|err| panic!("Failed to convert BitpackedArray DType to PType: {err}")) + self.dtype() + .try_into() + .unwrap_or_else(|err| panic!("Failed to convert BitpackedArray DType to PType: {err}")) } #[inline] diff --git a/encodings/fastlanes/src/delta/mod.rs b/encodings/fastlanes/src/delta/mod.rs index 1fb9dc1d3..e8f592f33 100644 --- a/encodings/fastlanes/src/delta/mod.rs +++ b/encodings/fastlanes/src/delta/mod.rs @@ -73,7 +73,10 @@ impl DeltaArray { #[inline] fn lanes(&self) -> usize { - let ptype = self.dtype().try_into().unwrap_or_else(|err| panic!("Failed to convert DeltaArray DType to PType: {err}")); + let ptype = self + .dtype() + .try_into() + .unwrap_or_else(|err| panic!("Failed to convert DeltaArray DType to PType: {err}")); match_each_unsigned_integer_ptype!(ptype, |$T| { <$T as fastlanes::FastLanes>::LANES }) diff --git a/encodings/fastlanes/src/for/mod.rs b/encodings/fastlanes/src/for/mod.rs index 1ead9e722..8f68782c0 100644 --- a/encodings/fastlanes/src/for/mod.rs +++ b/encodings/fastlanes/src/for/mod.rs @@ -65,7 +65,9 @@ impl FoRArray { #[inline] pub fn ptype(&self) -> PType { - self.dtype().try_into().unwrap_or_else(|err| panic!("Failed to convert FoRArray DType to PType: {err}")) + self.dtype() + .try_into() + .unwrap_or_else(|err| panic!("Failed to convert FoRArray DType to PType: {err}")) } } diff --git a/encodings/roaring/src/boolean/mod.rs b/encodings/roaring/src/boolean/mod.rs index bff86560c..f7e517698 100644 --- a/encodings/roaring/src/boolean/mod.rs +++ b/encodings/roaring/src/boolean/mod.rs @@ -10,8 +10,7 @@ use vortex::validity::{ArrayValidity, LogicalValidity, Validity}; use vortex::variants::{ArrayVariants, BoolArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{ - impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical, - 
TypedArray, + impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical, TypedArray, }; use vortex_buffer::Buffer; use vortex_dtype::DType; diff --git a/encodings/roaring/src/integer/compress.rs b/encodings/roaring/src/integer/compress.rs index 7d0c92e97..47faea260 100644 --- a/encodings/roaring/src/integer/compress.rs +++ b/encodings/roaring/src/integer/compress.rs @@ -20,7 +20,15 @@ fn roaring_encode_primitive( values: &[T], ) -> VortexResult { let mut bitmap = Bitmap::new(); - bitmap.extend(values.iter().map(|i| i.to_u32().ok_or_else(|| vortex_err!("Failed to cast value {} to u32", i))).collect::>>()?); + bitmap.extend( + values + .iter() + .map(|i| { + i.to_u32() + .ok_or_else(|| vortex_err!("Failed to cast value {} to u32", i)) + }) + .collect::>>()?, + ); bitmap.run_optimize(); bitmap.shrink_to_fit(); RoaringIntArray::try_new(bitmap, T::PTYPE) diff --git a/encodings/runend-bool/src/compress.rs b/encodings/runend-bool/src/compress.rs index 1a48fb50f..1d78cb202 100644 --- a/encodings/runend-bool/src/compress.rs +++ b/encodings/runend-bool/src/compress.rs @@ -29,7 +29,9 @@ pub fn runend_bool_encode_slice(elements: &BooleanBuffer) -> (Vec, bool) { ends.push(e as u64); } - let last_end = ends.last().unwrap_or_else(|| panic!("RunEndBoolArray is missing its run ends")); + let last_end = ends + .last() + .unwrap_or_else(|| panic!("RunEndBoolArray is missing its run ends")); if *last_end != elements.len() as u64 { ends.push(elements.len() as u64) } @@ -56,8 +58,20 @@ pub fn runend_bool_decode_slice + FromPrimit offset: usize, length: usize, ) -> Vec { - let offset_e = E::from_usize(offset).unwrap_or_else(|| panic!("offset {} cannot be converted to {}", offset, std::any::type_name::())); - let length_e = E::from_usize(length).unwrap_or_else(|| panic!("length {} cannot be converted to {}", length, std::any::type_name::())); + let offset_e = E::from_usize(offset).unwrap_or_else(|| { + panic!( + "offset {} cannot be converted to {}", + offset, + 
std::any::type_name::() + ) + }); + let length_e = E::from_usize(length).unwrap_or_else(|| { + panic!( + "length {} cannot be converted to {}", + length, + std::any::type_name::() + ) + }); let trimmed_ends = run_ends .iter() .map(|v| *v - offset_e) diff --git a/encodings/runend/src/compress.rs b/encodings/runend/src/compress.rs index 55944fde7..ef4e54b2a 100644 --- a/encodings/runend/src/compress.rs +++ b/encodings/runend/src/compress.rs @@ -92,8 +92,20 @@ pub fn runend_decode_primitive< offset: usize, length: usize, ) -> Vec { - let offset_e = E::from_usize(offset).unwrap_or_else(|| panic!("offset {} cannot be converted to {}", offset, std::any::type_name::())); - let length_e = E::from_usize(length).unwrap_or_else(|| panic!("length {} cannot be converted to {}", length, std::any::type_name::())); + let offset_e = E::from_usize(offset).unwrap_or_else(|| { + panic!( + "offset {} cannot be converted to {}", + offset, + std::any::type_name::() + ) + }); + let length_e = E::from_usize(length).unwrap_or_else(|| { + panic!( + "length {} cannot be converted to {}", + length, + std::any::type_name::() + ) + }); let trimmed_ends = run_ends .iter() .map(|v| *v - offset_e) diff --git a/encodings/zigzag/src/zigzag.rs b/encodings/zigzag/src/zigzag.rs index 2edef3aa4..3a6b3a6ec 100644 --- a/encodings/zigzag/src/zigzag.rs +++ b/encodings/zigzag/src/zigzag.rs @@ -21,7 +21,8 @@ pub struct ZigZagMetadata; impl ZigZagArray { pub fn new(encoded: Array) -> Self { - Self::try_new(encoded).unwrap_or_else(|err| panic!("Failed to construct ZigZagArray: {}", err)) + Self::try_new(encoded) + .unwrap_or_else(|err| panic!("Failed to construct ZigZagArray: {}", err)) } pub fn try_new(encoded: Array) -> VortexResult { @@ -47,7 +48,9 @@ impl ZigZagArray { } pub fn encoded(&self) -> Array { - let ptype = PType::try_from(self.dtype()).unwrap_or_else(|err| panic!("Failed to convert DType {} to PType: {}", self.dtype(), err)); + let ptype = PType::try_from(self.dtype()).unwrap_or_else(|err| { + 
panic!("Failed to convert DType {} to PType: {}", self.dtype(), err) + }); let encoded = DType::from(ptype.to_unsigned()).with_nullability(self.dtype().nullability()); self.array() .child(0, &encoded, self.len()) diff --git a/pyvortex/src/vortex_arrow.rs b/pyvortex/src/vortex_arrow.rs index 969fc481f..67c030357 100644 --- a/pyvortex/src/vortex_arrow.rs +++ b/pyvortex/src/vortex_arrow.rs @@ -16,10 +16,19 @@ pub fn export_array<'py>(py: Python<'py>, array: &Array) -> PyResult = if let Ok(chunked_array) = ChunkedArray::try_from(array) { chunked_array .chunks() - .map(|chunk| chunk.into_canonical().and_then(|c| c.into_arrow()).map_err(map_to_pyerr)) + .map(|chunk| { + chunk + .into_canonical() + .and_then(|c| c.into_arrow()) + .map_err(map_to_pyerr) + }) .collect::>>()? } else { - vec![array.clone().into_canonical().and_then(|c| c.into_arrow()).map_err(map_to_pyerr)?] + vec![array + .clone() + .into_canonical() + .and_then(|c| c.into_arrow()) + .map_err(map_to_pyerr)?] }; if chunks.is_empty() { return Err(PyValueError::new_err("No chunks in array")); diff --git a/vortex-array/src/array/arbitrary.rs b/vortex-array/src/array/arbitrary.rs index 4bab4e94d..33ec9c323 100644 --- a/vortex-array/src/array/arbitrary.rs +++ b/vortex-array/src/array/arbitrary.rs @@ -13,7 +13,7 @@ impl<'a> Arbitrary<'a> for Array { } fn random_array(u: &mut Unstructured) -> Result { - match u.int_in_range(0..=9).unwrap() { + match u.int_in_range(0..=9)? { 0 => random_primitive::(u), 1 => random_primitive::(u), 2 => random_primitive::(u), @@ -33,7 +33,7 @@ fn random_array(u: &mut Unstructured) -> Result { fn random_string(u: &mut Unstructured) -> Result { let v = Vec::>::arbitrary(u)?; - let arr = match u.int_in_range(0..=1).unwrap() { + let arr = match u.int_in_range(0..=1)? 
{ 0 => VarBinArray::from_iter(v, DType::Utf8(Nullability::Nullable)).into_array(), 1 => VarBinViewArray::from_iter_nullable_str(v).into_array(), _ => unreachable!(), @@ -44,7 +44,7 @@ fn random_string(u: &mut Unstructured) -> Result { fn random_bytes(u: &mut Unstructured) -> Result { let v = Vec::>>::arbitrary(u)?; - let arr = match u.int_in_range(0..=1).unwrap() { + let arr = match u.int_in_range(0..=1)? { 0 => VarBinArray::from_iter(v, DType::Utf8(Nullability::Nullable)).into_array(), 1 => VarBinViewArray::from_iter_nullable_bin(v).into_array(), _ => unreachable!(), diff --git a/vortex-array/src/array/bool/mod.rs b/vortex-array/src/array/bool/mod.rs index 4db0f8f4a..6eb8ef355 100644 --- a/vortex-array/src/array/bool/mod.rs +++ b/vortex-array/src/array/bool/mod.rs @@ -27,7 +27,9 @@ pub struct BoolMetadata { impl BoolArray { pub fn buffer(&self) -> &Buffer { - self.array().buffer().unwrap_or_else(|| panic!("Missing buffer in BoolArray")) + self.array() + .buffer() + .unwrap_or_else(|| panic!("Missing buffer in BoolArray")) } pub fn boolean_buffer(&self) -> BooleanBuffer { diff --git a/vortex-array/src/array/chunked/compute/take.rs b/vortex-array/src/array/chunked/compute/take.rs index 8bdc455ca..dcdbf144f 100644 --- a/vortex-array/src/array/chunked/compute/take.rs +++ b/vortex-array/src/array/chunked/compute/take.rs @@ -113,7 +113,9 @@ fn take_strict_sorted(chunked: &ChunkedArray, indices: &Array) -> VortexResult for ExtMetadata { None => meta.extend_from_slice(0u16.to_le_bytes().as_slice()), Some(tz) => { let tz_bytes = tz.as_bytes(); - let tz_len = u16::try_from(tz_bytes.len()).unwrap_or_else(|err| panic!("tz did not fit in u16: {err}")); + let tz_len = u16::try_from(tz_bytes.len()) + .unwrap_or_else(|err| panic!("tz did not fit in u16: {err}")); meta.extend_from_slice(tz_len.to_le_bytes().as_slice()); meta.extend_from_slice(tz_bytes); } diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 18cd004f4..48bed19e7 
100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -43,7 +43,9 @@ impl PrimitiveArray { DType::from(ptype).with_nullability(validity.nullability()), length, PrimitiveMetadata { - validity: validity.to_metadata(length).unwrap_or_else(|err| panic!("Invalid validity: {err}")), + validity: validity + .to_metadata(length) + .unwrap_or_else(|err| panic!("Invalid validity: {err}")), }, Some(buffer), validity.into_array().into_iter().collect_vec().into(), @@ -90,7 +92,9 @@ impl PrimitiveArray { } pub fn buffer(&self) -> &Buffer { - self.array().buffer().unwrap_or_else(|| panic!("Missing buffer in PrimitiveArray")) + self.array() + .buffer() + .unwrap_or_else(|| panic!("Missing buffer in PrimitiveArray")) } pub fn maybe_null_slice(&self) -> &[T] { @@ -217,6 +221,7 @@ impl AcceptArrayVisitor for PrimitiveArray { impl Array { pub fn as_primitive(&self) -> PrimitiveArray { - PrimitiveArray::try_from(self).unwrap_or_else(|err| panic!("Expected primitive array: {err}")) + PrimitiveArray::try_from(self) + .unwrap_or_else(|err| panic!("Expected primitive array: {err}")) } } diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index 9ecf7b86b..d74e5a6b2 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -81,8 +81,9 @@ impl StructArray { let fields: Vec = items.iter().map(|(_, array)| array.clone()).collect(); let len = fields.first().map(|f| f.len()).unwrap_or(0); - Self::try_new(FieldNames::from(names), fields, len, Validity::NonNullable) - .unwrap_or_else(|err| panic!("Unexpected error while building StructArray from fields: {err}")) + Self::try_new(FieldNames::from(names), fields, len, Validity::NonNullable).unwrap_or_else( + |err| panic!("Unexpected error while building StructArray from fields: {err}"), + ) } // TODO(aduffy): Add equivalent function to support field masks for nested column access. 
diff --git a/vortex-array/src/array/varbin/compute/take.rs b/vortex-array/src/array/varbin/compute/take.rs index 510fd5bb1..e03f4412e 100644 --- a/vortex-array/src/array/varbin/compute/take.rs +++ b/vortex-array/src/array/varbin/compute/take.rs @@ -49,9 +49,15 @@ fn take( let mut builder = VarBinBuilder::::with_capacity(indices.len()); for &idx in indices { - let idx = idx.to_usize().ok_or_else(|| vortex_err!("Failed to convert index to usize: {}", idx))?; - let start = offsets[idx].to_usize().ok_or_else(|| vortex_err!("Failed to convert offset to usize: {}", offsets[idx]))?; - let stop = offsets[idx + 1].to_usize().ok_or_else(|| vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1]))?; + let idx = idx + .to_usize() + .ok_or_else(|| vortex_err!("Failed to convert index to usize: {}", idx))?; + let start = offsets[idx] + .to_usize() + .ok_or_else(|| vortex_err!("Failed to convert offset to usize: {}", offsets[idx]))?; + let stop = offsets[idx + 1].to_usize().ok_or_else(|| { + vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1]) + })?; builder.push(Some(&data[start..stop])); } Ok(builder.finish(dtype)) @@ -66,10 +72,16 @@ fn take_nullable( ) -> VarBinArray { let mut builder = VarBinBuilder::::with_capacity(indices.len()); for &idx in indices { - let idx = idx.to_usize().unwrap_or_else(|| panic!("Failed to convert index to usize: {}", idx)); + let idx = idx + .to_usize() + .unwrap_or_else(|| panic!("Failed to convert index to usize: {}", idx)); if null_buffer.is_valid(idx) { - let start = offsets[idx].to_usize().unwrap_or_else(|| panic!("Failed to convert offset to usize: {}", offsets[idx])); - let stop = offsets[idx + 1].to_usize().unwrap_or_else(|| panic!("Failed to convert offset to usize: {}", offsets[idx + 1])); + let start = offsets[idx] + .to_usize() + .unwrap_or_else(|| panic!("Failed to convert offset to usize: {}", offsets[idx])); + let stop = offsets[idx + 1].to_usize().unwrap_or_else(|| { + panic!("Failed to convert offset 
to usize: {}", offsets[idx + 1]) + }); builder.push(Some(&data[start..stop])); } else { builder.push(None); diff --git a/vortex-array/src/array/varbin/mod.rs b/vortex-array/src/array/varbin/mod.rs index 4c6dce8ff..ce234707c 100644 --- a/vortex-array/src/array/varbin/mod.rs +++ b/vortex-array/src/array/varbin/mod.rs @@ -152,7 +152,9 @@ impl VarBinArray { }) .unwrap_or_else(|| { scalar_at(&self.offsets(), index) - .unwrap_or_else(|err| panic!("Failed to get offset at index: {}: {}", index, err)) + .unwrap_or_else(|err| { + panic!("Failed to get offset at index: {}: {}", index, err) + }) .as_ref() .try_into() .unwrap_or_else(|err| panic!("Failed to convert offset to usize: {}", err)) @@ -219,7 +221,8 @@ impl<'a> FromIterator> for VarBinArray { pub fn varbin_scalar(value: Buffer, dtype: &DType) -> Scalar { if matches!(dtype, DType::Utf8(_)) { - Scalar::try_utf8(value, dtype.nullability()).unwrap_or_else(|err| panic!("Failed to create scalar from utf8 buffer: {}", err)) + Scalar::try_utf8(value, dtype.nullability()) + .unwrap_or_else(|err| panic!("Failed to create scalar from utf8 buffer: {}", err)) } else { Scalar::binary(value, dtype.nullability()) } diff --git a/vortex-array/src/array/varbinview/mod.rs b/vortex-array/src/array/varbinview/mod.rs index 7f7bb80a3..6da1c54f8 100644 --- a/vortex-array/src/array/varbinview/mod.rs +++ b/vortex-array/src/array/varbinview/mod.rs @@ -158,7 +158,8 @@ impl VarBinViewArray { fn view_slice(&self) -> &[BinaryView] { unsafe { slice::from_raw_parts( - self.views().into_primitive() + self.views() + .into_primitive() .unwrap_or_else(|err| panic!("Views must be a primitive array: {}", err)) .maybe_null_slice::() .as_ptr() as _, diff --git a/vortex-array/src/arrow/array.rs b/vortex-array/src/arrow/array.rs index 95828d2c1..8345713df 100644 --- a/vortex-array/src/arrow/array.rs +++ b/vortex-array/src/arrow/array.rs @@ -119,7 +119,12 @@ where dtype, nulls(value.nulls(), nullable), ) - .unwrap_or_else(|err| panic!("Failed to convert Arrow 
GenericByteArray to Vortex VarBinArray: {}", err)) + .unwrap_or_else(|err| { + panic!( + "Failed to convert Arrow GenericByteArray to Vortex VarBinArray: {}", + err + ) + }) .into() } } @@ -141,7 +146,12 @@ impl FromArrowArray<&GenericByteViewArray> for Array { dtype, nulls(value.nulls(), nullable), ) - .unwrap_or_else(|err| panic!("Failed to convert Arrow GenericByteViewArray to Vortex VarBinViewArray: {}", err)) + .unwrap_or_else(|err| { + panic!( + "Failed to convert Arrow GenericByteViewArray to Vortex VarBinViewArray: {}", + err + ) + }) .into() } } @@ -149,7 +159,12 @@ impl FromArrowArray<&GenericByteViewArray> for Array { impl FromArrowArray<&ArrowBooleanArray> for Array { fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> Self { BoolArray::try_new(value.values().clone(), nulls(value.nulls(), nullable)) - .unwrap_or_else(|err| panic!("Failed to convert Arrow BooleanArray to Vortex BoolArray: {}", err)) + .unwrap_or_else(|err| { + panic!( + "Failed to convert Arrow BooleanArray to Vortex BoolArray: {}", + err + ) + }) .into() } } @@ -174,7 +189,12 @@ impl FromArrowArray<&ArrowStructArray> for Array { value.len(), nulls(value.nulls(), nullable), ) - .unwrap_or_else(|err| panic!("Failed to convert Arrow StructArray to Vortex StructArray: {}", err)) + .unwrap_or_else(|err| { + panic!( + "Failed to convert Arrow StructArray to Vortex StructArray: {}", + err + ) + }) .into() } } @@ -223,17 +243,24 @@ impl FromArrowArray for Array { DataType::Binary => Self::from_arrow(array.as_binary::(), nullable), DataType::LargeBinary => Self::from_arrow(array.as_binary::(), nullable), DataType::BinaryView => Self::from_arrow( - array.as_any().downcast_ref::().unwrap_or_else(|| panic!("Expected Arrow BinaryViewArray for DataType::BinaryView")), + array + .as_any() + .downcast_ref::() + .unwrap_or_else(|| { + panic!("Expected Arrow BinaryViewArray for DataType::BinaryView") + }), nullable, ), DataType::Utf8View => Self::from_arrow( - 
array.as_any().downcast_ref::().unwrap_or_else(|| panic!("Expected Arrow StringViewArray for DataType::Utf8View")), - nullable, - ), - DataType::Struct(_) => Self::from_arrow( - array.as_struct(), + array + .as_any() + .downcast_ref::() + .unwrap_or_else(|| { + panic!("Expected Arrow StringViewArray for DataType::Utf8View") + }), nullable, ), + DataType::Struct(_) => Self::from_arrow(array.as_struct(), nullable), DataType::Null => Self::from_arrow(as_null_array(&array), nullable), DataType::Timestamp(u, _) => match u { ArrowTimeUnit::Second => { diff --git a/vortex-array/src/arrow/recordbatch.rs b/vortex-array/src/arrow/recordbatch.rs index 503709505..03b133887 100644 --- a/vortex-array/src/arrow/recordbatch.rs +++ b/vortex-array/src/arrow/recordbatch.rs @@ -37,9 +37,7 @@ impl TryFrom for RecordBatch { type Error = VortexError; fn try_from(value: Array) -> VortexResult { - let array_ref = value - .into_canonical()? - .into_arrow()?; + let array_ref = value.into_canonical()?.into_arrow()?; let struct_array = as_struct_array(array_ref.as_ref()); Ok(RecordBatch::from(struct_array)) } diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 05679130a..c4232fbaa 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -83,9 +83,7 @@ impl Canonical { vortex_bail!("unsupported extension dtype with ID {}", a.id().as_ref()) } - temporal_to_arrow( - TemporalArray::try_from(&a.into_array())?, - ) + temporal_to_arrow(TemporalArray::try_from(&a.into_array())?) 
} }) } @@ -159,7 +157,9 @@ fn primitive_to_arrow(primitive_array: PrimitiveArray) -> ArrayRef { array .logical_validity() .to_null_buffer() - .unwrap_or_else(|err| panic!("Failed to get null buffer from logical validity: {err}")), + .unwrap_or_else(|err| { + panic!("Failed to get null buffer from logical validity: {err}") + }), ) } @@ -182,11 +182,15 @@ fn struct_to_arrow(struct_array: StructArray) -> ArrayRef { let field_arrays: Vec = struct_array .children() .map(|f| { - let canonical = f.into_canonical().unwrap_or_else(|err| panic!("Failed to canonicalize field: {err}")); + let canonical = f + .into_canonical() + .unwrap_or_else(|err| panic!("Failed to canonicalize field: {err}")); match canonical { // visit nested structs recursively Canonical::Struct(a) => struct_to_arrow(a), - _ => canonical.into_arrow().unwrap_or_else(|err| panic!("Failed to convert canonicalized field to arrow: {err}")), + _ => canonical.into_arrow().unwrap_or_else(|err| { + panic!("Failed to convert canonicalized field to arrow: {err}") + }), } }) .collect(); @@ -439,9 +443,7 @@ impl From for Array { #[cfg(test)] mod test { use arrow_array::types::{Int64Type, UInt64Type}; - use arrow_array::{ - PrimitiveArray as ArrowPrimitiveArray, StructArray as ArrowStructArray, - }; + use arrow_array::{PrimitiveArray as ArrowPrimitiveArray, StructArray as ArrowStructArray}; use vortex_dtype::Nullability; use vortex_scalar::Scalar; diff --git a/vortex-array/src/encoding.rs b/vortex-array/src/encoding.rs index d17a10a1d..7c19f91ce 100644 --- a/vortex-array/src/encoding.rs +++ b/vortex-array/src/encoding.rs @@ -80,9 +80,13 @@ pub trait ArrayEncodingExt { where F: for<'b> FnMut(&'b (dyn ArrayTrait + 'b)) -> R, { - let typed = - <::Array as TryFrom>::try_from(array.clone()) - .unwrap_or_else(|err| panic!("Failed to convert array to {}: {err}", std::any::type_name::<::Array>())); + let typed = <::Array as TryFrom>::try_from(array.clone()) + .unwrap_or_else(|err| { + panic!( + "Failed to convert array to {}: 
{err}", + std::any::type_name::<::Array>() + ) + }); f(&typed) } } diff --git a/vortex-array/src/implementation.rs b/vortex-array/src/implementation.rs index 070ea55cd..612cd6eff 100644 --- a/vortex-array/src/implementation.rs +++ b/vortex-array/src/implementation.rs @@ -242,7 +242,11 @@ where buffer: None, children: vec![], }; - array.with_dyn(|a| a.accept(&mut visitor).unwrap_or_else(|err| panic!("Error while visiting Array View children: {err}"))); + array.with_dyn(|a| { + a.accept(&mut visitor).unwrap_or_else(|err| { + panic!("Error while visiting Array View children: {err}") + }) + }); ArrayData::try_new( encoding, array.dtype().clone(), diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index 9c01ad421..bc01b2e0a 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -191,7 +191,12 @@ impl Array { result = Some(f(array)); Ok(()) }) - .unwrap_or_else(|err| panic!("Failed to convert Array to {}: {err}", std::any::type_name::())); + .unwrap_or_else(|err| { + panic!( + "Failed to convert Array to {}: {err}", + std::any::type_name::() + ) + }); // Now we unwrap the optional, which we know to be populated by the closure. 
result.unwrap_or_else(|| panic!("Failed to get result from Array::with_dyn")) @@ -248,7 +253,8 @@ pub trait ArrayTrait: { fn nbytes(&self) -> usize { let mut visitor = NBytesVisitor(0); - self.accept(&mut visitor).unwrap_or_else(|err| panic!("Failed to get nbytes from Array: {err}")); + self.accept(&mut visitor) + .unwrap_or_else(|err| panic!("Failed to get nbytes from Array: {err}")); visitor.0 } } diff --git a/vortex-array/src/stats/statsset.rs b/vortex-array/src/stats/statsset.rs index 37699bd30..b029fedb8 100644 --- a/vortex-array/src/stats/statsset.rs +++ b/vortex-array/src/stats/statsset.rs @@ -184,7 +184,10 @@ impl StatsSet { fn merge_scalar_stat(&mut self, other: &Self, stat: Stat) { if let Entry::Occupied(mut e) = self.values.entry(stat) { if let Some(other_value) = other.get_as::(stat) { - let self_value: usize = e.get().try_into().unwrap_or_else(|err| panic!("Failed to get stat {} as usize: {err}", stat)); + let self_value: usize = e + .get() + .try_into() + .unwrap_or_else(|err| panic!("Failed to get stat {} as usize: {err}", stat)); e.insert((self_value + other_value).into()); } else { e.remove(); @@ -204,7 +207,10 @@ impl StatsSet { if let Entry::Occupied(mut e) = self.values.entry(stat) { if let Some(other_value) = other.get_as::>(stat) { // TODO(robert): Avoid the copy here. 
We could e.get_mut() but need to figure out casting - let self_value: Vec = e.get().try_into().unwrap_or_else(|err| panic!("Failed to get stat {} as Vec: {err}", stat)); + let self_value: Vec = e + .get() + .try_into() + .unwrap_or_else(|err| panic!("Failed to get stat {} as Vec: {err}", stat)); e.insert( self_value .iter() @@ -223,7 +229,9 @@ impl StatsSet { fn merge_run_count(&mut self, other: &Self) { if let Entry::Occupied(mut e) = self.values.entry(Stat::RunCount) { if let Some(other_value) = other.get_as::(Stat::RunCount) { - let self_value: usize = e.get().try_into().unwrap_or_else(|err| panic!("Failed to get stat {} as usize: {err}", Stat::RunCount)); + let self_value: usize = e.get().try_into().unwrap_or_else(|err| { + panic!("Failed to get stat {} as usize: {err}", Stat::RunCount) + }); e.insert((self_value + other_value + 1).into()); } else { e.remove(); diff --git a/vortex-array/src/stream/ext.rs b/vortex-array/src/stream/ext.rs index 8771d1800..f560b5a62 100644 --- a/vortex-array/src/stream/ext.rs +++ b/vortex-array/src/stream/ext.rs @@ -15,7 +15,10 @@ pub trait ArrayStreamExt: ArrayStream { { async { let dtype = self.dtype().clone(); - let chunks: Vec = self.try_collect().await.unwrap_or_else(|err| panic!("Failed to collect ArrayStream: {err}")); + let chunks: Vec = self + .try_collect() + .await + .unwrap_or_else(|err| panic!("Failed to collect ArrayStream: {err}")); ChunkedArray::try_new(chunks, dtype) } } diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 9c8be1197..7ae375e66 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -219,12 +219,17 @@ impl FromIterator for Validity { LogicalValidity::AllInvalid(count) => BooleanBuffer::new_unset(count), LogicalValidity::Array(array) => array .into_bool() - .unwrap_or_else(|err| panic!("Failed to get Validity Array as BoolArray: {err}")).boolean_buffer(), + .unwrap_or_else(|err| { + panic!("Failed to get Validity Array as BoolArray: {err}") + }) + 
.boolean_buffer(), }; buffer.append_buffer(&present); } - let bool_array = BoolArray::try_new(buffer.finish(), Validity::NonNullable) - .unwrap_or_else(|err| panic!("BoolArray::try_new from BooleanBuffer should always succeed: {err}")); + let bool_array = + BoolArray::try_new(buffer.finish(), Validity::NonNullable).unwrap_or_else(|err| { + panic!("BoolArray::try_new from BooleanBuffer should always succeed: {err}") + }); Self::Array(bool_array.into_array()) } } diff --git a/vortex-array/src/variants.rs b/vortex-array/src/variants.rs index 712a8e767..afd52babc 100644 --- a/vortex-array/src/variants.rs +++ b/vortex-array/src/variants.rs @@ -12,7 +12,8 @@ pub trait ArrayVariants { } fn as_null_array_unchecked(&self) -> &dyn NullArrayTrait { - self.as_null_array().unwrap_or_else(|| panic!("Expected NullArray")) + self.as_null_array() + .unwrap_or_else(|| panic!("Expected NullArray")) } fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { @@ -20,7 +21,8 @@ pub trait ArrayVariants { } fn as_bool_array_unchecked(&self) -> &dyn BoolArrayTrait { - self.as_bool_array().unwrap_or_else(|| panic!("Expected BoolArray")) + self.as_bool_array() + .unwrap_or_else(|| panic!("Expected BoolArray")) } fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { @@ -28,7 +30,8 @@ pub trait ArrayVariants { } fn as_primitive_array_unchecked(&self) -> &dyn PrimitiveArrayTrait { - self.as_primitive_array().unwrap_or_else(|| panic!("Expected PrimitiveArray")) + self.as_primitive_array() + .unwrap_or_else(|| panic!("Expected PrimitiveArray")) } fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { @@ -36,7 +39,8 @@ pub trait ArrayVariants { } fn as_utf8_array_unchecked(&self) -> &dyn Utf8ArrayTrait { - self.as_utf8_array().unwrap_or_else(|| panic!("Expected Utf8Array")) + self.as_utf8_array() + .unwrap_or_else(|| panic!("Expected Utf8Array")) } fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { @@ -44,7 +48,8 @@ pub trait ArrayVariants { } fn 
as_binary_array_unchecked(&self) -> &dyn BinaryArrayTrait { - self.as_binary_array().unwrap_or_else(|| panic!("Expected BinaryArray")) + self.as_binary_array() + .unwrap_or_else(|| panic!("Expected BinaryArray")) } fn as_struct_array(&self) -> Option<&dyn StructArrayTrait> { @@ -52,7 +57,8 @@ pub trait ArrayVariants { } fn as_struct_array_unchecked(&self) -> &dyn StructArrayTrait { - self.as_struct_array().unwrap_or_else(|| panic!("Expected StructArray")) + self.as_struct_array() + .unwrap_or_else(|| panic!("Expected StructArray")) } fn as_list_array(&self) -> Option<&dyn ListArrayTrait> { @@ -60,7 +66,8 @@ pub trait ArrayVariants { } fn as_list_array_unchecked(&self) -> &dyn ListArrayTrait { - self.as_list_array().unwrap_or_else(|| panic!("Expected ListArray")) + self.as_list_array() + .unwrap_or_else(|| panic!("Expected ListArray")) } fn as_extension_array(&self) -> Option<&dyn ExtensionArrayTrait> { @@ -68,7 +75,8 @@ pub trait ArrayVariants { } fn as_extension_array_unchecked(&self) -> &dyn ExtensionArrayTrait { - self.as_extension_array().unwrap_or_else(|| panic!("Expected ExtensionArray")) + self.as_extension_array() + .unwrap_or_else(|| panic!("Expected ExtensionArray")) } } diff --git a/vortex-datafusion/examples/table_provider.rs b/vortex-datafusion/examples/table_provider.rs index 693a40f5c..25f082935 100644 --- a/vortex-datafusion/examples/table_provider.rs +++ b/vortex-datafusion/examples/table_provider.rs @@ -36,8 +36,7 @@ async fn main() -> anyhow::Result<()> { vec![strings, numbers], 8, Validity::NonNullable, - ) - .unwrap(); + )?; let filepath = temp_dir.path().join("a.vtx"); @@ -74,7 +73,7 @@ async fn main() -> anyhow::Result<()> { let ctx = SessionContext::new(); ctx.register_table("vortex_tbl", Arc::clone(&provider) as _)?; - let url = Url::try_from("file://").unwrap(); + let url = Url::try_from("file://")?; ctx.register_object_store(&url, object_store); run_query(&ctx, "SELECT * FROM vortex_tbl").await?; diff --git a/vortex-datafusion/src/expr.rs 
b/vortex-datafusion/src/expr.rs index ef86585f2..fdf019d45 100644 --- a/vortex-datafusion/src/expr.rs +++ b/vortex-datafusion/src/expr.rs @@ -117,7 +117,11 @@ pub fn convert_expr_to_vortex( physical_expr: Arc, input_schema: &Schema, ) -> VortexResult> { - if physical_expr.data_type(input_schema).map_err(VortexError::from)?.is_temporal() { + if physical_expr + .data_type(input_schema) + .map_err(VortexError::from)? + .is_temporal() + { vortex_bail!("Doesn't support evaluating operations over temporal values"); } if let Some(binary_expr) = physical_expr diff --git a/vortex-datafusion/src/memory.rs b/vortex-datafusion/src/memory.rs index 469bc36bf..47d4e75fe 100644 --- a/vortex-datafusion/src/memory.rs +++ b/vortex-datafusion/src/memory.rs @@ -13,6 +13,7 @@ use datafusion_physical_plan::{ExecutionMode, ExecutionPlan, Partitioning, PlanP use itertools::Itertools; use vortex::array::ChunkedArray; use vortex::{Array, ArrayDType as _}; +use vortex_error::VortexError; use crate::datatype::infer_schema; use crate::plans::{RowSelectorExec, TakeRowsExec}; @@ -43,7 +44,9 @@ impl VortexMemTable { Ok(a) => a, _ => { let dtype = array.dtype().clone(); - ChunkedArray::try_new(vec![array], dtype).unwrap() + ChunkedArray::try_new(vec![array], dtype).unwrap_or_else(|err| { + panic!("Failed to wrap array as a ChunkedArray with 1 chunk: {err}") + }) } }; @@ -98,7 +101,7 @@ impl TableProvider for VortexMemTable { Some(filter_exprs) => { let filter_projection = get_filter_projection(filter_exprs, self.schema_ref.clone()) - .map_err(DataFusionError::from)?; + .map_err(DataFusionError::from)?; Ok(make_filter_then_take_plan( self.schema_ref.clone(), @@ -116,7 +119,12 @@ impl TableProvider for VortexMemTable { let output_schema = Arc::new( self.schema_ref .project(output_projection.as_slice()) - .expect("project output schema"), + .unwrap_or_else(|err| { + panic!( + "Failed to project output schema: {}", + VortexError::from(err) + ) + }), ); let plan_properties = PlanProperties::new( 
EquivalenceProperties::new(output_schema), diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index e95c3ed07..d341c2694 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use arrow_array::cast::AsArray; use arrow_array::{Array as _, BooleanArray, RecordBatch}; use arrow_schema::SchemaRef; -use datafusion::arrow::buffer::{buffer_bin_and, buffer_bin_and_not, BooleanBuffer}; +use datafusion::arrow::buffer::{buffer_bin_and, BooleanBuffer}; use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener}; use datafusion_common::Result as DFResult; use datafusion_physical_expr::PhysicalExpr; @@ -79,7 +79,7 @@ impl FileOpener for VortexFileOpener { } } -/// Mask all null values of a Arrow boolean array to false +/// Mask all null values of an Arrow boolean array to false fn null_as_false(array: BoolArray) -> VortexResult { let arrow_array = array.into_canonical()?.into_arrow()?; let array = arrow_array.as_boolean(); diff --git a/vortex-datafusion/src/plans.rs b/vortex-datafusion/src/plans.rs index add9a16f8..5342df98f 100644 --- a/vortex-datafusion/src/plans.rs +++ b/vortex-datafusion/src/plans.rs @@ -171,10 +171,13 @@ impl Stream for RowIndicesStream { return Poll::Ready(None); } - let next_chunk = this - .chunked_array - .chunk(this.chunk_idx) - .ok_or_else(|| vortex_err!("Chunk not found for index {}, nchunks: {}", this.chunk_idx, this.chunked_array.nchunks()))?; + let next_chunk = this.chunked_array.chunk(this.chunk_idx).ok_or_else(|| { + vortex_err!( + "Chunk not found for index {}, nchunks: {}", + this.chunk_idx, + this.chunked_array.nchunks() + ) + })?; this.chunk_idx += 1; // Get the unfiltered record batch. 
@@ -185,8 +188,7 @@ impl Stream for RowIndicesStream { .project(this.filter_projection.as_slice())?; // TODO(adamg): Filter on vortex arrays - let array = - ExpressionEvaluator::eval(vortex_struct.into_array(), &this.conjunction_expr)?; + let array = ExpressionEvaluator::eval(vortex_struct.into_array(), &this.conjunction_expr)?; let selection = array.into_canonical()?.into_arrow()?; // Convert the `selection` BooleanArray into a UInt64Array of indices. @@ -233,7 +235,10 @@ impl TakeRowsExec { row_indices: Arc, table: &ChunkedArray, ) -> Self { - let output_schema = Arc::new(schema_ref.project(projection).unwrap_or_else(|err| panic!("Failed to project schema: {}", VortexError::from(err)))); + let output_schema = + Arc::new(schema_ref.project(projection).unwrap_or_else(|err| { + panic!("Failed to project schema: {}", VortexError::from(err)) + })); let plan_properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), @@ -369,7 +374,13 @@ where let chunk = this .vortex_array .chunk(*this.chunk_idx) - .ok_or_else(|| vortex_err!("Chunk not found for index {}, nchunks: {}", this.chunk_idx, this.vortex_array.nchunks()))? + .ok_or_else(|| { + vortex_err!( + "Chunk not found for index {}, nchunks: {}", + this.chunk_idx, + this.vortex_array.nchunks() + ) + })? 
.into_struct()?; *this.chunk_idx += 1; diff --git a/vortex-scalar/src/arrow.rs b/vortex-scalar/src/arrow.rs index 594f36086..562039842 100644 --- a/vortex-scalar/src/arrow.rs +++ b/vortex-scalar/src/arrow.rs @@ -10,18 +10,20 @@ impl From<&Scalar> for Arc { match value.dtype { DType::Null => Arc::new(NullArray::new(1)), DType::Bool(_) => { - let maybe_bool = value.value.as_bool().unwrap_or_else(|err| { - panic!("Expected a bool scalar: {}", err) - }); + let maybe_bool = value + .value + .as_bool() + .unwrap_or_else(|err| panic!("Expected a bool scalar: {}", err)); match maybe_bool { Some(b) => Arc::new(BooleanArray::new_scalar(b)), None => Arc::new(BooleanArray::new_null(1)), } - }, + } DType::Primitive(ptype, _) => { - let pvalue = value.value.as_pvalue().unwrap_or_else(|err| { - panic!("Expected a pvalue scalar: {}", err) - }); + let pvalue = value + .value + .as_pvalue() + .unwrap_or_else(|err| panic!("Expected a pvalue scalar: {}", err)); match pvalue { None => match ptype { PType::U8 => Arc::new(UInt8Array::new_null(1)), @@ -52,21 +54,20 @@ impl From<&Scalar> for Arc { } } DType::Utf8(_) => { - let maybe_string = value.value.as_buffer_string().unwrap_or_else(|err| { - panic!("Expected a string scalar: {}", err) - }); + let maybe_string = value + .value + .as_buffer_string() + .unwrap_or_else(|err| panic!("Expected a string scalar: {}", err)); match maybe_string { Some(s) => Arc::new(StringArray::new_scalar(s.as_str())), None => Arc::new(StringArray::new_null(1)), - } + } } DType::Binary(_) => { let maybe_buffer = value .value .as_buffer() - .unwrap_or_else(|err| { - panic!("Expected a binary buffer: {}", err) - }); + .unwrap_or_else(|err| panic!("Expected a binary buffer: {}", err)); match maybe_buffer { Some(s) => Arc::new(BinaryArray::new_scalar(s)), None => Arc::new(BinaryArray::new_null(1)), diff --git a/vortex-scalar/src/datafusion.rs b/vortex-scalar/src/datafusion.rs index 3e12dda8f..1f92ce656 100644 --- a/vortex-scalar/src/datafusion.rs +++ 
b/vortex-scalar/src/datafusion.rs @@ -8,13 +8,17 @@ impl From for ScalarValue { fn from(value: Scalar) -> Self { match value.dtype { DType::Null => ScalarValue::Null, - DType::Bool(_) => ScalarValue::Boolean(value.value.as_bool().unwrap_or_else(|err| { - panic!("Expected a bool scalar: {}", err) - })), + DType::Bool(_) => ScalarValue::Boolean( + value + .value + .as_bool() + .unwrap_or_else(|err| panic!("Expected a bool scalar: {}", err)), + ), DType::Primitive(ptype, _) => { - let pvalue = value.value.as_pvalue().unwrap_or_else(|err| { - panic!("Expected a pvalue scalar: {}", err) - }); + let pvalue = value + .value + .as_pvalue() + .unwrap_or_else(|err| panic!("Expected a pvalue scalar: {}", err)); match pvalue { None => match ptype { PType::U8 => ScalarValue::UInt8(None), @@ -48,18 +52,14 @@ impl From for ScalarValue { value .value .as_buffer_string() - .unwrap_or_else(|err| { - panic!("Expected a buffer string: {}", err) - }) + .unwrap_or_else(|err| panic!("Expected a buffer string: {}", err)) .map(|b| b.as_str().to_string()), ), DType::Binary(_) => ScalarValue::Binary( value .value .as_buffer() - .unwrap_or_else(|err| { - panic!("Expected a buffer: {}", err) - }) + .unwrap_or_else(|err| panic!("Expected a buffer: {}", err)) .map(|b| b.into_vec().unwrap_or_else(|buf| buf.as_slice().to_vec())), ), DType::Struct(..) 
=> { diff --git a/vortex-scalar/src/list.rs b/vortex-scalar/src/list.rs index 50004a40a..95b46772d 100644 --- a/vortex-scalar/src/list.rs +++ b/vortex-scalar/src/list.rs @@ -106,11 +106,15 @@ impl<'a, T: for<'b> TryFrom<&'b Scalar, Error = VortexError>> TryFrom<&'a Scalar impl From> for Scalar where - Self: From + Self: From, { fn from(value: Vec) -> Self { let scalars = value.into_iter().map(|v| Self::from(v)).collect_vec(); - let element_dtype = scalars.first().unwrap_or_else(|| panic!("Empty list, could not determine element dtype")).dtype().clone(); + let element_dtype = scalars + .first() + .unwrap_or_else(|| panic!("Empty list, could not determine element dtype")) + .dtype() + .clone(); let dtype = DType::List(Arc::new(element_dtype), NonNullable); Self { dtype, diff --git a/vortex-serde/src/chunked_reader/take_rows.rs b/vortex-serde/src/chunked_reader/take_rows.rs index e85e41d64..d48e5423a 100644 --- a/vortex-serde/src/chunked_reader/take_rows.rs +++ b/vortex-serde/src/chunked_reader/take_rows.rs @@ -57,8 +57,19 @@ impl ChunkedArrayReader { let mut start_chunks: Vec = Vec::with_capacity(coalesced_chunks.len()); let mut stop_chunks: Vec = Vec::with_capacity(coalesced_chunks.len()); for (i, chunks) in coalesced_chunks.iter().enumerate() { - start_chunks.push(chunks.first().ok_or_else(|| vortex_err!("Coalesced chunk {i} cannot be empty"))?.chunk_idx); - stop_chunks.push(chunks.last().ok_or_else(|| vortex_err!("Coalesced chunk {i} cannot be empty"))?.chunk_idx + 1); + start_chunks.push( + chunks + .first() + .ok_or_else(|| vortex_err!("Coalesced chunk {i} cannot be empty"))? + .chunk_idx, + ); + stop_chunks.push( + chunks + .last() + .ok_or_else(|| vortex_err!("Coalesced chunk {i} cannot be empty"))? + .chunk_idx + + 1, + ); } // Grab the row and byte offsets for each chunk range. 
diff --git a/vortex-serde/src/io/object_store.rs b/vortex-serde/src/io/object_store.rs index 62cc7ed09..91f471177 100644 --- a/vortex-serde/src/io/object_store.rs +++ b/vortex-serde/src/io/object_store.rs @@ -82,7 +82,12 @@ impl VortexReadAt for ObjectStoreReadAt { .head(&self.location) .await .map_err(VortexError::ObjectStore) - .unwrap_or_else(|err| panic!("Failed to get size of object at location {}: {err}", self.location)) + .unwrap_or_else(|err| { + panic!( + "Failed to get size of object at location {}: {err}", + self.location + ) + }) .size as u64 } } diff --git a/vortex-serde/src/layouts/reader/layouts.rs b/vortex-serde/src/layouts/reader/layouts.rs index 98e7fa21b..744efe532 100644 --- a/vortex-serde/src/layouts/reader/layouts.rs +++ b/vortex-serde/src/layouts/reader/layouts.rs @@ -149,9 +149,9 @@ impl ColumnLayout { let tab = flatbuffers::Table::new(&self.fb_bytes, self.fb_loc); fb::Layout::init_from_table(tab) }; - fb_layout.layout_as_nested_layout().unwrap_or_else(|| { - panic!("ColumnLayout: Failed to read nested layout from flatbuffer") - }) + fb_layout + .layout_as_nested_layout() + .unwrap_or_else(|| panic!("ColumnLayout: Failed to read nested layout from flatbuffer")) } fn read_child( diff --git a/vortex-serde/src/layouts/reader/stream.rs b/vortex-serde/src/layouts/reader/stream.rs index cbb100d69..197beb309 100644 --- a/vortex-serde/src/layouts/reader/stream.rs +++ b/vortex-serde/src/layouts/reader/stream.rs @@ -96,8 +96,8 @@ impl Stream for VortexLayoutBatchStrea if let Some(read) = self.layout.read()? 
{ match read { ReadResult::GetMsgs(messages) => { - let reader = - mem::take(&mut self.reader).ok_or_else(|| vortex_err!("Invalid state transition"))?; + let reader = mem::take(&mut self.reader) + .ok_or_else(|| vortex_err!("Invalid state transition"))?; let read_future = read_ranges(reader, messages).boxed(); self.state = StreamingState::Reading(read_future); } diff --git a/vortex-serde/src/layouts/writer/layout_writer.rs b/vortex-serde/src/layouts/writer/layout_writer.rs index 2735e7331..990c99ef4 100644 --- a/vortex-serde/src/layouts/writer/layout_writer.rs +++ b/vortex-serde/src/layouts/writer/layout_writer.rs @@ -154,7 +154,11 @@ impl LayoutWriter { } async fn write_metadata_arrays(&mut self) -> VortexResult { - let DType::Struct(..) = self.dtype.as_ref().ok_or_else(|| vortex_err!("Should have written values"))? else { + let DType::Struct(..) = self + .dtype + .as_ref() + .ok_or_else(|| vortex_err!("Should have written values"))? + else { unreachable!("Values are a StructArray") }; @@ -229,7 +233,11 @@ impl LayoutWriter { let dtype_len = Self::write_flatbuffer( &mut w, - &IPCSchema(&self.dtype.ok_or_else(|| vortex_err!("Schema should be written by now"))?), + &IPCSchema( + &self + .dtype + .ok_or_else(|| vortex_err!("Schema should be written by now"))?, + ), ) .await?; let _ = Self::write_flatbuffer(&mut w, &footer).await?; diff --git a/vortex-serde/src/message_reader.rs b/vortex-serde/src/message_reader.rs index 9372bb2d9..8d648581c 100644 --- a/vortex-serde/src/message_reader.rs +++ b/vortex-serde/src/message_reader.rs @@ -95,7 +95,9 @@ impl MessageReader { let buf = self.next().await?; let msg = unsafe { root_unchecked::(&buf) } .header_as_schema() - .ok_or_else(|| vortex_err!("Expected schema message; this was checked earlier in the function"))?; + .ok_or_else(|| { + vortex_err!("Expected schema message; this was checked earlier in the function") + })?; Ok(IPCDType::read_flatbuffer(&msg)?.0) } diff --git a/vortex-serde/src/stream_reader/mod.rs 
b/vortex-serde/src/stream_reader/mod.rs index 73663070e..3b61a35cb 100644 --- a/vortex-serde/src/stream_reader/mod.rs +++ b/vortex-serde/src/stream_reader/mod.rs @@ -41,12 +41,22 @@ impl StreamArrayReader { /// Reads a single array from the stream. pub fn array_stream(&mut self) -> impl ArrayStream + '_ { - let dtype = self.dtype.as_ref().unwrap_or_else(|| panic!("DType not set")).deref().clone(); + let dtype = self + .dtype + .as_ref() + .unwrap_or_else(|| panic!("DType not set")) + .deref() + .clone(); self.msgs.array_stream(self.ctx.clone(), dtype) } pub fn into_array_stream(self) -> impl ArrayStream { - let dtype = self.dtype.as_ref().unwrap_or_else(|| panic!("DType not set")).deref().clone(); + let dtype = self + .dtype + .as_ref() + .unwrap_or_else(|| panic!("DType not set")) + .deref() + .clone(); self.msgs.into_array_stream(self.ctx.clone(), dtype) } From e3d482246739238f4c2ddf42e5cedb685bd472fb Mon Sep 17 00:00:00 2001 From: Will Manning Date: Tue, 13 Aug 2024 17:30:30 -0400 Subject: [PATCH 12/39] fallible_impl_from --- Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0c2757a33..76b1ba2a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -163,11 +163,10 @@ borrow_as_ptr = { level = "deny" } collection_is_never_read = { level = "deny" } cognitive_complexity = { level = "deny" } debug_assert_with_mut_call = { level = "deny" } -#default_numeric_fallback = { level = "deny" } derive_partial_eq_without_eq = { level = "deny" } expect_used = { level = "deny" } equatable_if_let = { level = "deny" } -#fallible_impl_from = { level = "deny" } +fallible_impl_from = { level = "deny" } get_unwrap = { level = "deny" } host_endian_bytes = { level = "deny" } if_then_some_else_none = { level = "deny" } From b45bd09757a7429b8f6aa3319ff333b7683e6026 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Tue, 13 Aug 2024 17:31:02 -0400 Subject: [PATCH 13/39] Reapply "add vortex_panic macro" This reverts commit 
ee6f42f948b9264ae77dfe975a19f373d37c6dde. --- vortex-error/src/lib.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 87dc18426..89b18f162 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -202,6 +202,14 @@ macro_rules! vortex_bail { }; } +#[macro_export] +macro_rules! vortex_panic { + // TODO: this can be fancier, e.g., add backtrace if it's not already included + ($($tt:tt)+) => { + panic!($($tt)+) + }; +} + #[cfg(feature = "datafusion")] impl From for datafusion_common::DataFusionError { fn from(value: VortexError) -> Self { From 8bf9cd3806cd9d82232d69dfbe71ce9f83d10e10 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Wed, 14 Aug 2024 13:11:10 -0400 Subject: [PATCH 14/39] wip on vortex_panic --- vortex-datafusion/src/plans.rs | 4 ++-- vortex-error/src/lib.rs | 36 ++++++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/vortex-datafusion/src/plans.rs b/vortex-datafusion/src/plans.rs index 5342df98f..7ba6a403a 100644 --- a/vortex-datafusion/src/plans.rs +++ b/vortex-datafusion/src/plans.rs @@ -25,7 +25,7 @@ use vortex::array::ChunkedArray; use vortex::arrow::FromArrowArray; use vortex::compute::take; use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant, IntoCanonical}; -use vortex_error::{vortex_err, VortexError}; +use vortex_error::{vortex_err, vortex_panic, VortexError}; use crate::datatype::infer_schema; use crate::eval::ExpressionEvaluator; @@ -237,7 +237,7 @@ impl TakeRowsExec { ) -> Self { let output_schema = Arc::new(schema_ref.project(projection).unwrap_or_else(|err| { - panic!("Failed to project schema: {}", VortexError::from(err)) + vortex_panic!("Failed to project schema", VortexError::from(err)) })); let plan_properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 89b18f162..d9f843a24 100644 --- a/vortex-error/src/lib.rs 
+++ b/vortex-error/src/lib.rs @@ -1,4 +1,5 @@ #![feature(error_generic_member_access)] +#![feature(min_specialization)] use std::backtrace::Backtrace; use std::borrow::Cow; @@ -57,6 +58,8 @@ pub enum VortexError { NotImplemented(ErrString, ErrString, Backtrace), #[error("expected type: {0} but instead got {1}\nBacktrace:\n{2}")] MismatchedTypes(ErrString, ErrString, Backtrace), + #[error("{0}: {1}")] + Context(ErrString, Box), #[error(transparent)] ArrowError( #[from] @@ -179,6 +182,12 @@ macro_rules! vortex_err { $crate::VortexError::MismatchedTypes($expected.to_string().into(), $actual.to_string().into(), Backtrace::capture()) ) }}; + (Context: $fmt:literal $(, $arg:expr)*, $err:expr $(,)?) => {{ + use std::backtrace::Backtrace; + $crate::__private::must_use( + $crate::VortexError::Context(format!($fmt, $($arg),*).into(), Box::new($err)) + ) + }}; ($variant:ident: $fmt:literal $(, $arg:expr)* $(,)?) => {{ use std::backtrace::Backtrace; $crate::__private::must_use( @@ -202,11 +211,34 @@ macro_rules! vortex_bail { }; } +pub trait VortexPanic { + fn panic(self); +} + +impl VortexPanic for VortexError { + #[allow(clippy::panic)] + fn panic(self) { + panic!("{}", self) + } +} + +impl> VortexPanic for T { + #[allow(clippy::panic)] + default fn panic(self) { + let err: VortexError = self.into(); + panic!("{}", err) + } +} + #[macro_export] macro_rules! vortex_panic { // TODO: this can be fancier, e.g., add backtrace if it's not already included - ($($tt:tt)+) => { - panic!($($tt)+) + ($err:expr) => {{ + use $crate::VortexPanic; + <$err as VortexPanic>::panic() + }}; + ($fmt:literal $(, $arg:expr)* $(,)?) 
=> { + $crate::vortex_panic!($crate::vortex_err!(Context: $fmt, $($arg),*)) }; } From d01abd26e4b88c50bd04a0b0d5c978a9b7ad297b Mon Sep 17 00:00:00 2001 From: Will Manning Date: Wed, 14 Aug 2024 13:20:55 -0400 Subject: [PATCH 15/39] remove vortex_panic for now --- vortex-error/src/lib.rs | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index d9f843a24..b5840a127 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -1,5 +1,4 @@ #![feature(error_generic_member_access)] -#![feature(min_specialization)] use std::backtrace::Backtrace; use std::borrow::Cow; @@ -211,37 +210,6 @@ macro_rules! vortex_bail { }; } -pub trait VortexPanic { - fn panic(self); -} - -impl VortexPanic for VortexError { - #[allow(clippy::panic)] - fn panic(self) { - panic!("{}", self) - } -} - -impl> VortexPanic for T { - #[allow(clippy::panic)] - default fn panic(self) { - let err: VortexError = self.into(); - panic!("{}", err) - } -} - -#[macro_export] -macro_rules! vortex_panic { - // TODO: this can be fancier, e.g., add backtrace if it's not already included - ($err:expr) => {{ - use $crate::VortexPanic; - <$err as VortexPanic>::panic() - }}; - ($fmt:literal $(, $arg:expr)* $(,)?) 
=> { - $crate::vortex_panic!($crate::vortex_err!(Context: $fmt, $($arg),*)) - }; -} - #[cfg(feature = "datafusion")] impl From for datafusion_common::DataFusionError { fn from(value: VortexError) -> Self { From f147ce789cea7b1e1e57db9a5f8a558bca8b4ce2 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Thu, 22 Aug 2024 15:13:32 -0700 Subject: [PATCH 16/39] fixes --- encodings/fsst/src/array.rs | 4 ++-- vortex-array/src/arrow/dtype.rs | 2 +- vortex-array/src/arrow/recordbatch.rs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 4a94e66ef..ead96b9e0 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -66,14 +66,14 @@ impl FSSTArray { pub fn symbols(&self) -> Array { self.array() .child(0, &SYMBOLS_DTYPE, self.metadata().symbols_len) - .expect("FSSTArray must have a symbols child array") + .unwrap_or_else(|| panic!("FSSTArray must have a symbols child array")) } /// Access the codes array pub fn codes(&self) -> Array { self.array() .child(1, &self.metadata().codes_dtype, self.len()) - .expect("FSSTArray must have a codes child array") + .unwrap_or_else(|| panic!("FSSTArray must have a codes child array")) } /// Build a [`Decompressor`][fsst::Decompressor] that can be used to decompress values from diff --git a/vortex-array/src/arrow/dtype.rs b/vortex-array/src/arrow/dtype.rs index 271824a2f..6398e6e37 100644 --- a/vortex-array/src/arrow/dtype.rs +++ b/vortex-array/src/arrow/dtype.rs @@ -4,7 +4,7 @@ use arrow_schema::{DataType, Field, SchemaRef}; use itertools::Itertools; use vortex_datetime_dtype::arrow::make_temporal_ext_dtype; use vortex_dtype::{DType, Nullability, PType, StructDType}; -use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; +use vortex_error::{vortex_err, VortexResult}; use crate::arrow::{FromArrowType, TryFromArrowType}; diff --git a/vortex-array/src/arrow/recordbatch.rs b/vortex-array/src/arrow/recordbatch.rs index 
8450f1110..d1a649177 100644 --- a/vortex-array/src/arrow/recordbatch.rs +++ b/vortex-array/src/arrow/recordbatch.rs @@ -1,7 +1,7 @@ use arrow_array::cast::as_struct_array; use arrow_array::RecordBatch; use itertools::Itertools; -use vortex_error::{VortexError, VortexResult}; +use vortex_error::{vortex_err, VortexError, VortexResult}; use crate::array::StructArray; use crate::arrow::FromArrowArray; @@ -42,7 +42,7 @@ impl TryFrom for RecordBatch { vortex_err!("RecordBatch can only be constructed from a Vortex StructArray: {err}") })?; - Ok(RecordBatch::from(struct_arr)) + RecordBatch::try_from(struct_arr) } } From e8300eb1a15ceb9577233b20601e2f060eb3d409 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 26 Aug 2024 18:01:02 -0400 Subject: [PATCH 17/39] wip --- vortex-array/src/array/chunked/canonical.rs | 2 +- vortex-array/src/array/chunked/compute/mod.rs | 7 +++- vortex-array/src/array/chunked/mod.rs | 4 +- vortex-array/src/array/constant/compute.rs | 2 +- vortex-array/src/array/sparse/compute/mod.rs | 2 +- vortex-scalar/src/arrow.rs | 38 +++++++++---------- vortex-scalar/src/datafusion.rs | 31 ++++++++------- 7 files changed, 45 insertions(+), 41 deletions(-) diff --git a/vortex-array/src/array/chunked/canonical.rs b/vortex-array/src/array/chunked/canonical.rs index 43d739a12..772d99be4 100644 --- a/vortex-array/src/array/chunked/canonical.rs +++ b/vortex-array/src/array/chunked/canonical.rs @@ -230,7 +230,7 @@ fn pack_varbin(chunks: &[Array], validity: Validity, dtype: &DType) -> VortexRes slice(&chunk.bytes(), first_offset_value, last_offset_value)?.into_primitive()?; data_bytes.extend_from_slice(primitive_bytes.buffer()); - let adjustment_from_previous = *offsets.last().expect("offsets has at least one element"); + let adjustment_from_previous = *offsets.last().ok_or_else(|| vortex_err!("VarBinArray offsets must have at least one element"))?; offsets.extend( offsets_arr .maybe_null_slice::() diff --git a/vortex-array/src/array/chunked/compute/mod.rs 
b/vortex-array/src/array/chunked/compute/mod.rs index a5d7dee69..fe5d15e42 100644 --- a/vortex-array/src/array/chunked/compute/mod.rs +++ b/vortex-array/src/array/chunked/compute/mod.rs @@ -51,7 +51,12 @@ impl ScalarAtFn for ChunkedArray { fn scalar_at_unchecked(&self, index: usize) -> Scalar { let (chunk_index, chunk_offset) = self.find_chunk_idx(index); - scalar_at_unchecked(&self.chunk(chunk_index).unwrap(), chunk_offset) + scalar_at_unchecked( + &self + .chunk(chunk_index) + .unwrap_or_else(|| panic!("{}", vortex_err!(OutOfBounds: chunk_index, 0, self.nchunks()))), + chunk_offset, + ) } } diff --git a/vortex-array/src/array/chunked/mod.rs b/vortex-array/src/array/chunked/mod.rs index 84b1bc1f2..fd144e537 100644 --- a/vortex-array/src/array/chunked/mod.rs +++ b/vortex-array/src/array/chunked/mod.rs @@ -6,7 +6,7 @@ use futures_util::stream; use itertools::Itertools; use serde::{Deserialize, Serialize}; use vortex_dtype::{DType, Nullability, PType}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexResult}; use vortex_scalar::Scalar; use crate::array::primitive::PrimitiveArray; @@ -167,7 +167,7 @@ impl ArrayValidity for ChunkedArray { fn is_valid(&self, index: usize) -> bool { let (chunk, offset_in_chunk) = self.find_chunk_idx(index); self.chunk(chunk) - .expect("must be a valid chunk index") + .unwrap_or_else(|| panic!("{}", vortex_err!(OutOfBounds: chunk, 0, self.nchunks()))) .with_dyn(|a| a.is_valid(offset_in_chunk)) } diff --git a/vortex-array/src/array/constant/compute.rs b/vortex-array/src/array/constant/compute.rs index e31627deb..80cd6d1af 100644 --- a/vortex-array/src/array/constant/compute.rs +++ b/vortex-array/src/array/constant/compute.rs @@ -109,7 +109,7 @@ impl CompareFn for ConstantArray { Ok(ConstantArray::new(scalar, self.len()).into_array()) } else { - let datum = Arc::::from(self.scalar()); + let datum = Arc::::try_from(self.scalar())?; let rhs = rhs.clone().into_canonical()?.into_arrow()?; let 
rhs = rhs.as_ref(); diff --git a/vortex-array/src/array/sparse/compute/mod.rs b/vortex-array/src/array/sparse/compute/mod.rs index 87fe8660a..69b04e018 100644 --- a/vortex-array/src/array/sparse/compute/mod.rs +++ b/vortex-array/src/array/sparse/compute/mod.rs @@ -40,7 +40,7 @@ impl ScalarAtFn for SparseArray { fn scalar_at_unchecked(&self, index: usize) -> Scalar { match self .find_index(index) - .expect("Must be able to find the index") + .unwrap_or_else(|err| panic!("{}", err)) { None => self.fill_value().clone().cast(self.dtype()).unwrap(), Some(idx) => scalar_at_unchecked(&self.values(), idx) diff --git a/vortex-scalar/src/arrow.rs b/vortex-scalar/src/arrow.rs index fe2f167d4..cfab8c49f 100644 --- a/vortex-scalar/src/arrow.rs +++ b/vortex-scalar/src/arrow.rs @@ -3,32 +3,34 @@ use std::sync::Arc; use arrow_array::*; use vortex_datetime_dtype::{is_temporal_ext_type, TemporalMetadata, TimeUnit}; use vortex_dtype::{DType, PType}; +use vortex_error::{vortex_bail, VortexError}; use crate::{PValue, Scalar}; macro_rules! 
value_to_arrow_scalar { ($V:expr, $AR:ty) => { - std::sync::Arc::new( + Ok(std::sync::Arc::new( $V.map(<$AR>::new_scalar) .unwrap_or_else(|| arrow_array::Scalar::new(<$AR>::new_null(1))), - ) + )) }; } -impl From<&Scalar> for Arc { - fn from(value: &Scalar) -> Arc { +impl TryFrom<&Scalar> for Arc { + type Error = VortexError; + + fn try_from(value: &Scalar) -> Result, Self::Error> { match value.dtype() { - DType::Null => Arc::new(NullArray::new(1)), + DType::Null => Ok(Arc::new(NullArray::new(1))), DType::Bool(_) => value_to_arrow_scalar!( - value.value.as_bool().unwrap_or_else(|err| panic!("Expected a bool scalar: {}", err)), + value.value.as_bool()?, BooleanArray ), DType::Primitive(ptype, _) => { let pvalue = value .value - .as_pvalue() - .unwrap_or_else(|err| panic!("Expected a pvalue scalar: {}", err)); - match pvalue { + .as_pvalue()?; + Ok(match pvalue { None => match ptype { PType::U8 => Arc::new(UInt8Array::new_null(1)), PType::U16 => Arc::new(UInt16Array::new_null(1)), @@ -55,14 +57,13 @@ impl From<&Scalar> for Arc { PValue::F32(v) => Arc::new(Float32Array::new_scalar(v)), PValue::F64(v) => Arc::new(Float64Array::new_scalar(v)), }, - } + }) } DType::Utf8(_) => { value_to_arrow_scalar!( value .value - .as_buffer_string() - .unwrap_or_else(|err| panic!("Expected a string scalar: {}", err)), + .as_buffer_string()?, StringArray ) } @@ -70,8 +71,7 @@ impl From<&Scalar> for Arc { value_to_arrow_scalar!( value .value - .as_buffer() - .unwrap_or_else(|err| panic!("Expected a buffer scalar: {}", err)), + .as_buffer()?, BinaryArray ) } @@ -83,8 +83,8 @@ impl From<&Scalar> for Arc { } DType::Extension(ext, _) => { if is_temporal_ext_type(ext.id()) { - let metadata = TemporalMetadata::try_from(ext).unwrap(); - let pv = value.value.as_pvalue().expect("must be a pvalue"); + let metadata = TemporalMetadata::try_from(ext)?; + let pv = value.value.as_pvalue()?; return match metadata { TemporalMetadata::Time(u) => match u { TimeUnit::Ns => value_to_arrow_scalar!( @@ -104,7 
+104,7 @@ impl From<&Scalar> for Arc { Time32SecondArray ), TimeUnit::D => { - unreachable!("Unsupported TimeUnit {u} for {}", ext.id()) + vortex_bail!("Unsupported TimeUnit {u} for {}", ext.id()) } }, TemporalMetadata::Date(u) => match u { @@ -114,7 +114,7 @@ impl From<&Scalar> for Arc { TimeUnit::D => { value_to_arrow_scalar!(pv.and_then(|p| p.as_i32()), Date32Array) } - _ => unreachable!("Unsupported TimeUnit {u} for {}", ext.id()), + _ => vortex_bail!("Unsupported TimeUnit {u} for {}", ext.id()), }, TemporalMetadata::Timestamp(u, _) => match u { TimeUnit::Ns => value_to_arrow_scalar!( @@ -134,7 +134,7 @@ impl From<&Scalar> for Arc { TimestampSecondArray ), TimeUnit::D => { - unreachable!("Unsupported TimeUnit {u} for {}", ext.id()) + vortex_bail!("Unsupported TimeUnit {u} for {}", ext.id()) } }, }; diff --git a/vortex-scalar/src/datafusion.rs b/vortex-scalar/src/datafusion.rs index 23bf506c1..f50cc1cb3 100644 --- a/vortex-scalar/src/datafusion.rs +++ b/vortex-scalar/src/datafusion.rs @@ -4,24 +4,25 @@ use vortex_buffer::Buffer; use vortex_datetime_dtype::arrow::make_temporal_ext_dtype; use vortex_datetime_dtype::{is_temporal_ext_type, TemporalMetadata, TimeUnit}; use vortex_dtype::{DType, Nullability, PType}; +use vortex_error::VortexError; use crate::{PValue, Scalar}; -impl From for ScalarValue { - fn from(value: Scalar) -> Self { - match value.dtype { +impl TryFrom for ScalarValue { + type Error = VortexError; + + fn try_from(value: Scalar) -> Result { + Ok(match value.dtype { DType::Null => ScalarValue::Null, DType::Bool(_) => ScalarValue::Boolean( value .value - .as_bool() - .unwrap_or_else(|err| panic!("Expected a bool scalar: {}", err)), + .as_bool()?, ), DType::Primitive(ptype, _) => { let pvalue = value .value - .as_pvalue() - .unwrap_or_else(|err| panic!("Expected a pvalue scalar: {}", err)); + .as_pvalue()?; match pvalue { None => match ptype { PType::U8 => ScalarValue::UInt8(None), @@ -54,15 +55,13 @@ impl From for ScalarValue { DType::Utf8(_) => 
ScalarValue::Utf8( value .value - .as_buffer_string() - .unwrap_or_else(|err| panic!("Expected a buffer string: {}", err)) + .as_buffer_string()? .map(|b| b.as_str().to_string()), ), DType::Binary(_) => ScalarValue::Binary( value .value - .as_buffer() - .unwrap_or_else(|err| panic!("Expected a buffer: {}", err)) + .as_buffer()? .map(|b| b.into_vec().unwrap_or_else(|buf| buf.as_slice().to_vec())), ), DType::Struct(..) => { @@ -73,9 +72,9 @@ impl From for ScalarValue { } DType::Extension(ext, _) => { if is_temporal_ext_type(ext.id()) { - let metadata = TemporalMetadata::try_from(&ext).unwrap(); - let pv = value.value.as_pvalue().expect("must be a pvalue"); - return match metadata { + let metadata = TemporalMetadata::try_from(&ext)?; + let pv = value.value.as_pvalue()?; + return Ok(match metadata { TemporalMetadata::Time(u) => match u { TimeUnit::Ns => { ScalarValue::Time64Nanosecond(pv.and_then(|p| p.as_i64())) @@ -117,12 +116,12 @@ impl From for ScalarValue { unreachable!("Unsupported TimeUnit {u} for {}", ext.id()) } }, - }; + }); } todo!("Non temporal extension scalar conversion") } - } + }) } } From 94daf5f4a46bd2371416a66a7623d2d72d4a99e7 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 26 Aug 2024 18:06:14 -0400 Subject: [PATCH 18/39] new vortex_panic macro --- vortex-error/src/lib.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index b5840a127..876bbbef6 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -210,6 +210,16 @@ macro_rules! vortex_bail { }; } +#[macro_export] +macro_rules! vortex_panic { + ($variant:ident: $fmt:literal $(, $arg:expr)* $(,)?) 
=> { + panic!("{}", vortex_err!($variant: $fmt, $($arg),*)) + }; + ($msg:literal) => { + panic!("{}", vortex_err!($msg)) + }; +} + #[cfg(feature = "datafusion")] impl From for datafusion_common::DataFusionError { fn from(value: VortexError) -> Self { From d99f3f5f179b8a8617f313fe8089ed343c30be3a Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 26 Aug 2024 18:08:57 -0400 Subject: [PATCH 19/39] better --- vortex-error/src/lib.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 876bbbef6..deebccaf4 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -213,10 +213,13 @@ macro_rules! vortex_bail { #[macro_export] macro_rules! vortex_panic { ($variant:ident: $fmt:literal $(, $arg:expr)* $(,)?) => { - panic!("{}", vortex_err!($variant: $fmt, $($arg),*)) + panic!("{}", $crate::vortex_err!($variant: $fmt, $($arg),*)) + }; + ($msg:literal, $err:expr) => { + panic!("{}", $crate::vortex_err!(Context: $msg, $err)) }; ($msg:literal) => { - panic!("{}", vortex_err!($msg)) + panic!("{}", $crate::vortex_err!($msg)) }; } From 5b40bb5bc31a0a9ce5172be081bc8270c00b2641 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Wed, 28 Aug 2024 09:37:59 -0400 Subject: [PATCH 20/39] wip --- encodings/datetime-parts/src/compute.rs | 4 ++-- encodings/dict/src/compute.rs | 4 ++-- .../src/bitpacking/compute/scalar_at.rs | 4 ++-- encodings/fastlanes/src/for/compute.rs | 6 +++--- encodings/fsst/src/array.rs | 6 +++--- encodings/roaring/src/integer/compute.rs | 4 ++-- encodings/runend-bool/src/compute.rs | 5 ++--- vortex-array/src/array/sparse/compute/mod.rs | 12 ++++++++---- vortex-array/src/array/varbin/compute/mod.rs | 8 ++++++-- vortex-array/src/array/varbinview/compute.rs | 5 +++-- vortex-array/src/compute/unary/scalar_at.rs | 5 ++--- vortex-error/src/lib.rs | 17 +++++++++++++---- 12 files changed, 48 insertions(+), 32 deletions(-) diff --git a/encodings/datetime-parts/src/compute.rs 
b/encodings/datetime-parts/src/compute.rs index 53c4d6a4a..b8422532e 100644 --- a/encodings/datetime-parts/src/compute.rs +++ b/encodings/datetime-parts/src/compute.rs @@ -5,7 +5,7 @@ use vortex::validity::ArrayValidity; use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant}; use vortex_datetime_dtype::{TemporalMetadata, TimeUnit}; use vortex_dtype::DType; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::DateTimePartsArray; @@ -79,7 +79,7 @@ impl ScalarAtFn for DateTimePartsArray { } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - ::scalar_at(self, index).unwrap() + ::scalar_at(self, index).unwrap_or_else(|err| vortex_panic!(err)) } } diff --git a/encodings/dict/src/compute.rs b/encodings/dict/src/compute.rs index fc840891a..036aebdaa 100644 --- a/encodings/dict/src/compute.rs +++ b/encodings/dict/src/compute.rs @@ -1,7 +1,7 @@ use vortex::compute::unary::{scalar_at, scalar_at_unchecked, ScalarAtFn}; use vortex::compute::{slice, take, ArrayCompute, SliceFn, TakeFn}; use vortex::Array; -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::DictArray; @@ -30,7 +30,7 @@ impl ScalarAtFn for DictArray { let dict_index: usize = scalar_at_unchecked(&self.codes(), index) .as_ref() .try_into() - .unwrap(); + .unwrap_or_else(|err| vortex_panic!("Invalid dict index", err)); scalar_at_unchecked(&self.values(), dict_index) } diff --git a/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs b/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs index 9a7d5c2fd..815591c36 100644 --- a/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs +++ b/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs @@ -1,6 +1,6 @@ use vortex::compute::unary::{scalar_at_unchecked, ScalarAtFn}; use vortex::ArrayDType; -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexResult}; use 
vortex_scalar::Scalar; use crate::{unpack_single, BitPackedArray}; @@ -18,7 +18,7 @@ impl ScalarAtFn for BitPackedArray { } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - self.scalar_at(index).unwrap() + self.scalar_at(index).unwrap_or_else(|err| vortex_panic!(err)) } } diff --git a/encodings/fastlanes/src/for/compute.rs b/encodings/fastlanes/src/for/compute.rs index 1c77b5013..496bd5d7c 100644 --- a/encodings/fastlanes/src/for/compute.rs +++ b/encodings/fastlanes/src/for/compute.rs @@ -5,7 +5,7 @@ use vortex::compute::{ }; use vortex::{Array, ArrayDType}; use vortex_dtype::match_each_integer_ptype; -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexResult}; use vortex_scalar::{PrimitiveScalar, Scalar, ScalarValue}; use crate::FoRArray; @@ -47,8 +47,8 @@ impl ScalarAtFn for FoRArray { fn scalar_at_unchecked(&self, index: usize) -> Scalar { let encoded_scalar = scalar_at_unchecked(&self.encoded(), index).reinterpret_cast(self.ptype()); - let encoded = PrimitiveScalar::try_from(&encoded_scalar).unwrap(); - let reference = PrimitiveScalar::try_from(self.reference()).unwrap(); + let encoded = PrimitiveScalar::try_from(&encoded_scalar).unwrap_or_else(|err| vortex_panic!("Invalid encoded scalar", err)); + let reference = PrimitiveScalar::try_from(self.reference()).unwrap_or_else(|err| vortex_panic!("Invalid reference scalar", err)); match_each_integer_ptype!(encoded.ptype(), |$P| { use num_traits::WrappingAdd; diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 5ea534359..b4af4e3ca 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -8,7 +8,7 @@ use vortex::variants::{ArrayVariants, BinaryArrayTrait, Utf8ArrayTrait}; use vortex::visitor::AcceptArrayVisitor; use vortex::{impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, IntoCanonical}; use vortex_dtype::{DType, Nullability, PType}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, 
VortexResult}; impl_encoding!("vortex.fsst", 24u16, FSST); @@ -85,9 +85,9 @@ impl FSSTArray { let symbols_array = self .symbols() .into_canonical() - .unwrap() + .unwrap_or_else(|err| vortex_panic!(err)) .into_primitive() - .expect("Symbols must be a Primitive Array"); + .unwrap_or_else(|err| vortex_panic!(Context: "Symbols must be a Primitive Array", err)); let symbols = symbols_array.maybe_null_slice::(); // Transmute the 64-bit symbol values into fsst `Symbol`s. diff --git a/encodings/roaring/src/integer/compute.rs b/encodings/roaring/src/integer/compute.rs index ae5c0c702..1ae852145 100644 --- a/encodings/roaring/src/integer/compute.rs +++ b/encodings/roaring/src/integer/compute.rs @@ -1,7 +1,7 @@ use vortex::compute::unary::ScalarAtFn; use vortex::compute::ArrayCompute; use vortex_dtype::PType; -use vortex_error::{vortex_err, VortexResult}; +use vortex_error::{vortex_err, vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::RoaringIntArray; @@ -29,6 +29,6 @@ impl ScalarAtFn for RoaringIntArray { } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - ::scalar_at(self, index).unwrap() + ::scalar_at(self, index).unwrap_or_else(|err| vortex_panic!(err)) } } diff --git a/encodings/runend-bool/src/compute.rs b/encodings/runend-bool/src/compute.rs index 9fce23689..910b7ad2d 100644 --- a/encodings/runend-bool/src/compute.rs +++ b/encodings/runend-bool/src/compute.rs @@ -3,7 +3,7 @@ use vortex::compute::unary::ScalarAtFn; use vortex::compute::{slice, ArrayCompute, SliceFn, TakeFn}; use vortex::{Array, IntoArray, IntoArrayVariant, ToArray}; use vortex_dtype::match_each_integer_ptype; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::compress::value_at_index; @@ -35,8 +35,7 @@ impl ScalarAtFn for RunEndBoolArray { fn scalar_at_unchecked(&self, index: usize) -> Scalar { let start = self.start(); Scalar::from(value_at_index( - self.find_physical_index(index) 
- .expect("Search must be implemented for the underlying index array"), + self.find_physical_index(index).unwrap_or_else(|err| vortex_panic!("Search must be implemented for the underlying index array", err)), start, )) } diff --git a/vortex-array/src/array/sparse/compute/mod.rs b/vortex-array/src/array/sparse/compute/mod.rs index 69b04e018..4f5f646c9 100644 --- a/vortex-array/src/array/sparse/compute/mod.rs +++ b/vortex-array/src/array/sparse/compute/mod.rs @@ -1,4 +1,4 @@ -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::array::sparse::SparseArray; @@ -40,12 +40,16 @@ impl ScalarAtFn for SparseArray { fn scalar_at_unchecked(&self, index: usize) -> Scalar { match self .find_index(index) - .unwrap_or_else(|err| panic!("{}", err)) + .unwrap_or_else(|err| vortex_panic!(err)) { - None => self.fill_value().clone().cast(self.dtype()).unwrap(), + None => self + .fill_value() + .clone() + .cast(self.dtype()) + .unwrap_or_else(|err| vortex_panic!(err)), Some(idx) => scalar_at_unchecked(&self.values(), idx) .cast(self.dtype()) - .unwrap(), + .unwrap_or_else(|err| vortex_panic!(err)), } } } diff --git a/vortex-array/src/array/varbin/compute/mod.rs b/vortex-array/src/array/varbin/compute/mod.rs index 584d2af75..20f000a16 100644 --- a/vortex-array/src/array/varbin/compute/mod.rs +++ b/vortex-array/src/array/varbin/compute/mod.rs @@ -1,4 +1,4 @@ -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::array::varbin::{varbin_scalar, VarBinArray}; @@ -30,6 +30,10 @@ impl ScalarAtFn for VarBinArray { } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - varbin_scalar(self.bytes_at(index).unwrap(), self.dtype()) + varbin_scalar( + self.bytes_at(index) + .unwrap_or_else(|err| vortex_panic!(err)), + self.dtype(), + ) } } diff --git a/vortex-array/src/array/varbinview/compute.rs b/vortex-array/src/array/varbinview/compute.rs index 
2fe3b4132..fca7a6812 100644 --- a/vortex-array/src/array/varbinview/compute.rs +++ b/vortex-array/src/array/varbinview/compute.rs @@ -1,5 +1,5 @@ use vortex_buffer::Buffer; -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::array::varbin::varbin_scalar; @@ -25,7 +25,8 @@ impl ScalarAtFn for VarBinViewArray { } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - ::scalar_at(self, index).unwrap() + ::scalar_at(self, index) + .unwrap_or_else(|err| vortex_panic!(err)) } } diff --git a/vortex-array/src/compute/unary/scalar_at.rs b/vortex-array/src/compute/unary/scalar_at.rs index 530a8c0a5..29bdaa609 100644 --- a/vortex-array/src/compute/unary/scalar_at.rs +++ b/vortex-array/src/compute/unary/scalar_at.rs @@ -1,4 +1,4 @@ -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::{Array, ArrayDType}; @@ -29,6 +29,5 @@ pub fn scalar_at(array: &Array, index: usize) -> VortexResult { pub fn scalar_at_unchecked(array: &Array, index: usize) -> Scalar { array .with_dyn(|a| a.scalar_at().map(|s| s.scalar_at_unchecked(index))) - .ok_or_else(|| vortex_err!(NotImplemented: "scalar_at", array.encoding().id())) - .unwrap() + .unwrap_or_else(|| vortex_panic!(NotImplemented: "scalar_at", array.encoding().id())) } diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 4fd305a5d..2211a66a0 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -153,6 +153,11 @@ pub enum VortexError { ), } +#[allow(clippy::panic)] +pub fn __vortex_panic_do_not_call_directly(err: VortexError) -> ! { + panic!("{}", err) +} + pub type VortexResult = Result; impl Debug for VortexError { @@ -187,7 +192,7 @@ macro_rules! 
vortex_err { $crate::VortexError::MismatchedTypes($expected.to_string().into(), $actual.to_string().into(), Backtrace::capture()) ) }}; - (Context: $fmt:literal $(, $arg:expr)*, $err:expr $(,)?) => {{ + (Context: $fmt:literal, $err:expr $(,)?) => {{ use std::backtrace::Backtrace; $crate::__private::must_use( $crate::VortexError::Context(format!($fmt, $($arg),*).into(), Box::new($err)) @@ -219,14 +224,18 @@ macro_rules! vortex_bail { #[macro_export] macro_rules! vortex_panic { ($variant:ident: $fmt:literal $(, $arg:expr)* $(,)?) => { - panic!("{}", $crate::vortex_err!($variant: $fmt, $($arg),*)) + vortex_panic!($crate::vortex_err!($variant: $fmt, $($arg),*)) }; ($msg:literal, $err:expr) => { - panic!("{}", $crate::vortex_err!(Context: $msg, $err)) + vortex_panic!($crate::vortex_err!(Context: $msg, $err)) }; ($msg:literal) => { - panic!("{}", $crate::vortex_err!($msg)) + vortex_panic!($crate::vortex_err!($msg)) }; + ($err:expr) => {{ + let err: $crate::VortexError = $err; + $crate::__vortex_panic_do_not_call_directly(err) + }}; } #[cfg(feature = "datafusion")] From 87864b33faa521e1d9e2f5c1bb9ae1d67005bf6f Mon Sep 17 00:00:00 2001 From: Will Manning Date: Tue, 3 Sep 2024 23:18:31 -0400 Subject: [PATCH 21/39] wip --- encodings/alp/benches/alp_compress.rs | 2 +- encodings/alp/src/array.rs | 5 +-- vortex-error/src/lib.rs | 59 +++++++++++++++++++++------ 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/encodings/alp/benches/alp_compress.rs b/encodings/alp/benches/alp_compress.rs index 24f06aa56..2450bd6f7 100644 --- a/encodings/alp/benches/alp_compress.rs +++ b/encodings/alp/benches/alp_compress.rs @@ -50,7 +50,7 @@ where fn alp_canonicalize_sum(array: ALPArray) -> T::Native { let array = array.into_canonical().unwrap().into_arrow(); - let arrow_primitive = as_primitive_array::(array.as_ref()); + let arrow_primitive = as_primitive_array::(array.as_ref().unwrap()); arrow_primitive .iter() .fold(T::default_value(), |acc, value| { diff --git 
a/encodings/alp/src/array.rs b/encodings/alp/src/array.rs index 938463b05..8f712a4ce 100644 --- a/encodings/alp/src/array.rs +++ b/encodings/alp/src/array.rs @@ -12,7 +12,7 @@ use vortex::{ impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoCanonical, }; use vortex_dtype::{DType, PType}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use crate::alp::Exponents; use crate::compress::{alp_encode, decompress}; @@ -212,7 +212,6 @@ impl PrimitiveArrayTrait for ALPArray { } } - #[allow(clippy::unwrap_in_result)] fn f64_accessor(&self) -> Option> { match self.dtype() { DType::Primitive(PType::F64, _) => { @@ -223,7 +222,7 @@ impl PrimitiveArrayTrait for ALPArray { let encoded = self .encoded() .with_dyn(|a| a.as_primitive_array_unchecked().i64_accessor()) - .expect("This is is an invariant of the ALP algorithm"); + .vortex_expect("This is is an invariant of the ALP algorithm"); Some(Arc::new(ALPAccessor::new( encoded, patches, diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 77b5e0549..2cee1b24a 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -153,11 +153,6 @@ pub enum VortexError { ), } -#[allow(clippy::panic)] -pub fn __vortex_panic_do_not_call_directly(err: VortexError) -> ! { - panic!("{}", err) -} - pub type VortexResult = Result; impl Debug for VortexError { @@ -166,6 +161,45 @@ impl Debug for VortexError { } } +pub trait VortexPanic { + fn panic(self) -> !; + + fn panic_with_context(self, msg: &str) -> !; +} + +impl VortexPanic for VortexError { + #[allow(clippy::panic)] + fn panic(self) -> ! { + panic!("{}", self) + } + + fn panic_with_context(self, msg: &str) -> ! 
{ + VortexError::Context(msg.to_string().into(), Box::new(self)).panic() + } +} + +pub trait VortexExpect { + type Output; + + fn vortex_expect(self, msg: &str) -> Self::Output; +} + +impl VortexExpect for VortexResult { + type Output = T; + + fn vortex_expect(self, msg: &str) -> Self::Output { + self.unwrap_or_else(|e| e.panic_with_context(msg)) + } +} + +impl VortexExpect for Option { + type Output = T; + + fn vortex_expect(self, msg: &str) -> Self::Output { + self.unwrap_or_else(|| VortexError::InvalidArgument(msg.to_string().into(), Backtrace::capture()).panic()) + } +} + #[macro_export] macro_rules! vortex_err { (OutOfBounds: $idx:expr, $start:expr, $stop:expr) => {{ @@ -224,17 +258,18 @@ macro_rules! vortex_bail { #[macro_export] macro_rules! vortex_panic { ($variant:ident: $fmt:literal $(, $arg:expr)* $(,)?) => { - vortex_panic!($crate::vortex_err!($variant: $fmt, $($arg),*)) - }; - ($msg:literal, $err:expr) => { - vortex_panic!($crate::vortex_err!(Context: $msg, $err)) + $crate::vortex_panic!($crate::vortex_err!($variant: $fmt, $($arg),*)) }; + ($msg:literal, $err:expr) => {{ + use $crate::VortexPanic; + ($err).context_panic($msg) + }}; ($msg:literal) => { - vortex_panic!($crate::vortex_err!($msg)) + $crate::vortex_panic!($crate::vortex_err!($msg)) }; ($err:expr) => {{ - let err: $crate::VortexError = $err; - $crate::__vortex_panic_do_not_call_directly(err) + use $crate::VortexPanic; + ($err).panic() }}; } From e7f3de8e13c4ce26edd2302cfa9709dd906e2931 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Wed, 4 Sep 2024 10:19:54 -0400 Subject: [PATCH 22/39] wip: --- vortex-error/src/lib.rs | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 2cee1b24a..a79f531c2 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -168,16 +168,33 @@ pub trait VortexPanic { } impl VortexPanic for VortexError { + #[inline(always)] #[allow(clippy::panic)] fn panic(self) 
-> ! { panic!("{}", self) } + #[inline(always)] fn panic_with_context(self, msg: &str) -> ! { VortexError::Context(msg.to_string().into(), Box::new(self)).panic() } } +pub trait VortexUnwrap { + type Output; + + fn vortex_unwrap(self) -> Self::Output; +} + +impl VortexUnwrap for VortexResult { + type Output = T; + + #[inline(always)] + fn vortex_unwrap(self) -> Self::Output { + self.unwrap_or_else(|err| err.panic()) + } +} + pub trait VortexExpect { type Output; @@ -187,6 +204,7 @@ pub trait VortexExpect { impl VortexExpect for VortexResult { type Output = T; + #[inline(always)] fn vortex_expect(self, msg: &str) -> Self::Output { self.unwrap_or_else(|e| e.panic_with_context(msg)) } @@ -195,6 +213,7 @@ impl VortexExpect for VortexResult { impl VortexExpect for Option { type Output = T; + #[inline(always)] fn vortex_expect(self, msg: &str) -> Self::Output { self.unwrap_or_else(|| VortexError::InvalidArgument(msg.to_string().into(), Backtrace::capture()).panic()) } @@ -226,10 +245,10 @@ macro_rules! vortex_err { $crate::VortexError::MismatchedTypes($expected.to_string().into(), $actual.to_string().into(), Backtrace::capture()) ) }}; - (Context: $fmt:literal, $err:expr $(,)?) => {{ + (Context: $msg:literal, $err:expr) => {{ use std::backtrace::Backtrace; $crate::__private::must_use( - $crate::VortexError::Context($fmt.into(), Box::new($err)) + $crate::VortexError::Context($msg.into(), Box::new($err)) ) }}; ($variant:ident: $fmt:literal $(, $arg:expr)* $(,)?) => {{ @@ -262,7 +281,7 @@ macro_rules! 
vortex_panic { }; ($msg:literal, $err:expr) => {{ use $crate::VortexPanic; - ($err).context_panic($msg) + ($err).panic_with_context($msg) }}; ($msg:literal) => { $crate::vortex_panic!($crate::vortex_err!($msg)) From 5389d4f3755a130a398a1c865b5a4b6ef0c28aca Mon Sep 17 00:00:00 2001 From: Will Manning Date: Wed, 4 Sep 2024 17:35:11 -0400 Subject: [PATCH 23/39] moar --- encodings/fastlanes/src/for/compute.rs | 4 +- encodings/runend-bool/src/compute.rs | 2 +- vortex-array/src/array/bool/mod.rs | 16 +++---- vortex-array/src/array/chunked/compute/mod.rs | 4 +- vortex-array/src/array/chunked/mod.rs | 28 +++++------ vortex-array/src/array/chunked/variants.rs | 11 ++--- vortex-array/src/array/constant/mod.rs | 12 ++--- vortex-array/src/array/constant/variants.rs | 18 ++++--- vortex-array/src/array/datetime/mod.rs | 6 +-- vortex-array/src/array/extension/mod.rs | 6 +-- vortex-array/src/array/null/mod.rs | 4 +- .../src/array/primitive/compute/take.rs | 4 +- vortex-array/src/array/primitive/mod.rs | 21 ++++---- vortex-array/src/array/sparse/mod.rs | 35 +++++--------- vortex-array/src/array/varbin/mod.rs | 6 +-- vortex-array/src/canonical.rs | 4 +- vortex-array/src/implementation.rs | 8 ++-- vortex-datafusion/src/datatype.rs | 2 +- vortex-datetime-dtype/src/arrow.rs | 10 ++-- vortex-datetime-dtype/src/temporal.rs | 4 +- vortex-dtype/src/field.rs | 3 +- vortex-error/src/lib.rs | 48 ++++++++++++++----- vortex-scalar/src/list.rs | 4 +- vortex-scalar/src/primitive.rs | 20 ++++---- vortex-scalar/src/serde/flatbuffers.rs | 4 +- 25 files changed, 144 insertions(+), 140 deletions(-) diff --git a/encodings/fastlanes/src/for/compute.rs b/encodings/fastlanes/src/for/compute.rs index 7f6a7a459..2d6a19efc 100644 --- a/encodings/fastlanes/src/for/compute.rs +++ b/encodings/fastlanes/src/for/compute.rs @@ -47,8 +47,8 @@ impl ScalarAtFn for FoRArray { fn scalar_at_unchecked(&self, index: usize) -> Scalar { let encoded_scalar = scalar_at_unchecked(&self.encoded(), 
index).reinterpret_cast(self.ptype()); - let encoded = PrimitiveScalar::try_from(&encoded_scalar).unwrap_or_else(|err| vortex_panic!("Invalid encoded scalar", err)); - let reference = PrimitiveScalar::try_from(self.reference()).unwrap_or_else(|err| vortex_panic!("Invalid reference scalar", err)); + let encoded = PrimitiveScalar::try_from(&encoded_scalar).vortex_expect("Invalid encoded scalar"); + let reference = PrimitiveScalar::try_from(self.reference()).vortex_expect("Invalid reference scalar"); match_each_integer_ptype!(encoded.ptype(), |$P| { use num_traits::WrappingAdd; diff --git a/encodings/runend-bool/src/compute.rs b/encodings/runend-bool/src/compute.rs index 910b7ad2d..4d0fe4e44 100644 --- a/encodings/runend-bool/src/compute.rs +++ b/encodings/runend-bool/src/compute.rs @@ -35,7 +35,7 @@ impl ScalarAtFn for RunEndBoolArray { fn scalar_at_unchecked(&self, index: usize) -> Scalar { let start = self.start(); Scalar::from(value_at_index( - self.find_physical_index(index).unwrap_or_else(|err| vortex_panic!("Search must be implemented for the underlying index array", err)), + self.find_physical_index(index).vortex_expect("Search must be implemented for the underlying index array"), start, )) } diff --git a/vortex-array/src/array/bool/mod.rs b/vortex-array/src/array/bool/mod.rs index 6eb8ef355..744fc69f1 100644 --- a/vortex-array/src/array/bool/mod.rs +++ b/vortex-array/src/array/bool/mod.rs @@ -4,7 +4,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use vortex_buffer::Buffer; use vortex_dtype::DType; -use vortex_error::VortexResult; +use vortex_error::{VortexExpect as _, VortexResult}; use crate::stats::StatsSet; use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; @@ -29,7 +29,7 @@ impl BoolArray { pub fn buffer(&self) -> &Buffer { self.array() .buffer() - .unwrap_or_else(|| panic!("Missing buffer in BoolArray")) + .vortex_expect("Missing buffer in BoolArray") } pub fn boolean_buffer(&self) -> BooleanBuffer { @@ 
-75,7 +75,7 @@ impl BoolArray { pub fn from_vec(bools: Vec, validity: Validity) -> Self { let buffer = BooleanBuffer::from(bools); Self::try_new(buffer, validity) - .unwrap_or_else(|err| panic!("Failed to create BoolArray from vec: {}", err)) + .vortex_expect("Failed to create BoolArray from vec") } } @@ -100,7 +100,7 @@ impl BoolArrayTrait for BoolArray { impl From for BoolArray { fn from(value: BooleanBuffer) -> Self { Self::try_new(value, Validity::NonNullable) - .unwrap_or_else(|err| panic!("Failed to create BoolArray from BooleanBuffer: {}", err)) + .vortex_expect("Failed to create BoolArray from BooleanBuffer") } } @@ -123,12 +123,8 @@ impl FromIterator> for BoolArray { }) .collect::>(); - Self::try_new(BooleanBuffer::from(values), Validity::from(validity)).unwrap_or_else(|err| { - panic!( - "Failed to create BoolArray from iterator of Option: {}", - err - ) - }) + Self::try_new(BooleanBuffer::from(values), Validity::from(validity)) + .vortex_expect("Failed to create BoolArray from iterator of Option") } } diff --git a/vortex-array/src/array/chunked/compute/mod.rs b/vortex-array/src/array/chunked/compute/mod.rs index deea1dac8..fc90a85aa 100644 --- a/vortex-array/src/array/chunked/compute/mod.rs +++ b/vortex-array/src/array/chunked/compute/mod.rs @@ -1,5 +1,5 @@ use vortex_dtype::{DType, Nullability}; -use vortex_error::{vortex_err, VortexResult}; +use vortex_error::{vortex_err, vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::array::chunked::ChunkedArray; @@ -54,7 +54,7 @@ impl ScalarAtFn for ChunkedArray { scalar_at_unchecked( &self .chunk(chunk_index) - .unwrap_or_else(|| panic!("{}", vortex_err!(OutOfBounds: chunk_index, 0, self.nchunks()))), + .unwrap_or_else(|| vortex_panic!(OutOfBounds: chunk_index, 0, self.nchunks())), chunk_offset, ) } diff --git a/vortex-array/src/array/chunked/mod.rs b/vortex-array/src/array/chunked/mod.rs index fd144e537..13e879700 100644 --- a/vortex-array/src/array/chunked/mod.rs +++ 
b/vortex-array/src/array/chunked/mod.rs @@ -6,7 +6,7 @@ use futures_util::stream; use itertools::Itertools; use serde::{Deserialize, Serialize}; use vortex_dtype::{DType, Nullability, PType}; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::array::primitive::PrimitiveArray; @@ -87,16 +87,14 @@ impl ChunkedArray { pub fn chunk_offsets(&self) -> Array { self.array() .child(0, &Self::ENDS_DTYPE, self.nchunks() + 1) - .unwrap_or_else(|| panic!("Missing chunk ends in ChunkedArray")) + .vortex_expect("Missing chunk ends in ChunkedArray") } pub fn find_chunk_idx(&self, index: usize) -> (usize, usize) { assert!(index <= self.len(), "Index out of bounds of the array"); let search_result = search_sorted(&self.chunk_offsets(), index, SearchSortedSide::Left) - .unwrap_or_else(|err| { - panic!("Search sorted failed in find_chunk_idx: {}", err); - }); + .vortex_expect("Search sorted failed in find_chunk_idx"); let index_chunk = match search_result { SearchResult::Found(i) => { if i == self.nchunks() { @@ -109,9 +107,7 @@ impl ChunkedArray { }; let chunk_start = &scalar_at(&self.chunk_offsets(), index_chunk) .and_then(|s| usize::try_from(&s)) - .unwrap_or_else(|err| { - panic!("Failed to find chunk start in find_chunk_idx: {}", err); - }); + .vortex_expect("Failed to find chunk start in find_chunk_idx"); let index_in_chunk = index - chunk_start; (index_chunk, index_in_chunk) @@ -119,13 +115,13 @@ impl ChunkedArray { pub fn chunks(&self) -> impl Iterator + '_ { (0..self.nchunks()).map(|c| { - self.chunk(c).unwrap_or_else(|| { - panic!( + self.chunk(c).unwrap_or_else(|| + vortex_panic!( "Chunk should {} exist but doesn't (nchunks: {})", c, self.nchunks() - ); - }) + ) + ) }) } @@ -146,10 +142,8 @@ impl FromIterator for ChunkedArray { let dtype = chunks .first() .map(|c| c.dtype().clone()) - .unwrap_or_else(|| panic!("Cannot infer DType from an empty 
iterator")); - Self::try_new(chunks, dtype).unwrap_or_else(|err| { - panic!("Failed to create chunked array from iterator: {}", err); - }) + .vortex_expect("Cannot infer DType from an empty iterator"); + Self::try_new(chunks, dtype).vortex_expect("Failed to create chunked array from iterator") } } @@ -167,7 +161,7 @@ impl ArrayValidity for ChunkedArray { fn is_valid(&self, index: usize) -> bool { let (chunk, offset_in_chunk) = self.find_chunk_idx(index); self.chunk(chunk) - .unwrap_or_else(|| panic!("{}", vortex_err!(OutOfBounds: chunk, 0, self.nchunks()))) + .unwrap_or_else(|| vortex_panic!(OutOfBounds: chunk, 0, self.nchunks())) .with_dyn(|a| a.is_valid(offset_in_chunk)) } diff --git a/vortex-array/src/array/chunked/variants.rs b/vortex-array/src/array/chunked/variants.rs index 45547ed9e..c5f913f30 100644 --- a/vortex-array/src/array/chunked/variants.rs +++ b/vortex-array/src/array/chunked/variants.rs @@ -1,4 +1,5 @@ use vortex_dtype::DType; +use vortex_error::VortexExpect as _; use crate::array::chunked::ChunkedArray; use crate::variants::{ @@ -70,12 +71,10 @@ impl StructArrayTrait for ChunkedArray { let projected_dtype = self.dtype().as_struct().and_then(|s| s.dtypes().get(idx))?; let chunked = ChunkedArray::try_new(chunks, projected_dtype.clone()) - .unwrap_or_else(|err| { - panic!( - "Failed to create new chunked array with dtype {}: {}", - projected_dtype, err - ) - }) + .vortex_expect_lazy(|| format!( + "Failed to create new chunked array with dtype {}", + projected_dtype + )) .into_array(); Some(chunked) } diff --git a/vortex-array/src/array/constant/mod.rs b/vortex-array/src/array/constant/mod.rs index 1c07ee376..050826d4f 100644 --- a/vortex-array/src/array/constant/mod.rs +++ b/vortex-array/src/array/constant/mod.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use serde::{Deserialize, Serialize}; -use vortex_error::VortexResult; +use vortex_error::{VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::stats::{Stat, StatsSet}; @@ 
-47,11 +47,11 @@ impl ConstantArray { [].into(), stats, ) - .unwrap_or_else(|err| { - panic!( - "Failed to create Constant array of length {} from scalar {}: {}", - length, scalar, err - ); + .vortex_expect_lazy(|| { + format!( + "Failed to create Constant array of length {} from scalar {}", + length, scalar + ) }) } diff --git a/vortex-array/src/array/constant/variants.rs b/vortex-array/src/array/constant/variants.rs index 39b213120..29082e909 100644 --- a/vortex-array/src/array/constant/variants.rs +++ b/vortex-array/src/array/constant/variants.rs @@ -2,7 +2,7 @@ use std::iter; use std::sync::Arc; use vortex_dtype::{DType, PType}; -use vortex_error::{vortex_panic, VortexError}; +use vortex_error::{VortexError, VortexExpect as _}; use vortex_scalar::{Scalar, StructScalar}; use crate::array::constant::ConstantArray; @@ -53,9 +53,9 @@ impl NullArrayTrait for ConstantArray {} impl BoolArrayTrait for ConstantArray { fn maybe_null_indices_iter(&self) -> Box> { - let value = self.scalar().value().as_bool().unwrap_or_else(|err| { - panic!("Failed to get bool value from constant array: {}", err); - }); + let value = self.scalar().value().as_bool().vortex_expect( + "Failed to get bool value from constant array" + ); if value.unwrap_or(false) { Box::new(0..self.len()) } else { @@ -65,9 +65,9 @@ impl BoolArrayTrait for ConstantArray { fn maybe_null_slices_iter(&self) -> Box> { // Must be a boolean scalar - let value = self.scalar().value().as_bool().unwrap_or_else(|err| { - panic!("Failed to get bool value from constant array: {}", err); - }); + let value = self.scalar().value().as_bool().vortex_expect( + "Failed to get bool value from constant array" + ); if value.unwrap_or(false) { Box::new(iter::once((0, self.len()))) @@ -91,9 +91,7 @@ where } fn value_unchecked(&self, _index: usize) -> T { - T::try_from(self.scalar().clone()).unwrap_or_else(|err| { - vortex_panic!("Failed to convert scalar to value", err); - }) + T::try_from(self.scalar().clone()).vortex_expect("Failed 
to convert scalar to value") } fn array_validity(&self) -> Validity { diff --git a/vortex-array/src/array/datetime/mod.rs b/vortex-array/src/array/datetime/mod.rs index 688d9901e..ddb2392cf 100644 --- a/vortex-array/src/array/datetime/mod.rs +++ b/vortex-array/src/array/datetime/mod.rs @@ -3,7 +3,7 @@ mod test; use vortex_datetime_dtype::{TemporalMetadata, TimeUnit, DATE_ID, TIMESTAMP_ID, TIME_ID}; use vortex_dtype::{DType, ExtDType}; -use vortex_error::VortexError; +use vortex_error::{vortex_panic, VortexError}; use crate::array::ExtensionArray; use crate::{Array, ArrayDType, ArrayData, IntoArray, ToArrayData}; @@ -84,7 +84,7 @@ impl TemporalArray { Some(TemporalMetadata::Date(time_unit).into()), ) } - _ => panic!("invalid TimeUnit {time_unit} for vortex.date"), + _ => vortex_panic!("invalid TimeUnit {time_unit} for vortex.date"), }; Self { @@ -116,7 +116,7 @@ impl TemporalArray { match time_unit { TimeUnit::S | TimeUnit::Ms => assert_width!(i32, array), TimeUnit::Us | TimeUnit::Ns => assert_width!(i64, array), - TimeUnit::D => panic!("invalid unit D for vortex.time data"), + TimeUnit::D => vortex_panic!("invalid unit D for vortex.time data"), } let temporal_metadata = TemporalMetadata::Time(time_unit); diff --git a/vortex-array/src/array/extension/mod.rs b/vortex-array/src/array/extension/mod.rs index 9c1b12302..8e2783060 100644 --- a/vortex-array/src/array/extension/mod.rs +++ b/vortex-array/src/array/extension/mod.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; use vortex_dtype::{DType, ExtDType, ExtID}; -use vortex_error::VortexResult; +use vortex_error::{VortexExpect as _, VortexResult}; use crate::stats::ArrayStatisticsCompute; use crate::validity::{ArrayValidity, LogicalValidity}; @@ -28,13 +28,13 @@ impl ExtensionArray { [storage].into(), Default::default(), ) - .unwrap_or_else(|err| panic!("Invalid ExtensionArray: {err}")) + .vortex_expect("Invalid ExtensionArray") } pub fn storage(&self) -> Array { self.array() .child(0, 
&self.metadata().storage_dtype, self.len()) - .unwrap_or_else(|| panic!("Missing storage array for ExtensionArray")) + .vortex_expect("Missing storage array for ExtensionArray") } #[allow(dead_code)] diff --git a/vortex-array/src/array/null/mod.rs b/vortex-array/src/array/null/mod.rs index 7215971c6..708d90d3c 100644 --- a/vortex-array/src/array/null/mod.rs +++ b/vortex-array/src/array/null/mod.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; use vortex_dtype::DType; -use vortex_error::VortexResult; +use vortex_error::{VortexExpect as _, VortexResult}; use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity, Validity}; @@ -28,7 +28,7 @@ impl NullArray { Arc::new([]), StatsSet::nulls(len, &DType::Null), ) - .unwrap_or_else(|err| panic!("NullArray::new should never fail! Got: {}", err)) + .vortex_expect("NullArray::new should never fail!") } } diff --git a/vortex-array/src/array/primitive/compute/take.rs b/vortex-array/src/array/primitive/compute/take.rs index 2603dcedb..7564cd7f8 100644 --- a/vortex-array/src/array/primitive/compute/take.rs +++ b/vortex-array/src/array/primitive/compute/take.rs @@ -1,6 +1,6 @@ use num_traits::PrimInt; use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype, NativePType}; -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexResult}; use crate::array::primitive::PrimitiveArray; use crate::compute::TakeFn; @@ -26,7 +26,7 @@ fn take_primitive(array: &[T], indices .iter() .map(|&idx| { array[idx.to_usize().unwrap_or_else(|| { - panic!("Failed to convert index to usize: {}", idx); + vortex_panic!("Failed to convert index to usize: {}", idx); })] }) .collect() diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 04fa396c6..6328e359c 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -8,7 +8,7 @@ use num_traits::AsPrimitive; use 
serde::{Deserialize, Serialize}; use vortex_buffer::Buffer; use vortex_dtype::{match_each_native_ptype, DType, NativePType, PType}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use crate::iter::{Accessor, AccessorRef}; use crate::stats::StatsSet; @@ -49,13 +49,13 @@ impl PrimitiveArray { PrimitiveMetadata { validity: validity .to_metadata(length) - .unwrap_or_else(|err| panic!("Invalid validity: {err}")), + .vortex_expect("Invalid validity"), }, Some(buffer), validity.into_array().into_iter().collect_vec().into(), StatsSet::new(), ) - .unwrap_or_else(|err| panic!("PrimitiveArray::new should never fail! Got: {err}")), + .vortex_expect("PrimitiveArray::new should never fail!"), } } @@ -90,15 +90,18 @@ impl PrimitiveArray { pub fn ptype(&self) -> PType { // TODO(ngates): we can't really cache this anywhere? - self.dtype().try_into().unwrap_or_else(|err| { - panic!("Failed to convert dtype {} to ptype: {}", self.dtype(), err); - }) + self.dtype() + .try_into() + .vortex_expect_lazy(|| format!( + "Failed to convert dtype {} to ptype", + self.dtype() + )) } pub fn buffer(&self) -> &Buffer { self.array() .buffer() - .unwrap_or_else(|| panic!("Missing buffer in PrimitiveArray")) + .vortex_expect("Missing buffer in PrimitiveArray") } pub fn maybe_null_slice(&self) -> &[T] { @@ -174,7 +177,7 @@ impl PrimitiveArray { pub fn into_buffer(self) -> Buffer { self.into_array() .into_buffer() - .unwrap_or_else(|| panic!("PrimitiveArray must have a buffer")) + .vortex_expect("PrimitiveArray must have a buffer") } } @@ -315,7 +318,7 @@ impl AcceptArrayVisitor for PrimitiveArray { impl Array { pub fn as_primitive(&self) -> PrimitiveArray { PrimitiveArray::try_from(self) - .unwrap_or_else(|err| panic!("Expected primitive array: {err}")) + .vortex_expect("Expected primitive array") } } diff --git a/vortex-array/src/array/sparse/mod.rs b/vortex-array/src/array/sparse/mod.rs index b4a3d56df..2baedc54a 100644 --- 
a/vortex-array/src/array/sparse/mod.rs +++ b/vortex-array/src/array/sparse/mod.rs @@ -1,6 +1,6 @@ use ::serde::{Deserialize, Serialize}; use vortex_dtype::{match_each_integer_ptype, DType}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::array::constant::ConstantArray; @@ -96,7 +96,7 @@ impl SparseArray { pub fn values(&self) -> Array { self.array() .child(1, self.dtype(), self.metadata().indices_len) - .unwrap_or_else(|| panic!("Missing child array in SparseArray")) + .vortex_expect("Missing child array in SparseArray") } #[inline] @@ -107,7 +107,7 @@ impl SparseArray { &self.metadata().indices_dtype, self.metadata().indices_len, ) - .unwrap_or_else(|| panic!("Missing indices array in SparseArray")) + .vortex_expect("Missing indices array in SparseArray") } #[inline] @@ -127,9 +127,7 @@ impl SparseArray { /// Return indices as a vector of usize with the indices_offset applied. pub fn resolved_indices(&self) -> Vec { - let flat_indices = self.indices().into_primitive().unwrap_or_else(|err| { - panic!("Failed to convert indices to primitive array: {}", err); - }); + let flat_indices = self.indices().into_primitive().vortex_expect("Failed to convert SparseArray indices to primitive array"); match_each_integer_ptype!(flat_indices.ptype(), |$P| { flat_indices .maybe_null_slice::<$P>() @@ -142,9 +140,7 @@ impl SparseArray { pub fn min_index(&self) -> usize { let min_index: usize = scalar_at(&self.indices(), 0) .and_then(|s| s.as_ref().try_into()) - .unwrap_or_else(|err| { - panic!("Failed to get min_index: {}", err); - }); + .vortex_expect("Failed to get min_index from SparseArray"); min_index - self.indices_offset() } } @@ -162,12 +158,11 @@ impl ArrayStatisticsCompute for SparseArray {} impl ArrayValidity for SparseArray { fn is_valid(&self, index: usize) -> bool { - match self.find_index(index).unwrap_or_else(|err| { - panic!( - "Error while finding index {} in 
sparse array: {}", - index, err - ); - }) { + match self.find_index(index).vortex_expect_lazy(|| format!( + "Error while finding index {} in sparse array", + index + )) + { None => !self.fill_value().is_null(), Some(idx) => self.values().with_dyn(|a| a.is_valid(idx)), } @@ -196,13 +191,9 @@ impl ArrayValidity for SparseArray { false.into(), ) } - .unwrap_or_else(|err| { - panic!( - "Error determining logical validity for sparse array: {}", - err - ); - }); - + .vortex_expect( + "Error determining logical validity for sparse array" + ); LogicalValidity::Array(validity.into_array()) } } diff --git a/vortex-array/src/array/varbin/mod.rs b/vortex-array/src/array/varbin/mod.rs index c79a587e7..128206cbe 100644 --- a/vortex-array/src/array/varbin/mod.rs +++ b/vortex-array/src/array/varbin/mod.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; pub use stats::compute_stats; use vortex_buffer::Buffer; use vortex_dtype::{match_each_native_ptype, DType, NativePType, Nullability}; -use vortex_error::{vortex_bail, VortexError, VortexResult}; +use vortex_error::{vortex_bail, VortexError, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::array::primitive::PrimitiveArray; @@ -75,7 +75,7 @@ impl VarBinArray { pub fn offsets(&self) -> Array { self.array() .child(0, &self.metadata().offsets_dtype, self.len() + 1) - .unwrap_or_else(|| panic!("Missing offsets in VarBinArray")) + .vortex_expect("Missing offsets in VarBinArray") } pub fn first_offset TryFrom<&'a Scalar, Error = VortexError>>( @@ -91,7 +91,7 @@ impl VarBinArray { pub fn bytes(&self) -> Array { self.array() .child(1, &DType::BYTES, self.metadata().bytes_len) - .unwrap_or_else(|| panic!("Missing bytes in VarBinArray")) + .vortex_expect("Missing bytes in VarBinArray") } pub fn validity(&self) -> Validity { diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 2b9b16987..7c1be112b 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -16,7 +16,7 
@@ use arrow_buffer::ScalarBuffer; use arrow_schema::{Field, Fields}; use vortex_datetime_dtype::{is_temporal_ext_type, TemporalMetadata, TimeUnit}; use vortex_dtype::{DType, NativePType, PType}; -use vortex_error::{vortex_bail, vortex_panic, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use crate::array::{ BoolArray, ExtensionArray, NullArray, PrimitiveArray, StructArray, TemporalArray, VarBinArray, @@ -212,7 +212,7 @@ fn struct_to_arrow(struct_array: StructArray) -> ArrayRef { let nulls = struct_array .logical_validity() .to_null_buffer() - .unwrap_or_else(|err| vortex_panic!("Failed to get null buffer from logical validity", err)); + .vortex_expect("Failed to get null buffer from logical validity"); Arc::new(ArrowStructArray::new(arrow_fields, field_arrays, nulls)) } diff --git a/vortex-array/src/implementation.rs b/vortex-array/src/implementation.rs index 751d39a38..8db14a94a 100644 --- a/vortex-array/src/implementation.rs +++ b/vortex-array/src/implementation.rs @@ -1,6 +1,6 @@ use vortex_buffer::Buffer; use vortex_dtype::DType; -use vortex_error::{vortex_bail, VortexError, VortexResult}; +use vortex_error::{vortex_bail, VortexError, VortexExpect as _, VortexResult}; use crate::encoding::{ArrayEncoding, ArrayEncodingExt, ArrayEncodingRef, EncodingId, EncodingRef}; use crate::stats::{ArrayStatistics, Statistics}; @@ -243,9 +243,7 @@ where children: vec![], }; array.with_dyn(|a| { - a.accept(&mut visitor).unwrap_or_else(|err| { - panic!("Error while visiting Array View children: {err}") - }) + a.accept(&mut visitor).vortex_expect("Error while visiting Array View children") }); ArrayData::try_new( encoding, @@ -256,7 +254,7 @@ where visitor.children.into(), stats, ) - .unwrap_or_else(|err| panic!("Failed to create ArrayData from Array View: {err}")) + .vortex_expect("Failed to create ArrayData from Array View") } } } diff --git a/vortex-datafusion/src/datatype.rs b/vortex-datafusion/src/datatype.rs index 3baf21062..774b19edb 
100644 --- a/vortex-datafusion/src/datatype.rs +++ b/vortex-datafusion/src/datatype.rs @@ -94,7 +94,7 @@ pub(crate) fn infer_data_type(dtype: &DType) -> DataType { if is_temporal_ext_type(ext_dtype.id()) { make_arrow_temporal_dtype(ext_dtype) } else { - panic!("unsupported extension type \"{}\"", ext_dtype.id()) + vortex_panic!("unsupported extension type \"{}\"", ext_dtype.id()) } } } diff --git a/vortex-datetime-dtype/src/arrow.rs b/vortex-datetime-dtype/src/arrow.rs index 9e50723be..2ee839515 100644 --- a/vortex-datetime-dtype/src/arrow.rs +++ b/vortex-datetime-dtype/src/arrow.rs @@ -2,7 +2,7 @@ use arrow_schema::{DataType, TimeUnit as ArrowTimeUnit}; use vortex_dtype::ExtDType; -use vortex_error::{vortex_bail, VortexError, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexError, VortexExpect as _, VortexResult}; use crate::temporal::{TemporalMetadata, DATE_ID, TIMESTAMP_ID, TIME_ID}; use crate::unit::TimeUnit; @@ -54,26 +54,26 @@ pub fn make_temporal_ext_dtype(data_type: &DataType) -> ExtDType { /// panics if the ext_dtype is not a temporal dtype pub fn make_arrow_temporal_dtype(ext_dtype: &ExtDType) -> DataType { match TemporalMetadata::try_from(ext_dtype) - .unwrap_or_else(|err| panic!("make_arrow_temporal_dtype must be called with a temporal ExtDType: {err}")) + .vortex_expect("make_arrow_temporal_dtype must be called with a temporal ExtDType") { TemporalMetadata::Date(time_unit) => match time_unit { TimeUnit::D => DataType::Date32, TimeUnit::Ms => DataType::Date64, - _ => panic!("Invalid TimeUnit {time_unit} for {}", ext_dtype.id()), + _ => vortex_panic!(InvalidArgument: "Invalid TimeUnit {} for {}", time_unit, ext_dtype.id()), }, TemporalMetadata::Time(time_unit) => match time_unit { TimeUnit::S => DataType::Time32(ArrowTimeUnit::Second), TimeUnit::Ms => DataType::Time32(ArrowTimeUnit::Millisecond), TimeUnit::Us => DataType::Time64(ArrowTimeUnit::Microsecond), TimeUnit::Ns => DataType::Time64(ArrowTimeUnit::Nanosecond), - _ => 
panic!("Invalid TimeUnit {time_unit} for {}", ext_dtype.id()), + _ => vortex_panic!(InvalidArgument: "Invalid TimeUnit {} for {}", time_unit, ext_dtype.id()), }, TemporalMetadata::Timestamp(time_unit, tz) => match time_unit { TimeUnit::Ns => DataType::Timestamp(ArrowTimeUnit::Nanosecond, tz.map(|t| t.into())), TimeUnit::Us => DataType::Timestamp(ArrowTimeUnit::Microsecond, tz.map(|t| t.into())), TimeUnit::Ms => DataType::Timestamp(ArrowTimeUnit::Millisecond, tz.map(|t| t.into())), TimeUnit::S => DataType::Timestamp(ArrowTimeUnit::Second, tz.map(|t| t.into())), - _ => panic!("Invalid TimeUnit {time_unit} for {}", ext_dtype.id()), + _ => vortex_panic!(InvalidArgument: "Invalid TimeUnit {} for {}", time_unit, ext_dtype.id()), }, } } diff --git a/vortex-datetime-dtype/src/temporal.rs b/vortex-datetime-dtype/src/temporal.rs index 11e28af4a..6a68ca750 100644 --- a/vortex-datetime-dtype/src/temporal.rs +++ b/vortex-datetime-dtype/src/temporal.rs @@ -98,7 +98,7 @@ impl TemporalMetadata { } use vortex_dtype::{ExtDType, ExtMetadata}; -use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexError, VortexResult}; impl TryFrom<&ExtDType> for TemporalMetadata { type Error = VortexError; @@ -175,7 +175,7 @@ impl From for ExtMetadata { Some(tz) => { let tz_bytes = tz.as_bytes(); let tz_len = u16::try_from(tz_bytes.len()) - .unwrap_or_else(|err| panic!("tz did not fit in u16: {err}")); + .unwrap_or_else(|err| vortex_panic!("tz did not fit in u16: {err}")); meta.extend_from_slice(tz_len.to_le_bytes().as_slice()); meta.extend_from_slice(tz_bytes); } diff --git a/vortex-dtype/src/field.rs b/vortex-dtype/src/field.rs index af18979bd..a5fbfbbc3 100644 --- a/vortex-dtype/src/field.rs +++ b/vortex-dtype/src/field.rs @@ -2,6 +2,7 @@ use core::fmt; use std::fmt::{Display, Formatter}; use itertools::Itertools; +use vortex_error::vortex_panic; #[derive(Clone, Debug, PartialEq, Eq, Hash)] #[cfg_attr(feature = 
"serde", derive(serde::Serialize, serde::Deserialize))] @@ -58,7 +59,7 @@ impl FieldPath { assert_eq!(self.0.len(), 1); match &self.0[0] { Field::Name(name) => name.as_str(), - _ => panic!("FieldPath is not a name"), + _ => vortex_panic!("FieldPath is not a name: {self}"), } } } diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index a79f531c2..e65699b97 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -14,9 +14,10 @@ impl From for ErrString where T: Into>, { + #[allow(clippy::panic)] fn from(msg: T) -> Self { if env::var("VORTEX_PANIC_ON_ERR").as_deref().unwrap_or("") == "1" { - panic!("{}", msg.into()) + panic!("{}\nBacktrace:\n{}", msg.into(), Backtrace::capture()); } else { Self(msg.into()) } @@ -164,7 +165,7 @@ impl Debug for VortexError { pub trait VortexPanic { fn panic(self) -> !; - fn panic_with_context(self, msg: &str) -> !; + fn panic_with_context>(self, msg: T) -> !; } impl VortexPanic for VortexError { @@ -175,8 +176,8 @@ impl VortexPanic for VortexError { } #[inline(always)] - fn panic_with_context(self, msg: &str) -> ! { - VortexError::Context(msg.to_string().into(), Box::new(self)).panic() + fn panic_with_context>(self, msg: T) -> ! 
{ + VortexError::Context(msg.into(), Box::new(self)).panic() } } @@ -199,6 +200,8 @@ pub trait VortexExpect { type Output; fn vortex_expect(self, msg: &str) -> Self::Output; + + fn vortex_expect_lazy String>(self, op: F) -> Self::Output; } impl VortexExpect for VortexResult { @@ -206,7 +209,12 @@ impl VortexExpect for VortexResult { #[inline(always)] fn vortex_expect(self, msg: &str) -> Self::Output { - self.unwrap_or_else(|e| e.panic_with_context(msg)) + self.unwrap_or_else(|e| e.panic_with_context(msg.to_string())) + } + + #[inline(always)] + fn vortex_expect_lazy String>(self, op: F) -> Self::Output { + self.unwrap_or_else(|e| e.panic_with_context(op())) } } @@ -217,6 +225,11 @@ impl VortexExpect for Option { fn vortex_expect(self, msg: &str) -> Self::Output { self.unwrap_or_else(|| VortexError::InvalidArgument(msg.to_string().into(), Backtrace::capture()).panic()) } + + #[inline(always)] + fn vortex_expect_lazy String>(self, op: F) -> Self::Output { + self.unwrap_or_else(|| VortexError::InvalidArgument(op().into(), Backtrace::capture()).panic()) + } } #[macro_export] @@ -276,15 +289,26 @@ macro_rules! vortex_bail { #[macro_export] macro_rules! vortex_panic { + (OutOfBounds: $idx:expr, $start:expr, $stop:expr) => {{ + $crate::vortex_panic!($crate::vortex_err!(OutOfBounds: $idx, $start, $stop)) + }}; + (NotImplemented: $func:expr, $by_whom:expr) => {{ + $crate::vortex_panic!($crate::vortex_err!(NotImplemented: $func, $by_whom)) + }}; + (MismatchedTypes: $expected:literal, $actual:expr) => {{ + $crate::vortex_panic!($crate::vortex_err!(MismatchedTypes: $expected, $actual)) + }}; + (MismatchedTypes: $expected:expr, $actual:expr) => {{ + $crate::vortex_panic!($crate::vortex_err!(MismatchedTypes: $expected, $actual)) + }}; + (Context: $msg:literal, $err:expr) => {{ + $crate::vortex_panic!($crate::vortex_err!(Context: $msg, $err)) + }}; ($variant:ident: $fmt:literal $(, $arg:expr)* $(,)?) 
=> { $crate::vortex_panic!($crate::vortex_err!($variant: $fmt, $($arg),*)) - }; - ($msg:literal, $err:expr) => {{ - use $crate::VortexPanic; - ($err).panic_with_context($msg) - }}; - ($msg:literal) => { - $crate::vortex_panic!($crate::vortex_err!($msg)) + }; + ($fmt:literal $(, $arg:expr)* $(,)?) => { + $crate::vortex_panic!($crate::vortex_err!($fmt, $($arg),*)) }; ($err:expr) => {{ use $crate::VortexPanic; diff --git a/vortex-scalar/src/list.rs b/vortex-scalar/src/list.rs index 95b46772d..03af1a398 100644 --- a/vortex-scalar/src/list.rs +++ b/vortex-scalar/src/list.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use itertools::Itertools; use vortex_dtype::DType; use vortex_dtype::Nullability::NonNullable; -use vortex_error::{vortex_bail, VortexError, VortexResult}; +use vortex_error::{vortex_bail, VortexError, VortexExpect as _, VortexResult}; use crate::value::ScalarValue; use crate::Scalar; @@ -112,7 +112,7 @@ where let scalars = value.into_iter().map(|v| Self::from(v)).collect_vec(); let element_dtype = scalars .first() - .unwrap_or_else(|| panic!("Empty list, could not determine element dtype")) + .vortex_expect("Empty list, could not determine element dtype") .dtype() .clone(); let dtype = DType::List(Arc::new(element_dtype), NonNullable); diff --git a/vortex-scalar/src/primitive.rs b/vortex-scalar/src/primitive.rs index 405a58b6a..cd1e03a3e 100644 --- a/vortex-scalar/src/primitive.rs +++ b/vortex-scalar/src/primitive.rs @@ -3,7 +3,7 @@ use core::any::type_name; use num_traits::NumCast; use vortex_dtype::half::f16; use vortex_dtype::{match_each_native_ptype, DType, NativePType, Nullability, PType}; -use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; +use vortex_error::{vortex_bail, VortexError, VortexExpect as _, VortexResult}; use crate::pvalue::PValue; use crate::value::ScalarValue; @@ -37,9 +37,11 @@ impl<'a> PrimitiveScalar<'a> { ); self.pvalue.as_ref().map(|pv| { - T::try_from(*pv).unwrap_or_else(|err| { - panic!("Failed to cast {} to {}: 
{}", pv, type_name::(), err) - }) + T::try_from(*pv).vortex_expect_lazy(|| format!( + "Failed to cast {} to {}", + pv, + type_name::() + )) }) } @@ -94,12 +96,10 @@ impl Scalar { } pub fn reinterpret_cast(&self, ptype: PType) -> Self { - let primitive = PrimitiveScalar::try_from(self).unwrap_or_else(|err| { - panic!( - "Failed to reinterpret cast {} to {}: {}", - self.dtype, ptype, err - ) - }); + let primitive = PrimitiveScalar::try_from(self).vortex_expect_lazy(|| format!( + "Failed to reinterpret cast {} to {}", + self.dtype, ptype + )); if primitive.ptype() == ptype { return self.clone(); } diff --git a/vortex-scalar/src/serde/flatbuffers.rs b/vortex-scalar/src/serde/flatbuffers.rs index 6b9321aaf..2fc80db01 100644 --- a/vortex-scalar/src/serde/flatbuffers.rs +++ b/vortex-scalar/src/serde/flatbuffers.rs @@ -2,7 +2,7 @@ use flatbuffers::{FlatBufferBuilder, WIPOffset}; use itertools::Itertools; use serde::{Deserialize, Serialize}; use vortex_dtype::DType; -use vortex_error::VortexError; +use vortex_error::{VortexError, VortexExpect as _}; use vortex_flatbuffers::{scalar as fb, WriteFlatBuffer}; use crate::{Scalar, ScalarValue}; @@ -55,7 +55,7 @@ impl WriteFlatBuffer for ScalarValue { let mut value_se = flexbuffers::FlexbufferSerializer::new(); self.serialize(&mut value_se) .map_err(VortexError::FlexBuffersSerError) - .unwrap_or_else(|err| panic!("Failed to serialize ScalarValue: {}", err)); + .vortex_expect("Failed to serialize ScalarValue"); let flex = Some(fbb.create_vector(value_se.view())); fb::ScalarValue::create(fbb, &fb::ScalarValueArgs { flex }) } From 27437daf0811250d1692d39765f4a3fc1d37c8c8 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Wed, 4 Sep 2024 18:32:32 -0400 Subject: [PATCH 24/39] wip --- vortex-array/src/array/chunked/variants.rs | 7 +-- vortex-array/src/array/constant/mod.rs | 11 ++-- vortex-array/src/array/primitive/mod.rs | 7 +-- vortex-array/src/array/sparse/mod.rs | 13 ++--- vortex-array/src/array/struct_/mod.rs | 12 ++--- 
vortex-array/src/array/varbin/builder.rs | 9 ++-- .../src/array/varbin/compute/filter.rs | 6 +-- vortex-array/src/array/varbin/compute/take.rs | 8 +-- vortex-array/src/array/varbin/mod.rs | 9 ++-- vortex-array/src/array/varbinview/mod.rs | 48 ++++++----------- vortex-array/src/arrow/array.rs | 52 +++++-------------- vortex-array/src/canonical.rs | 2 +- vortex-error/src/lib.rs | 34 +++++------- vortex-scalar/src/primitive.rs | 17 ++---- 14 files changed, 86 insertions(+), 149 deletions(-) diff --git a/vortex-array/src/array/chunked/variants.rs b/vortex-array/src/array/chunked/variants.rs index c5f913f30..d229c36bc 100644 --- a/vortex-array/src/array/chunked/variants.rs +++ b/vortex-array/src/array/chunked/variants.rs @@ -1,5 +1,5 @@ use vortex_dtype::DType; -use vortex_error::VortexExpect as _; +use vortex_error::vortex_panic; use crate::array::chunked::ChunkedArray; use crate::variants::{ @@ -71,10 +71,7 @@ impl StructArrayTrait for ChunkedArray { let projected_dtype = self.dtype().as_struct().and_then(|s| s.dtypes().get(idx))?; let chunked = ChunkedArray::try_new(chunks, projected_dtype.clone()) - .vortex_expect_lazy(|| format!( - "Failed to create new chunked array with dtype {}", - projected_dtype - )) + .unwrap_or_else(|err| vortex_panic!(err, "Failed to create new chunked array with dtype {}", projected_dtype)) .into_array(); Some(chunked) } diff --git a/vortex-array/src/array/constant/mod.rs b/vortex-array/src/array/constant/mod.rs index 050826d4f..f1e405211 100644 --- a/vortex-array/src/array/constant/mod.rs +++ b/vortex-array/src/array/constant/mod.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use serde::{Deserialize, Serialize}; -use vortex_error::{VortexExpect as _, VortexResult}; +use vortex_error::{vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::stats::{Stat, StatsSet}; @@ -37,6 +37,7 @@ impl ConstantArray { (Stat::IsSorted, true.into()), (Stat::RunCount, 1.into()), ])); + Self::try_from_parts( scalar.dtype().clone(), length, @@ 
-47,10 +48,12 @@ impl ConstantArray { [].into(), stats, ) - .vortex_expect_lazy(|| { - format!( + .unwrap_or_else(|err| { + vortex_panic!( + err, "Failed to create Constant array of length {} from scalar {}", - length, scalar + length, + scalar ) }) } diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 6328e359c..0c8f6a76c 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -8,7 +8,7 @@ use num_traits::AsPrimitive; use serde::{Deserialize, Serialize}; use vortex_buffer::Buffer; use vortex_dtype::{match_each_native_ptype, DType, NativePType, PType}; -use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexError, VortexExpect as _, VortexResult}; use crate::iter::{Accessor, AccessorRef}; use crate::stats::StatsSet; @@ -92,10 +92,7 @@ impl PrimitiveArray { // TODO(ngates): we can't really cache this anywhere? self.dtype() .try_into() - .vortex_expect_lazy(|| format!( - "Failed to convert dtype {} to ptype", - self.dtype() - )) + .unwrap_or_else(|err: VortexError| vortex_panic!(err, "Failed to convert dtype {} to ptype", self.dtype())) } pub fn buffer(&self) -> &Buffer { diff --git a/vortex-array/src/array/sparse/mod.rs b/vortex-array/src/array/sparse/mod.rs index 2baedc54a..35d7107e8 100644 --- a/vortex-array/src/array/sparse/mod.rs +++ b/vortex-array/src/array/sparse/mod.rs @@ -1,6 +1,6 @@ use ::serde::{Deserialize, Serialize}; use vortex_dtype::{match_each_integer_ptype, DType}; -use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::array::constant::ConstantArray; @@ -158,13 +158,10 @@ impl ArrayStatisticsCompute for SparseArray {} impl ArrayValidity for SparseArray { fn is_valid(&self, index: usize) -> bool { - match self.find_index(index).vortex_expect_lazy(|| format!( - 
"Error while finding index {} in sparse array", - index - )) - { - None => !self.fill_value().is_null(), - Some(idx) => self.values().with_dyn(|a| a.is_valid(idx)), + match self.find_index(index) { + Ok(None) => !self.fill_value().is_null(), + Ok(Some(idx)) => self.values().with_dyn(|a| a.is_valid(idx)), + Err(e) => vortex_panic!(e, "Error while finding index {} in sparse array", index), } } diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index faddf5f2f..f81e1c56c 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; use vortex_dtype::field::Field; use vortex_dtype::{DType, FieldName, FieldNames, Nullability, StructDType}; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexExpect as _, VortexResult}; use crate::stats::{ArrayStatisticsCompute, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; @@ -30,8 +30,9 @@ impl StructArray { pub fn children(&self) -> impl Iterator + '_ { (0..self.nfields()).map(move |idx| { - self.field(idx) - .unwrap_or_else(|| panic!("Field {} not found, nfields: {}", idx, self.nfields())) + self.field(idx).unwrap_or_else(|| { + vortex_panic!("Field {} not found, nfields: {}", idx, self.nfields()) + }) }) } @@ -82,9 +83,8 @@ impl StructArray { let fields: Vec = items.iter().map(|(_, array)| array.clone()).collect(); let len = fields.first().map(|f| f.len()).unwrap_or(0); - Self::try_new(FieldNames::from(names), fields, len, Validity::NonNullable).unwrap_or_else( - |err| panic!("Unexpected error while building StructArray from fields: {err}"), - ) + Self::try_new(FieldNames::from(names), fields, len, Validity::NonNullable) + .vortex_expect("Unexpected error while building StructArray from fields") } // TODO(aduffy): Add equivalent function to support field masks for nested column access. 
diff --git a/vortex-array/src/array/varbin/builder.rs b/vortex-array/src/array/varbin/builder.rs index 8690365a2..41064e5c4 100644 --- a/vortex-array/src/array/varbin/builder.rs +++ b/vortex-array/src/array/varbin/builder.rs @@ -2,6 +2,7 @@ use arrow_buffer::NullBufferBuilder; use bytes::BytesMut; use num_traits::AsPrimitive; use vortex_dtype::{DType, NativePType}; +use vortex_error::{vortex_panic, VortexExpect as _}; use crate::array::primitive::PrimitiveArray; use crate::array::varbin::VarBinArray; @@ -37,14 +38,14 @@ impl VarBinBuilder { pub fn push_value(&mut self, value: impl AsRef<[u8]>) { let slice = value.as_ref(); self.offsets - .push(O::from(self.data.len() + slice.len()).unwrap_or_else(|| { - panic!( + .push(O::from(self.data.len() + slice.len()).unwrap_or_else(|| + vortex_panic!( "Failed to convert sum of {} and {} to offset of type {}", self.data.len(), slice.len(), std::any::type_name::() ) - })); + )); self.data.extend_from_slice(slice); self.validity.append_non_null(); } @@ -80,7 +81,7 @@ impl VarBinBuilder { }; VarBinArray::try_new(offsets.into_array(), data.into_array(), dtype, validity) - .unwrap_or_else(|err| panic!("Unexpected error while building VarBinArray: {err}")) + .vortex_expect("Unexpected error while building VarBinArray") } } diff --git a/vortex-array/src/array/varbin/compute/filter.rs b/vortex-array/src/array/varbin/compute/filter.rs index fcd697eaf..838af36ad 100644 --- a/vortex-array/src/array/varbin/compute/filter.rs +++ b/vortex-array/src/array/varbin/compute/filter.rs @@ -1,7 +1,7 @@ use itertools::Itertools; use num_traits::{AsPrimitive, Zero}; use vortex_dtype::{match_each_integer_ptype, DType, NativePType}; -use vortex_error::{vortex_err, VortexResult}; +use vortex_error::{vortex_err, vortex_panic, VortexResult}; use crate::array::varbin::builder::VarBinBuilder; use crate::array::varbin::VarBinArray; @@ -119,10 +119,10 @@ fn update_non_nullable_slice( let new_data = { let offset_start = offsets[start] .to_usize() - 
.unwrap_or_else(|| panic!("Failed to convert offset to usize: {}", offsets[start])); + .unwrap_or_else(|| vortex_panic!("Failed to convert offset to usize: {}", offsets[start])); let offset_end = offsets[end] .to_usize() - .unwrap_or_else(|| panic!("Failed to convert offset to usize: {}", offsets[end])); + .unwrap_or_else(|| vortex_panic!("Failed to convert offset to usize: {}", offsets[end])); &data[offset_start..offset_end] }; let new_offsets = offsets[start..end + 1] diff --git a/vortex-array/src/array/varbin/compute/take.rs b/vortex-array/src/array/varbin/compute/take.rs index e03f4412e..c1ef74db9 100644 --- a/vortex-array/src/array/varbin/compute/take.rs +++ b/vortex-array/src/array/varbin/compute/take.rs @@ -1,6 +1,6 @@ use arrow_buffer::NullBuffer; use vortex_dtype::{match_each_integer_ptype, DType, NativePType}; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexResult}; use crate::array::varbin::builder::VarBinBuilder; use crate::array::varbin::VarBinArray; @@ -74,13 +74,13 @@ fn take_nullable( for &idx in indices { let idx = idx .to_usize() - .unwrap_or_else(|| panic!("Failed to convert index to usize: {}", idx)); + .unwrap_or_else(|| vortex_panic!("Failed to convert index to usize: {}", idx)); if null_buffer.is_valid(idx) { let start = offsets[idx] .to_usize() - .unwrap_or_else(|| panic!("Failed to convert offset to usize: {}", offsets[idx])); + .unwrap_or_else(|| vortex_panic!("Failed to convert offset to usize: {}", offsets[idx])); let stop = offsets[idx + 1].to_usize().unwrap_or_else(|| { - panic!("Failed to convert offset to usize: {}", offsets[idx + 1]) + vortex_panic!("Failed to convert offset to usize: {}", offsets[idx + 1]) }); builder.push(Some(&data[start..stop])); } else { diff --git a/vortex-array/src/array/varbin/mod.rs b/vortex-array/src/array/varbin/mod.rs index 128206cbe..a3ed3100a 100644 --- a/vortex-array/src/array/varbin/mod.rs +++ 
b/vortex-array/src/array/varbin/mod.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; pub use stats::compute_stats; use vortex_buffer::Buffer; use vortex_dtype::{match_each_native_ptype, DType, NativePType, Nullability}; -use vortex_error::{vortex_bail, VortexError, VortexExpect as _, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect as _, VortexResult, VortexUnwrap as _}; use vortex_scalar::Scalar; use crate::array::primitive::PrimitiveArray; @@ -153,11 +153,11 @@ impl VarBinArray { .unwrap_or_else(|| { scalar_at(&self.offsets(), index) .unwrap_or_else(|err| { - panic!("Failed to get offset at index: {}: {}", index, err) + vortex_panic!(err, "Failed to get offset at index: {}", index) }) .as_ref() .try_into() - .unwrap_or_else(|err| panic!("Failed to convert offset to usize: {}", err)) + .vortex_expect("Failed to convert offset to usize") }) } @@ -222,7 +222,8 @@ impl<'a> FromIterator> for VarBinArray { pub fn varbin_scalar(value: Buffer, dtype: &DType) -> Scalar { if matches!(dtype, DType::Utf8(_)) { Scalar::try_utf8(value, dtype.nullability()) - .unwrap_or_else(|err| panic!("Failed to create scalar from utf8 buffer: {}", err)) + .map_err(|err| vortex_err!("Failed to create scalar from utf8 buffer: {}", err)) + .vortex_unwrap() } else { Scalar::binary(value, dtype.nullability()) } diff --git a/vortex-array/src/array/varbinview/mod.rs b/vortex-array/src/array/varbinview/mod.rs index 8e3312825..5f52316bb 100644 --- a/vortex-array/src/array/varbinview/mod.rs +++ b/vortex-array/src/array/varbinview/mod.rs @@ -10,7 +10,7 @@ use arrow_buffer::ScalarBuffer; use arrow_schema::DataType; use itertools::Itertools; use vortex_dtype::{DType, PType}; -use vortex_error::{vortex_bail, VortexError, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexError, VortexExpect as _, VortexResult}; use crate::array::varbin::VarBinArray; use crate::arrow::FromArrowArray; @@ -160,7 +160,7 @@ impl VarBinViewArray { 
slice::from_raw_parts( self.views() .into_primitive() - .unwrap_or_else(|err| panic!("Views must be a primitive array: {}", err)) + .vortex_expect("Views must be a primitive array") .maybe_null_slice::() .as_ptr() as _, self.views().len() / VIEW_SIZE, @@ -176,14 +176,14 @@ impl VarBinViewArray { pub fn views(&self) -> Array { self.array() .child(0, &DType::BYTES, self.len() * VIEW_SIZE) - .unwrap_or_else(|| panic!("Missing views")) + .unwrap_or_else(|| vortex_panic!("VarBinViewArray is missing its views")) } #[inline] pub fn bytes(&self, idx: usize) -> Array { self.array() .child(idx + 1, &DType::BYTES, self.metadata().data_lens[idx]) - .unwrap_or_else(|| panic!("Missing data buffer")) + .unwrap_or_else(|| vortex_panic!("VarBinViewArray is missing its data buffer")) } pub fn validity(&self) -> Validity { @@ -201,12 +201,8 @@ impl VarBinViewArray { builder.append_value(s); } let array = Array::from_arrow(&builder.finish(), false); - VarBinViewArray::try_from(array).unwrap_or_else(|err| { - panic!( - "Failed to convert iterator of nullable strings to VarBinViewArray: {}", - err - ) - }) + VarBinViewArray::try_from(array).vortex_expect( + "Failed to convert iterator of nullable strings to VarBinViewArray") } pub fn from_iter_nullable_str, I: IntoIterator>>( @@ -217,12 +213,8 @@ impl VarBinViewArray { builder.extend(iter); let array = Array::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array).unwrap_or_else(|err| { - panic!( - "Failed to convert iterator of nullable strings to VarBinViewArray: {}", - err - ) - }) + VarBinViewArray::try_from(array).vortex_expect( + "Failed to convert iterator of nullable strings to VarBinViewArray") } pub fn from_iter_bin, I: IntoIterator>(iter: I) -> Self { @@ -232,12 +224,8 @@ impl VarBinViewArray { builder.append_value(b); } let array = Array::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array).unwrap_or_else(|err| { - panic!( - "Failed to convert iterator of bytes to VarBinViewArray: {}", - 
err - ) - }) + VarBinViewArray::try_from(array).vortex_expect( + "Failed to convert iterator of bytes to VarBinViewArray") } pub fn from_iter_nullable_bin, I: IntoIterator>>( @@ -247,12 +235,8 @@ impl VarBinViewArray { let mut builder = BinaryViewBuilder::with_capacity(iter.size_hint().0); builder.extend(iter); let array = Array::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array).unwrap_or_else(|err| { - panic!( - "Failed to convert iterator of nullable bytes to VarBinViewArray: {}", - err - ) - }) + VarBinViewArray::try_from(array).vortex_expect( + "Failed to convert iterator of nullable bytes to VarBinViewArray") } pub fn bytes_at(&self, index: usize) -> VortexResult> { @@ -292,17 +276,17 @@ fn as_arrow(var_bin_view: VarBinViewArray) -> ArrayRef { let views = var_bin_view .views() .into_primitive() - .unwrap_or_else(|err| panic!("Views must be a primitive array: {}", err)); + .vortex_expect("Views must be a primitive array"); assert_eq!(views.ptype(), PType::U8); let nulls = var_bin_view .logical_validity() .to_null_buffer() - .unwrap_or_else(|err| panic!("Failed to convert logical validity to null buffer: {}", err)); + .vortex_expect("Failed to convert logical validity to null buffer"); let data = (0..var_bin_view.metadata().data_lens.len()) .map(|i| var_bin_view.bytes(i).into_primitive()) .collect::>>() - .unwrap_or_else(|err| panic!("VarBinView byte arrays must be primitive arrays: {}", err)); + .vortex_expect("VarBinView byte arrays must be primitive arrays"); if !data.is_empty() { assert_eq!(data[0].ptype(), PType::U8); assert!(data.iter().map(|d| d.ptype()).all_equal()); @@ -325,7 +309,7 @@ fn as_arrow(var_bin_view: VarBinViewArray) -> ArrayRef { data, nulls, )), - _ => panic!("expected utf8 or binary, got {}", var_bin_view.dtype()), + _ => vortex_panic!("Expected utf8 or binary, got {}", var_bin_view.dtype()), } } diff --git a/vortex-array/src/arrow/array.rs b/vortex-array/src/arrow/array.rs index f0ae4997f..511ee463e 100644 --- 
a/vortex-array/src/arrow/array.rs +++ b/vortex-array/src/arrow/array.rs @@ -19,6 +19,7 @@ use arrow_schema::{DataType, TimeUnit as ArrowTimeUnit}; use itertools::Itertools; use vortex_datetime_dtype::TimeUnit; use vortex_dtype::{DType, NativePType, PType}; +use vortex_error::{vortex_panic, VortexExpect as _}; use crate::array::{ BoolArray, NullArray, PrimitiveArray, StructArray, TemporalArray, VarBinArray, VarBinViewArray, @@ -37,7 +38,7 @@ impl From for ArrayData { impl From for ArrayData { fn from(value: NullBuffer) -> Self { BoolArray::try_new(value.into_inner(), Validity::NonNullable) - .unwrap_or_else(|err| panic!("Failed to convert null buffer to BoolArray: {}", err)) + .vortex_expect("Failed to convert null buffer to BoolArray") .to_array_data() } } @@ -99,7 +100,7 @@ where DataType::Date64 => TemporalArray::new_date(arr.into(), TimeUnit::Ms).into(), DataType::Duration(_) => unimplemented!(), DataType::Interval(_) => unimplemented!(), - _ => panic!("Invalid data type for PrimitiveArray: {}", T::DATA_TYPE), + _ => vortex_panic!("Invalid data type for PrimitiveArray: {}", T::DATA_TYPE), } } } @@ -112,7 +113,7 @@ where let dtype = match T::DATA_TYPE { DataType::Binary | DataType::LargeBinary => DType::Binary(nullable.into()), DataType::Utf8 | DataType::LargeUtf8 => DType::Utf8(nullable.into()), - _ => panic!("Invalid data type for ByteArray"), + _ => vortex_panic!("Invalid data type for ByteArray: {}", T::DATA_TYPE), }; VarBinArray::try_new( ArrayData::from(value.offsets().clone()).into(), @@ -120,12 +121,7 @@ where dtype, nulls(value.nulls(), nullable), ) - .unwrap_or_else(|err| { - panic!( - "Failed to convert Arrow GenericByteArray to Vortex VarBinArray: {}", - err - ) - }) + .vortex_expect("Failed to convert Arrow GenericByteArray to Vortex VarBinArray") .into() } } @@ -135,7 +131,7 @@ impl FromArrowArray<&GenericByteViewArray> for Array { let dtype = match T::DATA_TYPE { DataType::BinaryView => DType::Binary(nullable.into()), DataType::Utf8View => 
DType::Utf8(nullable.into()), - _ => panic!("Invalid data type for ByteViewArray"), + _ => vortex_panic!("Invalid data type for ByteViewArray: {}", T::DATA_TYPE), }; VarBinViewArray::try_new( ArrayData::from(value.views().inner().clone()).into(), @@ -147,12 +143,7 @@ impl FromArrowArray<&GenericByteViewArray> for Array { dtype, nulls(value.nulls(), nullable), ) - .unwrap_or_else(|err| { - panic!( - "Failed to convert Arrow GenericByteViewArray to Vortex VarBinViewArray: {}", - err - ) - }) + .vortex_expect("Failed to convert Arrow GenericByteViewArray to Vortex VarBinViewArray") .into() } } @@ -160,12 +151,7 @@ impl FromArrowArray<&GenericByteViewArray> for Array { impl FromArrowArray<&ArrowBooleanArray> for Array { fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> Self { BoolArray::try_new(value.values().clone(), nulls(value.nulls(), nullable)) - .unwrap_or_else(|err| { - panic!( - "Failed to convert Arrow BooleanArray to Vortex BoolArray: {}", - err - ) - }) + .vortex_expect("Failed to convert Arrow BooleanArray to Vortex BoolArray") .into() } } @@ -188,12 +174,8 @@ impl FromArrowArray<&ArrowStructArray> for Array { value.len(), nulls(value.nulls(), nullable), ) - .unwrap_or_else(|err| { - panic!( - "Failed to convert Arrow StructArray to Vortex StructArray: {}", - err - ) - }) + .vortex_expect( + "Failed to convert Arrow StructArray to Vortex StructArray") .into() } } @@ -245,18 +227,15 @@ impl FromArrowArray for Array { array .as_any() .downcast_ref::() - .unwrap_or_else(|| { - panic!("Expected Arrow BinaryViewArray for DataType::BinaryView") - }), + .vortex_expect("Expected Arrow BinaryViewArray for DataType::BinaryView"), nullable, ), DataType::Utf8View => Self::from_arrow( array .as_any() .downcast_ref::() - .unwrap_or_else(|| { - panic!("Expected Arrow StringViewArray for DataType::Utf8View") - }), + .vortex_expect("Expected Arrow StringViewArray for DataType::Utf8View") + , nullable, ), DataType::Struct(_) => Self::from_arrow(array.as_struct(), 
nullable), @@ -309,10 +288,7 @@ impl FromArrowArray for Array { Self::from_arrow(array.as_primitive::(), nullable) } }, - _ => panic!( - "TODO(robert): Missing array encoding for Arrow data type {}", - array.data_type().clone() - ), + _ => vortex_panic!("Missing array encoding for Arrow data type {}", array.data_type().clone()), } } } diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 7c1be112b..bf8c3ec63 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -143,7 +143,7 @@ fn bool_to_arrow(bool_array: BoolArray) -> ArrayRef { bool_array .logical_validity() .to_null_buffer() - .unwrap_or_else(|err| panic!("Failed to get null buffer from logical validity: {err}")), + .vortex_expect("Failed to get null buffer from logical validity"), )) } diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index e65699b97..46891a65d 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -154,7 +154,11 @@ pub enum VortexError { ), } -pub type VortexResult = Result; +impl VortexError { + pub fn with_context>(self, msg: T) -> Self { + VortexError::Context(msg.into(), Box::new(self)) + } +} impl Debug for VortexError { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { @@ -162,10 +166,10 @@ impl Debug for VortexError { } } +pub type VortexResult = Result; + pub trait VortexPanic { fn panic(self) -> !; - - fn panic_with_context>(self, msg: T) -> !; } impl VortexPanic for VortexError { @@ -174,11 +178,6 @@ impl VortexPanic for VortexError { fn panic(self) -> ! { panic!("{}", self) } - - #[inline(always)] - fn panic_with_context>(self, msg: T) -> ! 
{ - VortexError::Context(msg.into(), Box::new(self)).panic() - } } pub trait VortexUnwrap { @@ -200,8 +199,6 @@ pub trait VortexExpect { type Output; fn vortex_expect(self, msg: &str) -> Self::Output; - - fn vortex_expect_lazy String>(self, op: F) -> Self::Output; } impl VortexExpect for VortexResult { @@ -209,12 +206,7 @@ impl VortexExpect for VortexResult { #[inline(always)] fn vortex_expect(self, msg: &str) -> Self::Output { - self.unwrap_or_else(|e| e.panic_with_context(msg.to_string())) - } - - #[inline(always)] - fn vortex_expect_lazy String>(self, op: F) -> Self::Output { - self.unwrap_or_else(|e| e.panic_with_context(op())) + self.unwrap_or_else(|e| e.with_context(msg.to_string()).panic()) } } @@ -225,11 +217,6 @@ impl VortexExpect for Option { fn vortex_expect(self, msg: &str) -> Self::Output { self.unwrap_or_else(|| VortexError::InvalidArgument(msg.to_string().into(), Backtrace::capture()).panic()) } - - #[inline(always)] - fn vortex_expect_lazy String>(self, op: F) -> Self::Output { - self.unwrap_or_else(|| VortexError::InvalidArgument(op().into(), Backtrace::capture()).panic()) - } } #[macro_export] @@ -259,7 +246,6 @@ macro_rules! vortex_err { ) }}; (Context: $msg:literal, $err:expr) => {{ - use std::backtrace::Backtrace; $crate::__private::must_use( $crate::VortexError::Context($msg.into(), Box::new($err)) ) @@ -307,6 +293,10 @@ macro_rules! vortex_panic { ($variant:ident: $fmt:literal $(, $arg:expr)* $(,)?) => { $crate::vortex_panic!($crate::vortex_err!($variant: $fmt, $($arg),*)) }; + ($err:expr, $fmt:literal $(, $arg:expr)* $(,)?) => {{ + use $crate::VortexPanic; + ($err).with_context(format!($fmt, $($arg),*)).panic() + }}; ($fmt:literal $(, $arg:expr)* $(,)?) 
=> { $crate::vortex_panic!($crate::vortex_err!($fmt, $($arg),*)) }; diff --git a/vortex-scalar/src/primitive.rs b/vortex-scalar/src/primitive.rs index cd1e03a3e..d222c7a66 100644 --- a/vortex-scalar/src/primitive.rs +++ b/vortex-scalar/src/primitive.rs @@ -1,9 +1,8 @@ -use core::any::type_name; use num_traits::NumCast; use vortex_dtype::half::f16; use vortex_dtype::{match_each_native_ptype, DType, NativePType, Nullability, PType}; -use vortex_error::{vortex_bail, VortexError, VortexExpect as _, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexError, VortexResult, VortexUnwrap}; use crate::pvalue::PValue; use crate::value::ScalarValue; @@ -36,13 +35,7 @@ impl<'a> PrimitiveScalar<'a> { T::PTYPE ); - self.pvalue.as_ref().map(|pv| { - T::try_from(*pv).vortex_expect_lazy(|| format!( - "Failed to cast {} to {}", - pv, - type_name::() - )) - }) + self.pvalue.as_ref().map(|pv| T::try_from(*pv).vortex_unwrap()) } pub fn cast(&self, dtype: &DType) -> VortexResult { @@ -96,10 +89,8 @@ impl Scalar { } pub fn reinterpret_cast(&self, ptype: PType) -> Self { - let primitive = PrimitiveScalar::try_from(self).vortex_expect_lazy(|| format!( - "Failed to reinterpret cast {} to {}", - self.dtype, ptype - )); + let primitive = PrimitiveScalar::try_from(self) + .unwrap_or_else(|e| vortex_panic!(e, "Failed to reinterpret cast {} to {}", self.dtype, ptype)); if primitive.ptype() == ptype { return self.clone(); } From 6b504d338a11d5deac036bdfc0cb029800ec91ec Mon Sep 17 00:00:00 2001 From: Will Manning Date: Thu, 5 Sep 2024 12:12:20 -0400 Subject: [PATCH 25/39] wip --- vortex-array/src/canonical.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index bf8c3ec63..addcfc958 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -16,7 +16,7 @@ use arrow_buffer::ScalarBuffer; use arrow_schema::{Field, Fields}; use 
vortex_datetime_dtype::{is_temporal_ext_type, TemporalMetadata, TimeUnit}; use vortex_dtype::{DType, NativePType, PType}; -use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect, VortexResult}; use crate::array::{ BoolArray, ExtensionArray, NullArray, PrimitiveArray, StructArray, TemporalArray, VarBinArray, @@ -156,9 +156,7 @@ fn primitive_to_arrow(primitive_array: PrimitiveArray) -> ArrayRef { array .logical_validity() .to_null_buffer() - .unwrap_or_else(|err| { - panic!("Failed to get null buffer from logical validity: {err}") - }), + .vortex_expect("Failed to get null buffer from logical validity") ) } From 8fc215e5922585c0885451185e24f6f870de0ed9 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Thu, 5 Sep 2024 16:59:57 -0400 Subject: [PATCH 26/39] moar --- Cargo.lock | 53 ++++++---- Cargo.toml | 4 +- encodings/dict/src/compute.rs | 4 +- encodings/fastlanes/src/for/compute.rs | 2 +- vortex-array/src/array/primitive/mod.rs | 6 +- vortex-array/src/array/struct_/mod.rs | 2 +- vortex-array/src/canonical.rs | 129 +++++++++++------------- vortex-array/src/data.rs | 12 ++- vortex-array/src/elementwise.rs | 1 + vortex-array/src/encoding.rs | 7 +- vortex-array/src/iter/mod.rs | 8 +- 11 files changed, 117 insertions(+), 111 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3048a968c..5970e586e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -632,9 +632,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.1.15" +version = "1.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6" +checksum = "e9d013ecb737093c0e86b151a7b837993cf9ec6c502946cfb44bedc392421e0b" dependencies = [ "jobserver", "libc", @@ -713,9 +713,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.16" +version = "4.5.17" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" +checksum = "3e5a21b8495e732f1b3c364c9949b201ca7bae518c502c80256c96ad79eaf6ac" dependencies = [ "clap_builder", "clap_derive", @@ -723,9 +723,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.15" +version = "4.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" +checksum = "8cf2dd12af7a047ad9d6da2b6b249759a22a7abc0f474c1dae1777afa4b21a73" dependencies = [ "anstream", "anstyle", @@ -974,9 +974,9 @@ dependencies = [ [[package]] name = "dashmap" -version = "6.0.1" +version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "804c8821570c3f8b70230c2ba75ffa5c0f9a4189b9a432b6656c536712acae28" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ "cfg-if", "crossbeam-utils", @@ -1882,16 +1882,16 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.2" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" dependencies = [ "futures-util", "http", "hyper", "hyper-util", "rustls", - "rustls-native-certs", + "rustls-native-certs 0.8.0", "rustls-pki-types", "tokio", "tokio-rustls", @@ -3321,7 +3321,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls", - "rustls-native-certs", + "rustls-native-certs 0.7.3", "rustls-pemfile", "rustls-pki-types", "serde", @@ -3410,9 +3410,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.35" +version = "0.38.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a85d50532239da68e9addb745ba38ff4612a242c1c7ceea689c4bc7c2f43c36f" +checksum = 
"3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" dependencies = [ "bitflags 2.6.0", "errno", @@ -3448,6 +3448,19 @@ dependencies = [ "security-framework", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a" +dependencies = [ + "openssl-probe", + "rustls-pemfile", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" version = "2.1.3" @@ -3590,9 +3603,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.127" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "itoa", "memchr", @@ -4046,9 +4059,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", @@ -5241,9 +5254,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.12+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index 736f0ab0a..1578def04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -188,10 +188,10 @@ inconsistent_struct_constructor = { level = "deny" } manual_is_variant_and = { level = "deny" } mem_forget = { level = "deny" } or_fun_call = "deny" -#panic_in_result_fn = { 
level = "deny" } +panic_in_result_fn = { level = "deny" } panic = { level = "deny" } same_name_method = { level = "deny" } tests_outside_test_module = { level = "deny" } -#unwrap_in_result = { level = "deny" } +unwrap_in_result = { level = "deny" } unwrap_used = { level = "deny" } use_debug = { level = "deny" } diff --git a/encodings/dict/src/compute.rs b/encodings/dict/src/compute.rs index 036aebdaa..164ea1839 100644 --- a/encodings/dict/src/compute.rs +++ b/encodings/dict/src/compute.rs @@ -1,7 +1,7 @@ use vortex::compute::unary::{scalar_at, scalar_at_unchecked, ScalarAtFn}; use vortex::compute::{slice, take, ArrayCompute, SliceFn, TakeFn}; use vortex::Array; -use vortex_error::{vortex_panic, VortexResult}; +use vortex_error::{VortexExpect, VortexResult}; use vortex_scalar::Scalar; use crate::DictArray; @@ -30,7 +30,7 @@ impl ScalarAtFn for DictArray { let dict_index: usize = scalar_at_unchecked(&self.codes(), index) .as_ref() .try_into() - .unwrap_or_else(|err| vortex_panic!("Invalid dict index", err)); + .vortex_expect("Invalid dict index"); scalar_at_unchecked(&self.values(), dict_index) } diff --git a/encodings/fastlanes/src/for/compute.rs b/encodings/fastlanes/src/for/compute.rs index 58b94a50e..3ab1d625e 100644 --- a/encodings/fastlanes/src/for/compute.rs +++ b/encodings/fastlanes/src/for/compute.rs @@ -6,7 +6,7 @@ use vortex::compute::{ search_sorted, slice, take, ArrayCompute, SearchResult, SearchSortedFn, SearchSortedSide, SliceFn, TakeFn, }; -use vortex::{Array, ArrayDType, IntoArray}; +use vortex::{Array, ArrayDType}; use vortex_dtype::{match_each_integer_ptype, NativePType}; use vortex_error::{VortexError, VortexExpect as _, VortexResult}; use vortex_scalar::{PValue, PrimitiveScalar, Scalar}; diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 8f96fd309..fb12bf824 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -418,6 +418,7 @@ impl BinaryFn for 
PrimitiveArray { } } +#[allow(clippy::unwrap_used)] fn process_batch O>( lhs: &[I], batch: Batch, @@ -431,10 +432,7 @@ fn process_batch let lhs: [I; ITER_BATCH_SIZE] = lhs.try_into().unwrap(); let rhs: [U; ITER_BATCH_SIZE] = batch.data().try_into().unwrap(); // We know output is of the same length and lhs/rhs - let mut output_slice: [_; ITER_BATCH_SIZE] = output - [idx_offset..idx_offset + ITER_BATCH_SIZE] - .try_into() - .unwrap(); + let mut output_slice: [_; ITER_BATCH_SIZE] = output[idx_offset..idx_offset + ITER_BATCH_SIZE].try_into().unwrap(); for idx in 0..ITER_BATCH_SIZE { unsafe { diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index 81fd0a578..74860540b 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; use vortex_dtype::field::Field; -use vortex_dtype::{DType, FieldName, FieldNames, Nullability, StructDType}; +use vortex_dtype::{DType, FieldName, FieldNames, StructDType}; use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexExpect as _, VortexResult}; use crate::stats::{ArrayStatisticsCompute, StatsSet}; diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index addcfc958..7e9be6fdb 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -16,7 +16,7 @@ use arrow_buffer::ScalarBuffer; use arrow_schema::{Field, Fields}; use vortex_datetime_dtype::{is_temporal_ext_type, TemporalMetadata, TimeUnit}; use vortex_dtype::{DType, NativePType, PType}; -use vortex_error::{vortex_bail, VortexExpect, VortexResult}; +use vortex_error::{vortex_bail, VortexResult}; use crate::array::{ BoolArray, ExtensionArray, NullArray, PrimitiveArray, StructArray, TemporalArray, VarBinArray, @@ -72,17 +72,16 @@ impl Canonical { /// arrays require decompression. 
pub fn into_arrow(self) -> VortexResult { Ok(match self { - Canonical::Null(a) => null_to_arrow(a), - Canonical::Bool(a) => bool_to_arrow(a), - Canonical::Primitive(a) => primitive_to_arrow(a), - Canonical::Struct(a) => struct_to_arrow(a), - Canonical::VarBin(a) => varbin_to_arrow(a), + Canonical::Null(a) => null_to_arrow(a)?, + Canonical::Bool(a) => bool_to_arrow(a)?, + Canonical::Primitive(a) => primitive_to_arrow(a)?, + Canonical::Struct(a) => struct_to_arrow(a)?, + Canonical::VarBin(a) => varbin_to_arrow(a)?, Canonical::Extension(a) => { if !is_temporal_ext_type(a.id()) { vortex_bail!("unsupported extension dtype with ID {}", a.id().as_ref()) } - - temporal_to_arrow(TemporalArray::try_from(&a.into_array())?) + temporal_to_arrow(TemporalArray::try_from(&a.into_array())?)? } }) } @@ -133,64 +132,59 @@ impl Canonical { } } -fn null_to_arrow(null_array: NullArray) -> ArrayRef { - Arc::new(ArrowNullArray::new(null_array.len())) +fn null_to_arrow(null_array: NullArray) -> VortexResult { + Ok(Arc::new(ArrowNullArray::new(null_array.len()))) } -fn bool_to_arrow(bool_array: BoolArray) -> ArrayRef { - Arc::new(ArrowBoolArray::new( +fn bool_to_arrow(bool_array: BoolArray) -> VortexResult { + Ok(Arc::new(ArrowBoolArray::new( bool_array.boolean_buffer(), bool_array .logical_validity() - .to_null_buffer() - .vortex_expect("Failed to get null buffer from logical validity"), - )) + .to_null_buffer()?, + ))) } -fn primitive_to_arrow(primitive_array: PrimitiveArray) -> ArrayRef { +fn primitive_to_arrow(primitive_array: PrimitiveArray) -> VortexResult { fn as_arrow_array_primitive( array: &PrimitiveArray, - ) -> ArrowPrimitiveArray { - ArrowPrimitiveArray::new( + ) -> VortexResult>> { + Ok(Arc::new(ArrowPrimitiveArray::new( ScalarBuffer::::new(array.buffer().clone().into_arrow(), 0, array.len()), array .logical_validity() - .to_null_buffer() - .vortex_expect("Failed to get null buffer from logical validity") - ) + .to_null_buffer()? 
+ ))) } - match primitive_array.ptype() { - PType::U8 => Arc::new(as_arrow_array_primitive::(&primitive_array)), - PType::U16 => Arc::new(as_arrow_array_primitive::(&primitive_array)), - PType::U32 => Arc::new(as_arrow_array_primitive::(&primitive_array)), - PType::U64 => Arc::new(as_arrow_array_primitive::(&primitive_array)), - PType::I8 => Arc::new(as_arrow_array_primitive::(&primitive_array)), - PType::I16 => Arc::new(as_arrow_array_primitive::(&primitive_array)), - PType::I32 => Arc::new(as_arrow_array_primitive::(&primitive_array)), - PType::I64 => Arc::new(as_arrow_array_primitive::(&primitive_array)), - PType::F16 => Arc::new(as_arrow_array_primitive::(&primitive_array)), - PType::F32 => Arc::new(as_arrow_array_primitive::(&primitive_array)), - PType::F64 => Arc::new(as_arrow_array_primitive::(&primitive_array)), - } + Ok(match primitive_array.ptype() { + PType::U8 => as_arrow_array_primitive::(&primitive_array)?, + PType::U16 => as_arrow_array_primitive::(&primitive_array)?, + PType::U32 => as_arrow_array_primitive::(&primitive_array)?, + PType::U64 => as_arrow_array_primitive::(&primitive_array)?, + PType::I8 => as_arrow_array_primitive::(&primitive_array)?, + PType::I16 => as_arrow_array_primitive::(&primitive_array)?, + PType::I32 => as_arrow_array_primitive::(&primitive_array)?, + PType::I64 => as_arrow_array_primitive::(&primitive_array)?, + PType::F16 => as_arrow_array_primitive::(&primitive_array)?, + PType::F32 => as_arrow_array_primitive::(&primitive_array)?, + PType::F64 => as_arrow_array_primitive::(&primitive_array)?, + }) } -fn struct_to_arrow(struct_array: StructArray) -> ArrayRef { - let field_arrays: Vec = struct_array - .children() - .map(|f| { +fn struct_to_arrow(struct_array: StructArray) -> VortexResult { + let field_arrays: Vec = Iterator::zip(struct_array.names().iter(), struct_array.children()) + .map(|(name, f)| { let canonical = f .into_canonical() - .unwrap_or_else(|err| panic!("Failed to canonicalize field: {err}")); + 
.map_err(|err| err.with_context(format!("Failed to canonicalize field {}", name)))?; match canonical { // visit nested structs recursively Canonical::Struct(a) => struct_to_arrow(a), - _ => canonical.into_arrow().unwrap_or_else(|err| { - panic!("Failed to convert canonicalized field to arrow: {err}") - }), + _ => canonical.into_arrow().map_err(|err| err.with_context(format!("Failed to convert canonicalized field {} to arrow", name))), } }) - .collect(); + .collect::>>()?; let arrow_fields: Fields = struct_array .names() @@ -209,17 +203,16 @@ fn struct_to_arrow(struct_array: StructArray) -> ArrayRef { let nulls = struct_array .logical_validity() - .to_null_buffer() - .vortex_expect("Failed to get null buffer from logical validity"); + .to_null_buffer()?; - Arc::new(ArrowStructArray::new(arrow_fields, field_arrays, nulls)) + Ok(Arc::new(ArrowStructArray::try_new(arrow_fields, field_arrays, nulls)?)) } -fn varbin_to_arrow(varbin_array: VarBinArray) -> ArrayRef { +fn varbin_to_arrow(varbin_array: VarBinArray) -> VortexResult { let offsets = varbin_array .offsets() .into_primitive() - .unwrap_or_else(|err| panic!("Failed to canon offsets: {err}")); + .map_err(|err| err.with_context("Failed to canonicalize offsets"))?; let offsets = match offsets.ptype() { PType::I32 | PType::I64 => offsets, PType::U64 => offsets.reinterpret_cast(PType::I64), @@ -227,22 +220,24 @@ fn varbin_to_arrow(varbin_array: VarBinArray) -> ArrayRef { // Unless it's u64, everything else can be converted into an i32. 
_ => try_cast(&offsets.to_array(), PType::I32.into()) .and_then(|a| a.into_primitive()) - .unwrap_or_else(|err| panic!("Failed to cast offsets to PrimitiveArray of i32: {err}")), + .map_err(|err| err.with_context("Failed to cast offsets to PrimitiveArray of i32"))?, }; let nulls = varbin_array .logical_validity() .to_null_buffer() - .unwrap_or_else(|err| panic!("Failed to get null buffer from logical validity: {err}")); + .map_err(|err| err.with_context("Failed to get null buffer from logical validity"))?; let data = varbin_array .bytes() .into_primitive() - .unwrap_or_else(|err| panic!("Failed to canonicalize bytes: {err}")); - assert_eq!(data.ptype(), PType::U8); + .map_err(|err| err.with_context("Failed to canonicalize bytes"))?; + if data.ptype() != PType::U8 { + vortex_bail!("Expected bytes to be of type U8, got {}", data.ptype()); + } let data = data.buffer(); // Switch on Arrow DType. - match varbin_array.dtype() { + Ok(match varbin_array.dtype() { DType::Binary(_) => match offsets.ptype() { PType::I32 => Arc::new(unsafe { BinaryArray::new_unchecked( @@ -258,7 +253,7 @@ fn varbin_to_arrow(varbin_array: VarBinArray) -> ArrayRef { nulls, ) }), - _ => panic!("Invalid offsets type"), + _ => vortex_bail!("Invalid offsets type {}", offsets.ptype()), }, DType::Utf8(_) => match offsets.ptype() { PType::I32 => Arc::new(unsafe { @@ -275,27 +270,23 @@ fn varbin_to_arrow(varbin_array: VarBinArray) -> ArrayRef { nulls, ) }), - _ => panic!("Invalid offsets type"), + _ => vortex_bail!("Invalid offsets type {}", offsets.ptype()), }, - _ => panic!( + _ => vortex_bail!( "expected utf8 or binary instead of {}", varbin_array.dtype() ), - } + }) } -fn temporal_to_arrow(temporal_array: TemporalArray) -> ArrayRef { +fn temporal_to_arrow(temporal_array: TemporalArray) -> VortexResult { macro_rules! 
extract_temporal_values { ($values:expr, $prim:ty) => {{ - let temporal_values = try_cast($values, <$prim as NativePType>::PTYPE.into()) - .expect("values must cast to primitive type") - .into_primitive() - .expect("must be primitive array"); + let temporal_values = try_cast($values, <$prim as NativePType>::PTYPE.into())?.into_primitive()?; let len = temporal_values.len(); let nulls = temporal_values .logical_validity() - .to_null_buffer() - .expect("null buffer"); + .to_null_buffer()?; let scalars = ScalarBuffer::<$prim>::new(temporal_values.into_buffer().into_arrow(), 0, len); @@ -303,7 +294,7 @@ fn temporal_to_arrow(temporal_array: TemporalArray) -> ArrayRef { }}; } - match temporal_array.temporal_metadata() { + Ok(match temporal_array.temporal_metadata() { TemporalMetadata::Date(time_unit) => match time_unit { TimeUnit::D => { let (scalars, nulls) = @@ -315,7 +306,7 @@ fn temporal_to_arrow(temporal_array: TemporalArray) -> ArrayRef { extract_temporal_values!(&temporal_array.temporal_values(), i64); Arc::new(Date64Array::new(scalars, nulls)) } - _ => panic!( + _ => vortex_bail!( "Invalid TimeUnit {time_unit} for {}", temporal_array.ext_dtype().id() ), @@ -341,7 +332,7 @@ fn temporal_to_arrow(temporal_array: TemporalArray) -> ArrayRef { extract_temporal_values!(&temporal_array.temporal_values(), i64); Arc::new(Time64NanosecondArray::new(scalars, nulls)) } - _ => panic!( + _ => vortex_bail!( "Invalid TimeUnit {time_unit} for {}", temporal_array.ext_dtype().id() ), @@ -353,13 +344,13 @@ fn temporal_to_arrow(temporal_array: TemporalArray) -> ArrayRef { TimeUnit::Us => Arc::new(TimestampMicrosecondArray::new(scalars, nulls)), TimeUnit::Ms => Arc::new(TimestampMillisecondArray::new(scalars, nulls)), TimeUnit::S => Arc::new(TimestampSecondArray::new(scalars, nulls)), - _ => panic!( + _ => vortex_bail!( "Invalid TimeUnit {time_unit} for {}", temporal_array.ext_dtype().id() ), } } - } + }) } /// Support trait for transmuting an array into its [vortex_dtype::DType]'s 
canonical encoding. diff --git a/vortex-array/src/data.rs b/vortex-array/src/data.rs index 14c88a978..3ab76326f 100644 --- a/vortex-array/src/data.rs +++ b/vortex-array/src/data.rs @@ -2,7 +2,7 @@ use std::sync::{Arc, RwLock}; use vortex_buffer::Buffer; use vortex_dtype::DType; -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexResult}; use vortex_scalar::Scalar; use crate::encoding::EncodingRef; @@ -136,7 +136,7 @@ impl Statistics for ArrayData { self.stats_map .read() .unwrap_or_else(|_| { - panic!( + vortex_panic!( "Failed to acquire read lock on stats map while getting {}", stat ) @@ -148,7 +148,7 @@ impl Statistics for ArrayData { fn to_set(&self) -> StatsSet { self.stats_map .read() - .unwrap_or_else(|_| panic!("Failed to acquire read lock on stats map")) + .unwrap_or_else(|_| vortex_panic!("Failed to acquire read lock on stats map")) .clone() } @@ -156,7 +156,7 @@ impl Statistics for ArrayData { self.stats_map .write() .unwrap_or_else(|_| { - panic!( + vortex_panic!( "Failed to acquire write lock on stats map while setting {} to {}", stat, value ) @@ -171,7 +171,9 @@ impl Statistics for ArrayData { self.stats_map .write() - .unwrap_or_else(|_| panic!("Failed to write to stats map while computing {}", stat)) + .unwrap_or_else(|_| { + vortex_panic!("Failed to write to stats map while computing {}", stat) + }) .extend( self.to_array() .with_dyn(|a| a.compute_statistics(stat)) diff --git a/vortex-array/src/elementwise.rs b/vortex-array/src/elementwise.rs index c6f8eabb9..2e6c34418 100644 --- a/vortex-array/src/elementwise.rs +++ b/vortex-array/src/elementwise.rs @@ -19,6 +19,7 @@ pub trait UnaryFn { ) -> VortexResult; } +#[allow(clippy::unwrap_used)] pub fn dyn_cast_array_iter(array: &Array) -> Box>> { match PType::try_from(array.dtype()).unwrap() { PType::U8 => Box::new( diff --git a/vortex-array/src/encoding.rs b/vortex-array/src/encoding.rs index 7c19f91ce..1c57eb905 100644 --- a/vortex-array/src/encoding.rs +++ 
b/vortex-array/src/encoding.rs @@ -1,7 +1,7 @@ use std::fmt::{Debug, Display, Formatter}; use std::hash::{Hash, Hasher}; -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexResult}; use crate::canonical::{Canonical, IntoCanonical}; use crate::{Array, ArrayDef, ArrayTrait}; @@ -82,8 +82,9 @@ pub trait ArrayEncodingExt { { let typed = <::Array as TryFrom>::try_from(array.clone()) .unwrap_or_else(|err| { - panic!( - "Failed to convert array to {}: {err}", + vortex_panic!( + err, + "Failed to convert array to {}", std::any::type_name::<::Array>() ) }); diff --git a/vortex-array/src/iter/mod.rs b/vortex-array/src/iter/mod.rs index c7a46826a..daa365a3d 100644 --- a/vortex-array/src/iter/mod.rs +++ b/vortex-array/src/iter/mod.rs @@ -3,7 +3,7 @@ use std::sync::Arc; pub use adapter::*; pub use ext::*; use vortex_dtype::{DType, NativePType}; -use vortex_error::VortexResult; +use vortex_error::{VortexExpect as _, VortexResult}; use crate::validity::Validity; use crate::Array; @@ -200,9 +200,9 @@ impl Iterator for VectorizedArrayIter { let validity = self .validity .slice(self.current_idx, self.current_idx + data.len()) - .unwrap_or_else(|_| { - panic!("The slice bounds should always be within the array's limits") - }); + .vortex_expect( + "The slice bounds should always be within the array's limits", + ); self.current_idx += data.len(); let batch = Batch::new_from_vec(data, validity); From 71fb70f389c431997726e7c609c3bfe741541ad8 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Fri, 6 Sep 2024 09:34:47 -0400 Subject: [PATCH 27/39] no errors --- encodings/alp/benches/alp_compress.rs | 2 ++ encodings/alp/src/alp.rs | 8 +++--- encodings/alp/src/array.rs | 14 +++++------ encodings/alp/src/compress.rs | 4 +-- vortex-array/src/lib.rs | 11 +++++---- vortex-array/src/stats/mod.rs | 35 ++++++++++++++++----------- vortex-array/src/stats/statsset.rs | 31 ++++++++++++------------ vortex-array/src/stream/ext.rs | 6 ++--- vortex-array/src/typed.rs | 6 ++--- 
vortex-array/src/validity.rs | 32 +++++++++++------------- vortex-array/src/variants.rs | 17 +++++++------ vortex-array/src/view.rs | 4 +-- 12 files changed, 88 insertions(+), 82 deletions(-) diff --git a/encodings/alp/benches/alp_compress.rs b/encodings/alp/benches/alp_compress.rs index 2450bd6f7..27aae12c7 100644 --- a/encodings/alp/benches/alp_compress.rs +++ b/encodings/alp/benches/alp_compress.rs @@ -1,3 +1,5 @@ +#![allow(clippy::unwrap_used)] + use arrow::array::{as_primitive_array, ArrowNativeTypeOp, ArrowPrimitiveType}; use arrow::datatypes::{Float32Type, Float64Type}; use divan::{black_box, Bencher}; diff --git a/encodings/alp/src/alp.rs b/encodings/alp/src/alp.rs index 01d917781..27e24e0e4 100644 --- a/encodings/alp/src/alp.rs +++ b/encodings/alp/src/alp.rs @@ -4,6 +4,7 @@ use std::mem::size_of; use itertools::Itertools; use num_traits::{Float, NumCast, PrimInt, Zero}; use serde::{Deserialize, Serialize}; +use vortex_error::vortex_panic; const SAMPLE_SIZE: usize = 32; @@ -20,7 +21,7 @@ impl Display for Exponents { } pub trait ALPFloat: Float + Display + 'static { - type ALPInt: PrimInt; + type ALPInt: PrimInt + Display; const FRACTIONAL_BITS: u8; const MAX_EXPONENT: u8; @@ -119,8 +120,9 @@ pub trait ALPFloat: Float + Display + 'static { #[inline] fn decode_single(encoded: Self::ALPInt, exponents: Exponents) -> Self { let encoded_float: Self = Self::from(encoded).unwrap_or_else(|| { - panic!( - "Failed to convert {} to {} in ALPFloat::decode_single", + vortex_panic!( + "Failed to convert encoded value {} from {} to {} in ALPFloat::decode_single", + encoded, std::any::type_name::(), std::any::type_name::() ) diff --git a/encodings/alp/src/array.rs b/encodings/alp/src/array.rs index 8f712a4ce..0901ae545 100644 --- a/encodings/alp/src/array.rs +++ b/encodings/alp/src/array.rs @@ -12,7 +12,7 @@ use vortex::{ impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoCanonical, }; use vortex_dtype::{DType, PType}; -use 
vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult}; use crate::alp::Exponents; use crate::compress::{alp_encode, decompress}; @@ -84,7 +84,7 @@ impl ALPArray { pub fn encoded(&self) -> Array { self.array() .child(0, &self.metadata().encoded_dtype, self.len()) - .unwrap_or_else(|| panic!("Missing encoded child in ALPArray")) + .vortex_expect("Missing encoded child in ALPArray") } #[inline] @@ -95,8 +95,8 @@ impl ALPArray { pub fn patches(&self) -> Option { self.metadata().patches_dtype.as_ref().map(|dt| { self.array().child(1, dt, self.len()).unwrap_or_else(|| { - panic!( - "Missing patches with present metadata flag; dtype: {}, patches_len: {}", + vortex_panic!( + "Missing patches with present metadata flag; patches dtype: {}, patches_len: {}", dt, self.len() ) @@ -108,7 +108,7 @@ impl ALPArray { pub fn ptype(&self) -> PType { self.dtype() .try_into() - .unwrap_or_else(|err| panic!("Failed to convert DType to PType: {err}")) + .vortex_expect("Failed to convert DType to PType") } } @@ -199,7 +199,7 @@ impl PrimitiveArrayTrait for ALPArray { let encoded = self .encoded() .with_dyn(|a| a.as_primitive_array_unchecked().i32_accessor()) - .unwrap_or_else(|| panic!("This is is an invariant of the ALP algorithm")); + .vortex_expect("Failed to get underlying encoded i32 array for ALP-encoded f32 array; this violates an invariant of the ALP algorithm"); Some(Arc::new(ALPAccessor::new( encoded, @@ -222,7 +222,7 @@ impl PrimitiveArrayTrait for ALPArray { let encoded = self .encoded() .with_dyn(|a| a.as_primitive_array_unchecked().i64_accessor()) - .vortex_expect("This is is an invariant of the ALP algorithm"); + .vortex_expect("Failed to get underlying encoded i64 array for ALP-encoded f64 array; this violates an invariant of the ALP algorithm"); Some(Arc::new(ALPAccessor::new( encoded, patches, diff --git a/encodings/alp/src/compress.rs b/encodings/alp/src/compress.rs index 
bb6d9cc52..2266eab2d 100644 --- a/encodings/alp/src/compress.rs +++ b/encodings/alp/src/compress.rs @@ -2,7 +2,7 @@ use vortex::array::{PrimitiveArray, Sparse, SparseArray}; use vortex::validity::Validity; use vortex::{Array, ArrayDType, ArrayDef, IntoArray, IntoArrayVariant}; use vortex_dtype::{NativePType, PType}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::alp::ALPFloat; @@ -43,7 +43,7 @@ where len, Scalar::null(values.dtype().as_nullable()), ) - .unwrap_or_else(|err| panic!("Failed to create SparseArray for ALP patches: {err}")) + .vortex_expect("Failed to create SparseArray for ALP patches") .into_array() }), ) diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index 661bfa3d7..b47b0fdef 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -22,7 +22,7 @@ pub use typed::*; pub use view::*; use vortex_buffer::Buffer; use vortex_dtype::DType; -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexExpect, VortexResult}; use crate::compute::ArrayCompute; use crate::encoding::{ArrayEncodingRef, EncodingRef}; @@ -193,14 +193,15 @@ impl Array { Ok(()) }) .unwrap_or_else(|err| { - panic!( - "Failed to convert Array to {}: {err}", + vortex_panic!( + err, + "Failed to convert Array to {}", std::any::type_name::() ) }); // Now we unwrap the optional, which we know to be populated by the closure. 
- result.unwrap_or_else(|| panic!("Failed to get result from Array::with_dyn")) + result.vortex_expect("Failed to get result from Array::with_dyn") } } @@ -260,7 +261,7 @@ pub trait ArrayTrait: fn nbytes(&self) -> usize { let mut visitor = NBytesVisitor(0); self.accept(&mut visitor) - .unwrap_or_else(|err| panic!("Failed to get nbytes from Array: {err}")); + .vortex_expect("Failed to get nbytes from Array"); visitor.0 } } diff --git a/vortex-array/src/stats/mod.rs b/vortex-array/src/stats/mod.rs index 395cbe52b..785eff941 100644 --- a/vortex-array/src/stats/mod.rs +++ b/vortex-array/src/stats/mod.rs @@ -6,7 +6,7 @@ use itertools::Itertools; pub use statsset::*; use vortex_dtype::Nullability::NonNullable; use vortex_dtype::{DType, NativePType}; -use vortex_error::{VortexError, VortexResult}; +use vortex_error::{vortex_panic, VortexError, VortexResult}; use vortex_scalar::Scalar; use crate::Array; @@ -96,11 +96,11 @@ impl dyn Statistics + '_ { .map(|s| U::try_from(&s)) .transpose() .unwrap_or_else(|err| { - panic!( - "Failed to cast stat {} to {}: {}", + vortex_panic!( + err, + "Failed to cast stat {} to {}", stat, - std::any::type_name::(), - err + std::any::type_name::() ) }) } @@ -113,7 +113,14 @@ impl dyn Statistics + '_ { .map(|s| s.cast(&DType::Primitive(U::PTYPE, NonNullable))) .transpose() .and_then(|maybe| maybe.as_ref().map(U::try_from).transpose()) - .unwrap_or_else(|err| panic!("Failed to cast stat {} to {}: {}", stat, U::PTYPE, err)) + .unwrap_or_else(|err| { + vortex_panic!( + err, + "Failed to cast stat {} to {}", + stat, + U::PTYPE + ) + }) } pub fn compute_as TryFrom<&'a Scalar, Error = VortexError>>( @@ -124,11 +131,11 @@ impl dyn Statistics + '_ { .map(|s| U::try_from(&s)) .transpose() .unwrap_or_else(|err| { - panic!( - "Failed to compute stat {} as {}: {}", + vortex_panic!( + err, + "Failed to compute stat {} as {}", stat, - std::any::type_name::(), - err + std::any::type_name::() ) }) } @@ -142,11 +149,11 @@ impl dyn Statistics + '_ { 
.transpose() .and_then(|maybe| maybe.as_ref().map(U::try_from).transpose()) .unwrap_or_else(|err| { - panic!( - "Failed to compute stat {} as cast {}: {}", + vortex_panic!( + err, + "Failed to compute stat {} as cast {}", stat, - U::PTYPE, - err + U::PTYPE ) }) } diff --git a/vortex-array/src/stats/statsset.rs b/vortex-array/src/stats/statsset.rs index b029fedb8..0124516f8 100644 --- a/vortex-array/src/stats/statsset.rs +++ b/vortex-array/src/stats/statsset.rs @@ -4,7 +4,7 @@ use std::collections::HashMap; use enum_iterator::all; use itertools::Itertools; use vortex_dtype::DType; -use vortex_error::VortexError; +use vortex_error::{vortex_panic, VortexError, VortexExpect}; use vortex_scalar::Scalar; use crate::stats::Stat; @@ -73,11 +73,11 @@ impl StatsSet { fn get_as TryFrom<&'a Scalar, Error = VortexError>>(&self, stat: Stat) -> Option { self.get(stat).map(|v| { T::try_from(v).unwrap_or_else(|err| { - panic!( - "Failed to get stat {} as {}: {}", + vortex_panic!( + err, + "Failed to get stat {} as {}", stat, - std::any::type_name::(), - err + std::any::type_name::() ) }) }) @@ -184,10 +184,9 @@ impl StatsSet { fn merge_scalar_stat(&mut self, other: &Self, stat: Stat) { if let Entry::Occupied(mut e) = self.values.entry(stat) { if let Some(other_value) = other.get_as::(stat) { - let self_value: usize = e - .get() - .try_into() - .unwrap_or_else(|err| panic!("Failed to get stat {} as usize: {err}", stat)); + let self_value: usize = e.get().try_into().unwrap_or_else(|err: VortexError| { + vortex_panic!(err, "Failed to get stat {} as usize", stat) + }); e.insert((self_value + other_value).into()); } else { e.remove(); @@ -207,10 +206,9 @@ impl StatsSet { if let Entry::Occupied(mut e) = self.values.entry(stat) { if let Some(other_value) = other.get_as::>(stat) { // TODO(robert): Avoid the copy here. 
We could e.get_mut() but need to figure out casting - let self_value: Vec = e - .get() - .try_into() - .unwrap_or_else(|err| panic!("Failed to get stat {} as Vec: {err}", stat)); + let self_value: Vec = e.get().try_into().unwrap_or_else(|err: VortexError| { + vortex_panic!(err, "Failed to get stat {} as Vec", stat) + }); e.insert( self_value .iter() @@ -229,9 +227,10 @@ impl StatsSet { fn merge_run_count(&mut self, other: &Self) { if let Entry::Occupied(mut e) = self.values.entry(Stat::RunCount) { if let Some(other_value) = other.get_as::(Stat::RunCount) { - let self_value: usize = e.get().try_into().unwrap_or_else(|err| { - panic!("Failed to get stat {} as usize: {err}", Stat::RunCount) - }); + let self_value: usize = e + .get() + .try_into() + .vortex_expect("Failed to get run count as usize"); e.insert((self_value + other_value + 1).into()); } else { e.remove(); diff --git a/vortex-array/src/stream/ext.rs b/vortex-array/src/stream/ext.rs index f560b5a62..58a8a7ef4 100644 --- a/vortex-array/src/stream/ext.rs +++ b/vortex-array/src/stream/ext.rs @@ -15,11 +15,9 @@ pub trait ArrayStreamExt: ArrayStream { { async { let dtype = self.dtype().clone(); - let chunks: Vec = self - .try_collect() + self.try_collect() .await - .unwrap_or_else(|err| panic!("Failed to collect ArrayStream: {err}")); - ChunkedArray::try_new(chunks, dtype) + .and_then(|chunks| ChunkedArray::try_new(chunks, dtype)) } } diff --git a/vortex-array/src/typed.rs b/vortex-array/src/typed.rs index 2e25015c5..29c812326 100644 --- a/vortex-array/src/typed.rs +++ b/vortex-array/src/typed.rs @@ -2,7 +2,7 @@ use std::sync::{Arc, OnceLock}; use vortex_buffer::Buffer; use vortex_dtype::DType; -use vortex_error::{vortex_bail, VortexError, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexError, VortexResult}; use crate::stats::StatsSet; use crate::{Array, ArrayData, ArrayDef, AsArray, IntoArray, ToArray, TryDeserializeArrayMetadata}; @@ -44,7 +44,7 @@ impl TypedArray { .as_any() 
.downcast_ref::() .unwrap_or_else(|| { - panic!( + vortex_panic!( "Failed to downcast metadata to {} for typed array with ID {} and encoding {}", std::any::type_name::(), D::ID.as_ref(), @@ -55,7 +55,7 @@ impl TypedArray { .lazy_metadata .get_or_init(|| { D::Metadata::try_deserialize_metadata(v.metadata()).unwrap_or_else(|err| { - panic!( + vortex_panic!( "Failed to deserialize ArrayView metadata for typed array with ID {} and encoding {}: {}", D::ID.as_ref(), D::ENCODING.id().as_ref(), diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 197e784f3..a1ae1af45 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -1,7 +1,7 @@ use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer}; use serde::{Deserialize, Serialize}; use vortex_dtype::{DType, Nullability}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult}; use crate::array::BoolArray; use crate::compute::unary::scalar_at_unchecked; @@ -29,7 +29,7 @@ impl ValidityMetadata { Self::AllValid => Validity::AllValid, Self::AllInvalid => Validity::AllInvalid, Self::Array => match array { - None => panic!("Missing validity array"), + None => vortex_panic!("Missing validity array"), Some(a) => Validity::Array(a), }, } @@ -94,8 +94,9 @@ impl Validity { Self::NonNullable | Self::AllValid => true, Self::AllInvalid => false, Self::Array(a) => bool::try_from(&scalar_at_unchecked(a, index)).unwrap_or_else(|err| { - panic!( - "Failed to get bool from Validity Array at index {}: {err}", + vortex_panic!( + err, + "Failed to get bool from Validity Array at index {}", index ) }), @@ -176,18 +177,15 @@ impl PartialEq for Validity { (Self::AllValid, Self::AllValid) => true, (Self::AllInvalid, Self::AllInvalid) => true, (Self::Array(a), Self::Array(b)) => { - a.clone() + let a_buffer = a.clone() .into_bool() - .unwrap_or_else(|err| { - panic!("Failed to get Validity Array as BoolArray: {err}") - }) 
- .boolean_buffer() - == b.clone() - .into_bool() - .unwrap_or_else(|err| { - panic!("Failed to get Validity Array as BoolArray: {err}") - }) - .boolean_buffer() + .vortex_expect("Failed to get Validity Array as BoolArray") + .boolean_buffer(); + let b_buffer = b.clone() + .into_bool() + .vortex_expect("Failed to get Validity Array as BoolArray") + .boolean_buffer(); + a_buffer == b_buffer } _ => false, } @@ -246,9 +244,7 @@ impl FromIterator for Validity { LogicalValidity::Array(array) => { let array_buffer = array .into_bool() - .unwrap_or_else(|err| { - panic!("Failed to get Validity Array as BoolArray: {err}") - }) + .vortex_expect("Failed to get Validity Array as BoolArray") .boolean_buffer(); buffer.append_buffer(&array_buffer); } diff --git a/vortex-array/src/variants.rs b/vortex-array/src/variants.rs index f5e32e8fa..c6f59529b 100644 --- a/vortex-array/src/variants.rs +++ b/vortex-array/src/variants.rs @@ -4,6 +4,7 @@ //! encoding, they can use these traits to write encoding-agnostic code. 
use vortex_dtype::{DType, FieldNames}; +use vortex_error::VortexExpect as _; use crate::iter::{AccessorRef, VectorizedArrayIter}; use crate::{Array, ArrayTrait}; @@ -15,7 +16,7 @@ pub trait ArrayVariants { fn as_null_array_unchecked(&self) -> &dyn NullArrayTrait { self.as_null_array() - .unwrap_or_else(|| panic!("Expected NullArray")) + .vortex_expect("Expected NullArray") } fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { @@ -24,7 +25,7 @@ pub trait ArrayVariants { fn as_bool_array_unchecked(&self) -> &dyn BoolArrayTrait { self.as_bool_array() - .unwrap_or_else(|| panic!("Expected BoolArray")) + .vortex_expect("Expected BoolArray") } fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { @@ -33,7 +34,7 @@ pub trait ArrayVariants { fn as_primitive_array_unchecked(&self) -> &dyn PrimitiveArrayTrait { self.as_primitive_array() - .unwrap_or_else(|| panic!("Expected PrimitiveArray")) + .vortex_expect("Expected PrimitiveArray") } fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { @@ -42,7 +43,7 @@ pub trait ArrayVariants { fn as_utf8_array_unchecked(&self) -> &dyn Utf8ArrayTrait { self.as_utf8_array() - .unwrap_or_else(|| panic!("Expected Utf8Array")) + .vortex_expect("Expected Utf8Array") } fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { @@ -51,7 +52,7 @@ pub trait ArrayVariants { fn as_binary_array_unchecked(&self) -> &dyn BinaryArrayTrait { self.as_binary_array() - .unwrap_or_else(|| panic!("Expected BinaryArray")) + .vortex_expect("Expected BinaryArray") } fn as_struct_array(&self) -> Option<&dyn StructArrayTrait> { @@ -60,7 +61,7 @@ pub trait ArrayVariants { fn as_struct_array_unchecked(&self) -> &dyn StructArrayTrait { self.as_struct_array() - .unwrap_or_else(|| panic!("Expected StructArray")) + .vortex_expect("Expected StructArray") } fn as_list_array(&self) -> Option<&dyn ListArrayTrait> { @@ -69,7 +70,7 @@ pub trait ArrayVariants { fn as_list_array_unchecked(&self) -> &dyn ListArrayTrait { self.as_list_array() - 
.unwrap_or_else(|| panic!("Expected ListArray")) + .vortex_expect("Expected ListArray") } fn as_extension_array(&self) -> Option<&dyn ExtensionArrayTrait> { @@ -78,7 +79,7 @@ pub trait ArrayVariants { fn as_extension_array_unchecked(&self) -> &dyn ExtensionArrayTrait { self.as_extension_array() - .unwrap_or_else(|| panic!("Expected ExtensionArray")) + .vortex_expect("Expected ExtensionArray") } } diff --git a/vortex-array/src/view.rs b/vortex-array/src/view.rs index f6a115199..cf21b42eb 100644 --- a/vortex-array/src/view.rs +++ b/vortex-array/src/view.rs @@ -6,7 +6,7 @@ use itertools::Itertools; use log::warn; use vortex_buffer::Buffer; use vortex_dtype::{DType, Nullability}; -use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexError, VortexExpect as _, VortexResult}; use vortex_scalar::{PValue, Scalar, ScalarValue}; use crate::encoding::EncodingRef; @@ -154,7 +154,7 @@ impl ArrayView { let mut collector = ChildrenCollector::default(); Array::View(self.clone()) .with_dyn(|a| a.accept(&mut collector)) - .unwrap_or_else(|err| panic!("Failed to get children: {err}")); + .vortex_expect("Failed to get children"); collector.children } From 2d93b83e9c6682641d7e97d104eeb54c68143edb Mon Sep 17 00:00:00 2001 From: Will Manning Date: Fri, 6 Sep 2024 09:37:29 -0400 Subject: [PATCH 28/39] rename byte-bool dir to bytebool for consistency --- Cargo.toml | 2 +- encodings/{byte-bool => bytebool}/Cargo.toml | 0 encodings/{byte-bool => bytebool}/src/compute/mod.rs | 0 encodings/{byte-bool => bytebool}/src/lib.rs | 0 encodings/{byte-bool => bytebool}/src/stats.rs | 0 5 files changed, 1 insertion(+), 1 deletion(-) rename encodings/{byte-bool => bytebool}/Cargo.toml (100%) rename encodings/{byte-bool => bytebool}/src/compute/mod.rs (100%) rename encodings/{byte-bool => bytebool}/src/lib.rs (100%) rename encodings/{byte-bool => bytebool}/src/stats.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 
0bc31a272..69184e2b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -131,7 +131,7 @@ uuid = "1.8.0" vortex-alp = { version = "0.8.0", path = "./encodings/alp" } vortex-array = { version = "0.8.0", path = "./vortex-array" } vortex-buffer = { version = "0.8.0", path = "./vortex-buffer" } -vortex-bytebool = { version = "0.8.0", path = "./encodings/byte-bool" } +vortex-bytebool = { version = "0.8.0", path = "./encodings/bytebool" } vortex-datafusion = { version = "0.8.0", path = "./vortex-datafusion" } vortex-datetime-dtype = { version = "0.8.0", path = "./vortex-datetime-dtype" } vortex-datetime-parts = { version = "0.8.0", path = "./encodings/datetime-parts" } diff --git a/encodings/byte-bool/Cargo.toml b/encodings/bytebool/Cargo.toml similarity index 100% rename from encodings/byte-bool/Cargo.toml rename to encodings/bytebool/Cargo.toml diff --git a/encodings/byte-bool/src/compute/mod.rs b/encodings/bytebool/src/compute/mod.rs similarity index 100% rename from encodings/byte-bool/src/compute/mod.rs rename to encodings/bytebool/src/compute/mod.rs diff --git a/encodings/byte-bool/src/lib.rs b/encodings/bytebool/src/lib.rs similarity index 100% rename from encodings/byte-bool/src/lib.rs rename to encodings/bytebool/src/lib.rs diff --git a/encodings/byte-bool/src/stats.rs b/encodings/bytebool/src/stats.rs similarity index 100% rename from encodings/byte-bool/src/stats.rs rename to encodings/bytebool/src/stats.rs From 48ff8a1a0c1fa67d96bd953374c69ea1405a2285 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Fri, 6 Sep 2024 12:40:59 -0400 Subject: [PATCH 29/39] more fixing --- encodings/bytebool/src/lib.rs | 11 ++++---- encodings/datetime-parts/src/array.rs | 8 +++--- encodings/dict/src/compress.rs | 13 +++------ encodings/dict/src/dict.rs | 17 +++++------- .../fastlanes/src/bitpacking/compress.rs | 26 +++++++++--------- .../src/bitpacking/compute/search_sorted.rs | 8 +++--- encodings/fastlanes/src/bitpacking/mod.rs | 6 ++--- encodings/fastlanes/src/delta/mod.rs | 8 +++--- 
encodings/fastlanes/src/for/mod.rs | 6 ++--- encodings/fsst/src/array.rs | 14 +++++----- encodings/fsst/src/compress.rs | 27 +++++++++---------- encodings/fsst/src/compute.rs | 15 +++++------ encodings/fsst/tests/fsst_tests.rs | 4 +-- encodings/roaring/src/boolean/mod.rs | 6 ++--- encodings/runend-bool/src/compute.rs | 2 +- vortex-array/src/validity.rs | 27 +++++++++++++++++-- vortex-error/src/lib.rs | 13 ++++++--- 17 files changed, 113 insertions(+), 98 deletions(-) diff --git a/encodings/bytebool/src/lib.rs b/encodings/bytebool/src/lib.rs index 884a34afd..8d2f4d8ff 100644 --- a/encodings/bytebool/src/lib.rs +++ b/encodings/bytebool/src/lib.rs @@ -11,7 +11,7 @@ use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{impl_encoding, ArrayDef, ArrayTrait, Canonical, IntoCanonical, TypedArray}; use vortex_buffer::Buffer; use vortex_dtype::DType; -use vortex_error::VortexResult; +use vortex_error::{VortexExpect as _, VortexResult}; mod compute; mod stats; @@ -66,7 +66,7 @@ impl ByteBoolArray { pub fn buffer(&self) -> &Buffer { self.array() .buffer() - .unwrap_or_else(|| panic!("ByteBoolArray is missing the underlying buffer")) + .vortex_expect("ByteBoolArray is missing the underlying buffer") } fn maybe_null_slice(&self) -> &[bool] { @@ -96,7 +96,7 @@ impl BoolArrayTrait for ByteBoolArray { impl From> for ByteBoolArray { fn from(value: Vec) -> Self { Self::try_from_vec(value, Validity::AllValid) - .unwrap_or_else(|err| panic!("Failed to create ByteBoolArray from Vec: {err}")) + .vortex_expect("Failed to create ByteBoolArray from Vec") } } @@ -110,9 +110,8 @@ impl From>> for ByteBoolArray { .map(std::option::Option::unwrap_or_default) .collect(); - Self::try_from_vec(data, validity).unwrap_or_else(|err| { - panic!("Failed to create ByteBoolArray from nullable bools: {err}") - }) + Self::try_from_vec(data, validity) + .vortex_expect("Failed to create ByteBoolArray from nullable bools") } } diff --git a/encodings/datetime-parts/src/array.rs 
b/encodings/datetime-parts/src/array.rs index fbc83163d..c1b68cd0b 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -7,7 +7,7 @@ use vortex::variants::{ArrayVariants, ExtensionArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical}; use vortex_dtype::DType; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use crate::compute::decode_to_temporal; @@ -65,19 +65,19 @@ impl DateTimePartsArray { pub fn days(&self) -> Array { self.array() .child(0, &self.metadata().days_dtype, self.len()) - .unwrap_or_else(|| panic!("DatetimePartsArray missing days array")) + .vortex_expect("DatetimePartsArray missing days array") } pub fn seconds(&self) -> Array { self.array() .child(1, &self.metadata().seconds_dtype, self.len()) - .unwrap_or_else(|| panic!("DatetimePartsArray missing seconds array")) + .vortex_expect("DatetimePartsArray missing seconds array") } pub fn subsecond(&self) -> Array { self.array() .child(2, &self.metadata().subseconds_dtype, self.len()) - .unwrap_or_else(|| panic!("DatetimePartsArray missing subsecond array")) + .vortex_expect("DatetimePartsArray missing subsecond array") } } diff --git a/encodings/dict/src/compress.rs b/encodings/dict/src/compress.rs index eae2babec..98182519c 100644 --- a/encodings/dict/src/compress.rs +++ b/encodings/dict/src/compress.rs @@ -9,6 +9,7 @@ use vortex::array::{PrimitiveArray, VarBinArray}; use vortex::validity::Validity; use vortex::{ArrayDType, IntoArray}; use vortex_dtype::{match_each_native_ptype, DType, NativePType, ToBytes}; +use vortex_error::VortexExpect as _; #[derive(Debug)] struct Value(T); @@ -65,9 +66,7 @@ pub fn dict_encode_typed_primitive( } } }) - .unwrap_or_else(|err| { - panic!("Failed to iterate over primitive array during dictionary encoding: {err}") - }); + .vortex_expect("Failed to 
iterate over primitive array during dictionary encoding"); let values_validity = if array.dtype().is_nullable() { let mut validity = vec![true; values.len()]; @@ -88,9 +87,7 @@ pub fn dict_encode_typed_primitive( pub fn dict_encode_varbin(array: &VarBinArray) -> (PrimitiveArray, VarBinArray) { array .with_iterator(|iter| dict_encode_typed_varbin(array.dtype().clone(), iter)) - .unwrap_or_else(|err| { - panic!("Failed to iterate over varbin array during dictionary encoding: {err}") - }) + .vortex_expect("Failed to iterate over varbin array during dictionary encoding") } fn lookup_bytes<'a, T: NativePType + AsPrimitive>( @@ -169,9 +166,7 @@ where dtype, values_validity, ) - .unwrap_or_else(|err| { - panic!("Failed to create VarBinArray dictionary during encoding: {err}") - }), + .vortex_expect("Failed to create VarBinArray dictionary during encoding") ) } diff --git a/encodings/dict/src/dict.rs b/encodings/dict/src/dict.rs index 6b7a159ef..9df0091d2 100644 --- a/encodings/dict/src/dict.rs +++ b/encodings/dict/src/dict.rs @@ -13,7 +13,7 @@ use vortex::{ IntoCanonical, }; use vortex_dtype::{match_each_integer_ptype, DType}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult}; impl_encoding!("vortex.dict", 20u16, Dict); @@ -44,14 +44,14 @@ impl DictArray { pub fn values(&self) -> Array { self.array() .child(0, self.dtype(), self.metadata().values_len) - .unwrap_or_else(|| panic!("DictArray missing values")) + .vortex_expect("DictArray is missing its values child array") } #[inline] pub fn codes(&self) -> Array { self.array() .child(1, &self.metadata().codes_dtype, self.len()) - .unwrap_or_else(|| panic!("DictArray missing codes")) + .vortex_expect("DictArray is missing its codes child array") } } @@ -67,27 +67,24 @@ impl ArrayValidity for DictArray { fn is_valid(&self, index: usize) -> bool { let values_index = scalar_at(&self.codes(), index) .unwrap_or_else(|err| { - panic!("Failed to get index 
{} from DictArray codes: {err}", index) + vortex_panic!(err, "Failed to get index {} from DictArray codes", index) }) .as_ref() .try_into() - .unwrap_or_else(|err| panic!("Failed to convert dictionary code to usize: {err}")); + .vortex_expect("Failed to convert dictionary code to usize"); self.values().with_dyn(|a| a.is_valid(values_index)) } fn logical_validity(&self) -> LogicalValidity { if self.dtype().is_nullable() { - let primitive_codes = self.codes().into_primitive().unwrap_or_else(|err| { - panic!("Failed to convert DictArray codes to primitive array: {err}") - }); + let primitive_codes = self.codes().into_primitive().vortex_expect("Failed to convert DictArray codes to primitive array"); match_each_integer_ptype!(primitive_codes.ptype(), |$P| { ArrayAccessor::<$P>::with_iterator(&primitive_codes, |iter| { LogicalValidity::Array( BoolArray::from(iter.flatten().map(|c| *c != 0).collect::>()) .into_array(), ) - }) - .unwrap() + }).vortex_expect("Failed to convert DictArray codes into logical validity") }) } else { LogicalValidity::AllValid(self.len()) diff --git a/encodings/fastlanes/src/bitpacking/compress.rs b/encodings/fastlanes/src/bitpacking/compress.rs index 9755fe7f2..4bbe301c0 100644 --- a/encodings/fastlanes/src/bitpacking/compress.rs +++ b/encodings/fastlanes/src/bitpacking/compress.rs @@ -257,31 +257,33 @@ pub unsafe fn unpack_single_primitive( unsafe { BitPacking::unchecked_unpack_single(bit_width, packed_chunk, index_in_chunk) } } -pub fn find_best_bit_width(array: &PrimitiveArray) -> Option { - let bit_width_freq = array.statistics().compute_bit_width_freq()?; +pub fn find_best_bit_width(array: &PrimitiveArray) -> VortexResult { + let bit_width_freq = array.statistics().compute_bit_width_freq() + .ok_or_else(|| vortex_err!(ComputeError: "Failed to compute bit width frequency"))?; - Some(best_bit_width( + best_bit_width( &bit_width_freq, bytes_per_exception(array.ptype()), - )) + ) } /// Assuming exceptions cost 1 value + 1 u32 index, figure out 
the best bit-width to use. /// We could try to be clever, but we can never really predict how the exceptions will compress. -fn best_bit_width(bit_width_freq: &[usize], bytes_per_exception: usize) -> usize { - let len: usize = bit_width_freq.iter().sum(); - +fn best_bit_width(bit_width_freq: &[usize], bytes_per_exception: usize) -> VortexResult { if bit_width_freq.len() > u8::MAX as usize { - panic!("Too many bit widths"); + vortex_bail!("Too many bit widths"); } + let len: usize = bit_width_freq.iter().sum(); let mut num_packed = 0; let mut best_cost = len * bytes_per_exception; let mut best_width = 0; - for (bit_width, freq) in bit_width_freq.iter().enumerate() { + for (bit_width, freq) in bit_width_freq.iter().enumerate() { + let packed_cost = ((bit_width * len) + 7) / 8; // round up to bytes + num_packed += *freq; - let packed_cost = ((bit_width * len) + 7) / 8; let exceptions_cost = (len - num_packed) * bytes_per_exception; + let cost = exceptions_cost + packed_cost; if cost < best_cost { best_cost = cost; @@ -289,7 +291,7 @@ fn best_bit_width(bit_width_freq: &[usize], bytes_per_exception: usize) -> usize } } - best_width + Ok(best_width) } fn bytes_per_exception(ptype: PType) -> usize { @@ -315,7 +317,7 @@ mod test { // 10 1-bit values, 20 2-bit, etc. 
let freq = vec![0, 10, 20, 15, 1, 0, 0, 0]; // 3-bits => (46 * 3) + (8 * 1 * 5) => 178 bits => 23 bytes and zero exceptions - assert_eq!(best_bit_width(&freq, bytes_per_exception(PType::U8)), 3); + assert_eq!(best_bit_width(&freq, bytes_per_exception(PType::U8)).unwrap(), 3); } #[test] diff --git a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs index 279b335f9..73e09a05a 100644 --- a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs +++ b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs @@ -10,7 +10,7 @@ use vortex::compute::{ use vortex::validity::Validity; use vortex::{ArrayDType, IntoArrayVariant}; use vortex_dtype::{match_each_unsigned_integer_ptype, NativePType}; -use vortex_error::{VortexError, VortexResult}; +use vortex_error::{VortexError, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::{unpack_single_primitive, BitPackedArray}; @@ -67,15 +67,13 @@ struct BitPackedSearch { impl BitPackedSearch { pub fn new(array: &BitPackedArray) -> Self { Self { - packed: array.packed().into_primitive().unwrap_or_else(|err| { - panic!("Failed to get packed bytes as PrimitiveArray: {err}") - }), + packed: array.packed().into_primitive().vortex_expect("Failed to get packed bytes as PrimitiveArray"), offset: array.offset(), length: array.len(), bit_width: array.bit_width(), min_patch_offset: array.patches().map(|p| { SparseArray::try_from(p) - .unwrap_or_else(|err| panic!("Only sparse patches are supported: {err}")) + .vortex_expect("Only sparse patches are supported") .min_index() }), validity: array.validity(), diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index e8ddc301f..a63488c82 100644 --- a/encodings/fastlanes/src/bitpacking/mod.rs +++ b/encodings/fastlanes/src/bitpacking/mod.rs @@ -7,7 +7,7 @@ use vortex::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, 
ArrayVisitor}; use vortex::{impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical}; use vortex_dtype::{Nullability, PType}; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect as _, VortexResult}; mod compress; mod compute; @@ -113,7 +113,7 @@ impl BitPackedArray { &self.dtype().with_nullability(Nullability::NonNullable), self.packed_len(), ) - .unwrap_or_else(|| panic!("BitpackedArray is missing packed child bytes array")) + .vortex_expect("BitpackedArray is missing packed child bytes array") } #[inline] @@ -161,7 +161,7 @@ impl BitPackedArray { pub fn ptype(&self) -> PType { self.dtype() .try_into() - .unwrap_or_else(|err| panic!("Failed to convert BitpackedArray DType to PType: {err}")) + .unwrap_or_else(|err: VortexError| vortex_panic!(err, "Failed to convert BitpackedArray DType {} to PType", self.dtype())) } #[inline] diff --git a/encodings/fastlanes/src/delta/mod.rs b/encodings/fastlanes/src/delta/mod.rs index e8f592f33..32345d506 100644 --- a/encodings/fastlanes/src/delta/mod.rs +++ b/encodings/fastlanes/src/delta/mod.rs @@ -8,7 +8,7 @@ use vortex::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical}; use vortex_dtype::match_each_unsigned_integer_ptype; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult}; mod compress; mod compute; @@ -61,14 +61,14 @@ impl DeltaArray { pub fn bases(&self) -> Array { self.array() .child(0, self.dtype(), self.bases_len()) - .unwrap_or_else(|| panic!("DeltaArray is missing bases")) + .vortex_expect("DeltaArray is missing bases child array") } #[inline] pub fn deltas(&self) -> Array { self.array() .child(1, self.dtype(), self.len()) - .unwrap_or_else(|| panic!("DeltaArray is missing 
deltas")) + .vortex_expect("DeltaArray is missing deltas child array") } #[inline] @@ -76,7 +76,7 @@ impl DeltaArray { let ptype = self .dtype() .try_into() - .unwrap_or_else(|err| panic!("Failed to convert DeltaArray DType to PType: {err}")); + .unwrap_or_else(|err| vortex_panic!(err, "Failed to convert DeltaArray DType {} to PType", self.dtype())); match_each_unsigned_integer_ptype!(ptype, |$T| { <$T as fastlanes::FastLanes>::LANES }) diff --git a/encodings/fastlanes/src/for/mod.rs b/encodings/fastlanes/src/for/mod.rs index 8f68782c0..b5a0fe4c1 100644 --- a/encodings/fastlanes/src/for/mod.rs +++ b/encodings/fastlanes/src/for/mod.rs @@ -8,7 +8,7 @@ use vortex::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical}; use vortex_dtype::{DType, PType}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; mod compress; @@ -50,7 +50,7 @@ impl FoRArray { }; self.array() .child(0, dtype, self.len()) - .unwrap_or_else(|| panic!("FoRArray is missing encoded child array")) + .vortex_expect("FoRArray is missing encoded child array") } #[inline] @@ -67,7 +67,7 @@ impl FoRArray { pub fn ptype(&self) -> PType { self.dtype() .try_into() - .unwrap_or_else(|err| panic!("Failed to convert FoRArray DType to PType: {err}")) + .unwrap_or_else(|err| vortex_panic!(err, "Failed to convert FoRArray DType {} to PType", self.dtype())) } } diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 9bbbc9d76..8b820ef46 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -8,7 +8,7 @@ use vortex::variants::{ArrayVariants, BinaryArrayTrait, Utf8ArrayTrait}; use vortex::visitor::AcceptArrayVisitor; use vortex::{impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, IntoCanonical}; use vortex_dtype::{DType, 
Nullability, PType}; -use vortex_error::{vortex_bail, vortex_panic, VortexExpect, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect, VortexResult}; impl_encoding!("vortex.fsst", 24u16, FSST); @@ -101,25 +101,25 @@ impl FSSTArray { /// this array, and pass it to the given function. /// /// This is private to the crate to avoid leaking `fsst-rs` types as part of the public API. - pub(crate) fn with_decompressor(&self, apply: F) -> R + pub(crate) fn with_decompressor(&self, apply: F) -> VortexResult where - F: FnOnce(Decompressor) -> R, + F: FnOnce(Decompressor) -> VortexResult, { // canonicalize the symbols child array, so we can view it contiguously let symbols_array = self .symbols() .into_canonical() - .unwrap_or_else(|err| vortex_panic!(err)) + .map_err(|err| err.with_context("Failed to canonicalize symbols array"))? .into_primitive() - .unwrap_or_else(|err| vortex_panic!(Context: "Symbols must be a Primitive Array", err)); + .map_err(|err| err.with_context("Symbols must be a Primitive Array"))?; let symbols = symbols_array.maybe_null_slice::(); let symbol_lengths_array = self .symbol_lengths() .into_canonical() - .unwrap() + .map_err(|err| err.with_context("Failed to canonicalize symbol_lengths array"))? .into_primitive() - .unwrap(); + .map_err(|err| err.with_context("Symbol lengths must be a Primitive Array"))?; let symbol_lengths = symbol_lengths_array.maybe_null_slice::(); // Transmute the 64-bit symbol values into fsst `Symbol`s. 
diff --git a/encodings/fsst/src/compress.rs b/encodings/fsst/src/compress.rs index 96213e28c..e1b95d03d 100644 --- a/encodings/fsst/src/compress.rs +++ b/encodings/fsst/src/compress.rs @@ -7,6 +7,7 @@ use vortex::array::{PrimitiveArray, VarBinArray, VarBinViewArray}; use vortex::validity::Validity; use vortex::{Array, ArrayDType, IntoArray}; use vortex_dtype::DType; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use crate::FSSTArray; @@ -15,29 +16,25 @@ use crate::FSSTArray; /// # Panics /// /// If the `strings` array is not encoded as either [`VarBinArray`] or [`VarBinViewArray`]. -pub fn fsst_compress(strings: &Array, compressor: &Compressor) -> FSSTArray { +pub fn fsst_compress(strings: &Array, compressor: &Compressor) -> VortexResult { let len = strings.len(); let dtype = strings.dtype().clone(); // Compress VarBinArray if let Ok(varbin) = VarBinArray::try_from(strings) { - let compressed = varbin + return varbin .with_iterator(|iter| fsst_compress_iter(iter, len, dtype, compressor)) - .unwrap(); - - return compressed; + .map_err(|err| err.with_context("Failed to compress VarBinArray with FSST")); } // Compress VarBinViewArray if let Ok(varbin_view) = VarBinViewArray::try_from(strings) { - let compressed = varbin_view + return varbin_view .with_iterator(|iter| fsst_compress_iter(iter, len, dtype, compressor)) - .unwrap(); - - return compressed; + .map_err(|err| err.with_context("Failed to compress VarBinViewArray with FSST")); } - panic!( + vortex_bail!( "cannot fsst_compress array with unsupported encoding {:?}", strings.encoding().id() ) @@ -48,17 +45,17 @@ pub fn fsst_compress(strings: &Array, compressor: &Compressor) -> FSSTArray { /// # Panics /// /// If the provided array is not FSST compressible. 
-pub fn fsst_train_compressor(array: &Array) -> Compressor { +pub fn fsst_train_compressor(array: &Array) -> VortexResult { if let Ok(varbin) = VarBinArray::try_from(array) { varbin .with_iterator(|iter| fsst_train_compressor_iter(iter)) - .unwrap() + .map_err(|err| err.with_context("Failed to train FSST Compressor from VarBinArray")) } else if let Ok(varbin_view) = VarBinViewArray::try_from(array) { varbin_view .with_iterator(|iter| fsst_train_compressor_iter(iter)) - .unwrap() + .map_err(|err| err.with_context("Failed to train FSST Compressor from VarBinViewArray")) } else { - panic!( + vortex_bail!( "cannot fsst_compress array with unsupported encoding {:?}", array.encoding().id() ) @@ -118,5 +115,5 @@ where PrimitiveArray::from_vec(symbol_lengths_vec, Validity::NonNullable).into_array(); FSSTArray::try_new(dtype, symbols, symbol_lengths, codes.into_array()) - .expect("building FSSTArray from parts") + .vortex_expect("Failed to build FSSTArray from parts; this should never happen") } diff --git a/encodings/fsst/src/compute.rs b/encodings/fsst/src/compute.rs index 5159932ec..7a3aeccb2 100644 --- a/encodings/fsst/src/compute.rs +++ b/encodings/fsst/src/compute.rs @@ -3,7 +3,7 @@ use vortex::compute::unary::{scalar_at_unchecked, ScalarAtFn}; use vortex::compute::{filter, slice, take, ArrayCompute, FilterFn, SliceFn, TakeFn}; use vortex::{Array, ArrayDType, IntoArray}; use vortex_buffer::Buffer; -use vortex_error::VortexResult; +use vortex_error::{vortex_err, VortexResult, VortexUnwrap}; use vortex_scalar::Scalar; use crate::FSSTArray; @@ -57,19 +57,18 @@ impl TakeFn for FSSTArray { impl ScalarAtFn for FSSTArray { fn scalar_at(&self, index: usize) -> VortexResult { - Ok(self.scalar_at_unchecked(index)) - } - - fn scalar_at_unchecked(&self, index: usize) -> Scalar { let compressed = scalar_at_unchecked(&self.codes(), index); - let binary_datum = compressed.value().as_buffer().unwrap().unwrap(); + let binary_datum = compressed.value().as_buffer()?.ok_or_else(|| 
vortex_err!("Expected a binary scalar, found {}", compressed.dtype()))?; self.with_decompressor(|decompressor| { let decoded_buffer: Buffer = decompressor.decompress(binary_datum.as_slice()).into(); - - varbin_scalar(decoded_buffer, self.dtype()) + Ok(varbin_scalar(decoded_buffer, self.dtype())) }) } + + fn scalar_at_unchecked(&self, index: usize) -> Scalar { + ScalarAtFn::scalar_at(self, index).vortex_unwrap() + } } impl FilterFn for FSSTArray { diff --git a/encodings/fsst/tests/fsst_tests.rs b/encodings/fsst/tests/fsst_tests.rs index fd94402bd..01bf75264 100644 --- a/encodings/fsst/tests/fsst_tests.rs +++ b/encodings/fsst/tests/fsst_tests.rs @@ -28,9 +28,9 @@ fn fsst_array() -> Array { .finish(DType::Utf8(Nullability::NonNullable)) .into_array(); - let compressor = fsst_train_compressor(&input_array); + let compressor = fsst_train_compressor(&input_array).unwrap(); - fsst_compress(&input_array, &compressor).into_array() + fsst_compress(&input_array, &compressor).unwrap().into_array() } #[rstest] diff --git a/encodings/roaring/src/boolean/mod.rs b/encodings/roaring/src/boolean/mod.rs index f7e517698..5971b306a 100644 --- a/encodings/roaring/src/boolean/mod.rs +++ b/encodings/roaring/src/boolean/mod.rs @@ -15,7 +15,7 @@ use vortex::{ use vortex_buffer::Buffer; use vortex_dtype::DType; use vortex_dtype::Nullability::{NonNullable, Nullable}; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexExpect as _, VortexResult}; mod compress; mod compute; @@ -50,7 +50,7 @@ impl RoaringBoolArray { Bitmap::deserialize::( self.array() .buffer() - .unwrap_or_else(|| panic!("RoaringBoolArray buffer is missing")) + .vortex_expect("RoaringBoolArray buffer is missing") .as_ref(), ) } @@ -59,7 +59,7 @@ impl RoaringBoolArray { if array.encoding().id() == Bool::ID { roaring_bool_encode(BoolArray::try_from(array)?).map(vortex::IntoArray::into_array) } else { - Err(vortex_err!("RoaringInt can only encode boolean arrays")) + 
vortex_bail!("RoaringInt can only encode boolean arrays") } } } diff --git a/encodings/runend-bool/src/compute.rs b/encodings/runend-bool/src/compute.rs index 4d0fe4e44..5de60574f 100644 --- a/encodings/runend-bool/src/compute.rs +++ b/encodings/runend-bool/src/compute.rs @@ -3,7 +3,7 @@ use vortex::compute::unary::ScalarAtFn; use vortex::compute::{slice, ArrayCompute, SliceFn, TakeFn}; use vortex::{Array, IntoArray, IntoArrayVariant, ToArray}; use vortex_dtype::match_each_integer_ptype; -use vortex_error::{vortex_bail, vortex_panic, VortexResult}; +use vortex_error::{VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::compress::value_at_index; diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index a1ae1af45..6c94a9ee7 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -1,13 +1,13 @@ use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer}; use serde::{Deserialize, Serialize}; use vortex_dtype::{DType, Nullability}; -use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect as _, VortexResult}; use crate::array::BoolArray; use crate::compute::unary::scalar_at_unchecked; use crate::compute::{and, filter, slice, take}; use crate::stats::ArrayStatistics; -use crate::{Array, IntoArray, IntoArrayVariant}; +use crate::{Array, ArrayDType, IntoArray, IntoArrayVariant}; pub trait ArrayValidity { fn is_valid(&self, index: usize) -> bool; @@ -270,6 +270,21 @@ pub enum LogicalValidity { } impl LogicalValidity { + pub fn try_new_from_array(array: Array) -> VortexResult { + if !matches!(array.dtype(), &Validity::DTYPE) { + vortex_bail!("Expected a non-nullable boolean array"); + } + + let true_count = array.statistics().compute_true_count().ok_or_else(|| vortex_err!("Failed to compute true count from validity array"))?; + if true_count == array.len() { + return 
Ok(Self::AllValid(array.len())); + } else if true_count == 0 { + return Ok(Self::AllInvalid(array.len())); + } + + Ok(Self::Array(array)) + } + pub fn to_null_buffer(&self) -> VortexResult> { match self { Self::AllValid(_) => Ok(None), @@ -313,6 +328,14 @@ impl LogicalValidity { } } +impl TryFrom for LogicalValidity { + type Error = VortexError; + + fn try_from(array: Array) -> VortexResult { + Self::try_new_from_array(array) + } +} + impl IntoArray for LogicalValidity { fn into_array(self) -> Array { match self { diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 46891a65d..4eef784ed 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -191,7 +191,7 @@ impl VortexUnwrap for VortexResult { #[inline(always)] fn vortex_unwrap(self) -> Self::Output { - self.unwrap_or_else(|err| err.panic()) + self.unwrap_or_else(|err| vortex_panic!(err)) } } @@ -206,7 +206,7 @@ impl VortexExpect for VortexResult { #[inline(always)] fn vortex_expect(self, msg: &str) -> Self::Output { - self.unwrap_or_else(|e| e.with_context(msg.to_string()).panic()) + self.unwrap_or_else(|e| vortex_panic!(e.with_context(msg.to_string()))) } } @@ -215,7 +215,10 @@ impl VortexExpect for Option { #[inline(always)] fn vortex_expect(self, msg: &str) -> Self::Output { - self.unwrap_or_else(|| VortexError::InvalidArgument(msg.to_string().into(), Backtrace::capture()).panic()) + self.unwrap_or_else(|| { + let err = VortexError::InvalidArgument(msg.to_string().into(), Backtrace::capture()); + vortex_panic!(err) + }) } } @@ -295,7 +298,9 @@ macro_rules! vortex_panic { }; ($err:expr, $fmt:literal $(, $arg:expr)* $(,)?) => {{ use $crate::VortexPanic; - ($err).with_context(format!($fmt, $($arg),*)).panic() + use $crate::VortexError; + let err: VortexError = $err; + err.with_context(format!($fmt, $($arg),*)).panic() }}; ($fmt:literal $(, $arg:expr)* $(,)?) 
=> { $crate::vortex_panic!($crate::vortex_err!($fmt, $($arg),*)) From 9056ddfc00c002acf3196f0e9ddf1059b300d7ab Mon Sep 17 00:00:00 2001 From: Will Manning Date: Fri, 6 Sep 2024 14:44:27 -0400 Subject: [PATCH 30/39] moar --- encodings/roaring/src/integer/mod.rs | 4 ++-- encodings/runend-bool/src/array.rs | 4 ++-- encodings/runend-bool/src/compress.rs | 8 ++++---- encodings/runend-bool/src/compute.rs | 2 +- encodings/runend/src/compress.rs | 6 +++--- encodings/runend/src/compute.rs | 4 ++-- encodings/runend/src/runend.rs | 6 +++--- encodings/zigzag/src/compress.rs | 10 +++++----- encodings/zigzag/src/compute.rs | 6 +++--- encodings/zigzag/src/zigzag.rs | 12 ++++++------ vortex-array/benches/fn.rs | 2 ++ vortex-array/benches/iter.rs | 2 ++ vortex-datafusion/src/datatype.rs | 3 ++- vortex-datafusion/src/lib.rs | 2 +- vortex-datafusion/src/memory.rs | 8 ++------ vortex-datafusion/src/persistent/opener.rs | 5 ++--- vortex-expr/src/expr.rs | 7 ++++--- .../src/compressors/bitpacked.rs | 5 ++--- vortex-sampling-compressor/src/compressors/fsst.rs | 10 ++++------ vortex-serde/src/chunked_reader/mod.rs | 11 ++++------- vortex-serde/src/io/object_store.rs | 7 ++++--- vortex-serde/src/io/tokio.rs | 4 ++-- vortex-serde/src/layouts/read/cache.rs | 13 +++++++------ vortex-serde/src/layouts/read/layouts.rs | 14 +++++++++----- vortex-serde/src/layouts/read/stream.rs | 6 +++--- vortex-serde/src/messages.rs | 9 +++------ vortex-serde/src/stream_reader/mod.rs | 6 +++--- 27 files changed, 87 insertions(+), 89 deletions(-) diff --git a/encodings/roaring/src/integer/mod.rs b/encodings/roaring/src/integer/mod.rs index abc23b083..1fbedaa47 100644 --- a/encodings/roaring/src/integer/mod.rs +++ b/encodings/roaring/src/integer/mod.rs @@ -14,7 +14,7 @@ use vortex::{ use vortex_buffer::Buffer; use vortex_dtype::Nullability::NonNullable; use vortex_dtype::{DType, PType}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; mod compress; 
mod compute; @@ -49,7 +49,7 @@ impl RoaringIntArray { Bitmap::deserialize::( self.array() .buffer() - .unwrap_or_else(|| panic!("RoaringBoolArray buffer is missing")) + .vortex_expect("RoaringIntArray buffer is missing") .as_ref(), ) } diff --git a/encodings/runend-bool/src/array.rs b/encodings/runend-bool/src/array.rs index 8166d255c..c37ac69da 100644 --- a/encodings/runend-bool/src/array.rs +++ b/encodings/runend-bool/src/array.rs @@ -10,7 +10,7 @@ use vortex::{ IntoCanonical, }; use vortex_dtype::{DType, Nullability}; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use crate::compress::runend_bool_decode; @@ -96,7 +96,7 @@ impl RunEndBoolArray { pub fn ends(&self) -> Array { self.array() .child(0, &self.metadata().ends_dtype, self.metadata().num_runs) - .unwrap_or_else(|| panic!("RunEndBoolArray is missing its run ends")) + .vortex_expect("RunEndBoolArray is missing its run ends") } } diff --git a/encodings/runend-bool/src/compress.rs b/encodings/runend-bool/src/compress.rs index 1d78cb202..5ba4ed86f 100644 --- a/encodings/runend-bool/src/compress.rs +++ b/encodings/runend-bool/src/compress.rs @@ -5,7 +5,7 @@ use num_traits::{AsPrimitive, FromPrimitive}; use vortex::array::{BoolArray, PrimitiveArray}; use vortex::validity::Validity; use vortex_dtype::{match_each_integer_ptype, NativePType}; -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexExpect as _, VortexResult}; pub fn runend_bool_encode(elements: &BoolArray) -> (PrimitiveArray, bool) { let (arr, start) = runend_bool_encode_slice(&elements.boolean_buffer()); @@ -31,7 +31,7 @@ pub fn runend_bool_encode_slice(elements: &BooleanBuffer) -> (Vec, bool) { let last_end = ends .last() - .unwrap_or_else(|| panic!("RunEndBoolArray is missing its run ends")); + .vortex_expect("RunEndBoolArray cannot have empty run ends (by construction); this should be impossible"); if *last_end != elements.len() as u64 {
ends.push(elements.len() as u64) } @@ -59,14 +59,14 @@ pub fn runend_bool_decode_slice + FromPrimit length: usize, ) -> Vec { let offset_e = E::from_usize(offset).unwrap_or_else(|| { - panic!( + vortex_panic!( "offset {} cannot be converted to {}", offset, std::any::type_name::() ) }); let length_e = E::from_usize(length).unwrap_or_else(|| { - panic!( + vortex_panic!( "length {} cannot be converted to {}", length, std::any::type_name::() diff --git a/encodings/runend-bool/src/compute.rs b/encodings/runend-bool/src/compute.rs index 5de60574f..67dbdbe22 100644 --- a/encodings/runend-bool/src/compute.rs +++ b/encodings/runend-bool/src/compute.rs @@ -3,7 +3,7 @@ use vortex::compute::unary::ScalarAtFn; use vortex::compute::{slice, ArrayCompute, SliceFn, TakeFn}; use vortex::{Array, IntoArray, IntoArrayVariant, ToArray}; use vortex_dtype::match_each_integer_ptype; -use vortex_error::{VortexExpect as _, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::compress::value_at_index; diff --git a/encodings/runend/src/compress.rs b/encodings/runend/src/compress.rs index ef4e54b2a..f50d6a9a2 100644 --- a/encodings/runend/src/compress.rs +++ b/encodings/runend/src/compress.rs @@ -7,7 +7,7 @@ use vortex::stats::{ArrayStatistics, Stat}; use vortex::validity::Validity; use vortex::ArrayDType; use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype, NativePType, Nullability}; -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexResult}; pub fn runend_encode(array: &PrimitiveArray) -> (PrimitiveArray, PrimitiveArray) { let validity = if array.dtype().nullability() == Nullability::NonNullable { @@ -93,14 +93,14 @@ pub fn runend_decode_primitive< length: usize, ) -> Vec { let offset_e = E::from_usize(offset).unwrap_or_else(|| { - panic!( + vortex_panic!( "offset {} cannot be converted to {}", offset, std::any::type_name::() ) }); let length_e = E::from_usize(length).unwrap_or_else(|| { 
- panic!( + vortex_panic!( "length {} cannot be converted to {}", length, std::any::type_name::() diff --git a/encodings/runend/src/compute.rs b/encodings/runend/src/compute.rs index f8671710e..05554d50f 100644 --- a/encodings/runend/src/compute.rs +++ b/encodings/runend/src/compute.rs @@ -4,7 +4,7 @@ use vortex::compute::{filter, slice, take, ArrayCompute, SliceFn, TakeFn}; use vortex::validity::Validity; use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant}; use vortex_dtype::match_each_integer_ptype; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::RunEndArray; @@ -31,7 +31,7 @@ impl ScalarAtFn for RunEndArray { fn scalar_at_unchecked(&self, index: usize) -> Scalar { let idx = self .find_physical_index(index) - .expect("Search must be implemented for the underlying index array"); + .vortex_expect("Search must be implemented for the underlying index array"); scalar_at_unchecked(&self.values(), idx) } } diff --git a/encodings/runend/src/runend.rs b/encodings/runend/src/runend.rs index 9e4500d42..1b96a9f87 100644 --- a/encodings/runend/src/runend.rs +++ b/encodings/runend/src/runend.rs @@ -13,7 +13,7 @@ use vortex::{ IntoCanonical, }; use vortex_dtype::DType; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use crate::compress::{runend_decode, runend_encode}; @@ -113,14 +113,14 @@ impl RunEndArray { pub fn ends(&self) -> Array { self.array() .child(0, &self.metadata().ends_dtype, self.metadata().num_runs) - .unwrap_or_else(|| panic!("RunEndArray is missing its run ends")) + .vortex_expect("RunEndArray is missing its run ends") } #[inline] pub fn values(&self) -> Array { self.array() .child(1, self.dtype(), self.metadata().num_runs) - .unwrap_or_else(|| panic!("RunEndArray is missing its values")) + .vortex_expect("RunEndArray is missing its values") } } diff --git 
a/encodings/zigzag/src/compress.rs b/encodings/zigzag/src/compress.rs index 65489ebc0..ad2f43cd6 100644 --- a/encodings/zigzag/src/compress.rs +++ b/encodings/zigzag/src/compress.rs @@ -13,7 +13,7 @@ pub fn zigzag_encode(parray: &PrimitiveArray) -> VortexResult { PType::I16 => zigzag_encode_primitive::(parray.maybe_null_slice(), parray.validity()), PType::I32 => zigzag_encode_primitive::(parray.maybe_null_slice(), parray.validity()), PType::I64 => zigzag_encode_primitive::(parray.maybe_null_slice(), parray.validity()), - _ => vortex_bail!("Unsupported ptype {}", parray.ptype()), + _ => vortex_bail!("ZigZag can only encode signed integers, got {}", parray.ptype()), }; ZigZagArray::try_new(encoded.into_array()) } @@ -30,14 +30,14 @@ where PrimitiveArray::from_vec(encoded.to_vec(), validity) } -pub fn zigzag_decode(parray: &PrimitiveArray) -> PrimitiveArray { - match parray.ptype() { +pub fn zigzag_decode(parray: &PrimitiveArray) -> VortexResult { + Ok(match parray.ptype() { PType::U8 => zigzag_decode_primitive::(parray.maybe_null_slice(), parray.validity()), PType::U16 => zigzag_decode_primitive::(parray.maybe_null_slice(), parray.validity()), PType::U32 => zigzag_decode_primitive::(parray.maybe_null_slice(), parray.validity()), PType::U64 => zigzag_decode_primitive::(parray.maybe_null_slice(), parray.validity()), - _ => panic!("Unsupported ptype {}", parray.ptype()), - } + _ => vortex_bail!("ZigZag can only decode unsigned integers, got {}", parray.ptype()), + }) } fn zigzag_decode_primitive( diff --git a/encodings/zigzag/src/compute.rs b/encodings/zigzag/src/compute.rs index 2347463a3..65bf68603 100644 --- a/encodings/zigzag/src/compute.rs +++ b/encodings/zigzag/src/compute.rs @@ -2,7 +2,7 @@ use vortex::compute::unary::{scalar_at_unchecked, ScalarAtFn}; use vortex::compute::{slice, ArrayCompute, SliceFn}; use vortex::{Array, IntoArray}; use vortex_dtype::PType; -use vortex_error::{vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, 
VortexResult, VortexUnwrap}; use vortex_scalar::{PrimitiveScalar, Scalar}; use zigzag::ZigZag as ExternalZigZag; @@ -55,12 +55,12 @@ impl ScalarAtFn for ZigZagArray { ) })?) .into()), - _ => unreachable!(), + _ => vortex_bail!("ZigZag can only decode unsigned integers, got {}", pscalar.ptype()), } } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - ::scalar_at(self, index).unwrap() + ::scalar_at(self, index).vortex_unwrap() } } diff --git a/encodings/zigzag/src/zigzag.rs b/encodings/zigzag/src/zigzag.rs index 3a6b3a6ec..99e23f31a 100644 --- a/encodings/zigzag/src/zigzag.rs +++ b/encodings/zigzag/src/zigzag.rs @@ -9,7 +9,7 @@ use vortex::{ IntoCanonical, }; use vortex_dtype::{DType, PType}; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexExpect as _, VortexResult}; use crate::compress::zigzag_encode; use crate::zigzag_decode; @@ -22,7 +22,7 @@ pub struct ZigZagMetadata; impl ZigZagArray { pub fn new(encoded: Array) -> Self { Self::try_new(encoded) - .unwrap_or_else(|err| panic!("Failed to construct ZigZagArray: {}", err)) + .vortex_expect("Failed to construct ZigZagArray") } pub fn try_new(encoded: Array) -> VortexResult { @@ -49,12 +49,12 @@ impl ZigZagArray { pub fn encoded(&self) -> Array { let ptype = PType::try_from(self.dtype()).unwrap_or_else(|err| { - panic!("Failed to convert DType {} to PType: {}", self.dtype(), err) + vortex_panic!(err, "Failed to convert DType {} to PType", self.dtype()) }); let encoded = DType::from(ptype.to_unsigned()).with_nullability(self.dtype().nullability()); self.array() .child(0, &encoded, self.len()) - .unwrap_or_else(|| panic!("ZigZagArray is missing its encoded array")) + .vortex_expect("ZigZagArray is missing its encoded child array") } } @@ -88,8 +88,8 @@ impl ArrayStatisticsCompute for ZigZagArray {} impl IntoCanonical for ZigZagArray { fn into_canonical(self) -> VortexResult { - Ok(Canonical::Primitive(zigzag_decode( + zigzag_decode( 
&self.encoded().into_primitive()?, - ))) + ).map(Canonical::Primitive) } } diff --git a/vortex-array/benches/fn.rs b/vortex-array/benches/fn.rs index d5bcc0709..c07c48462 100644 --- a/vortex-array/benches/fn.rs +++ b/vortex-array/benches/fn.rs @@ -1,3 +1,5 @@ +#![allow(clippy::unwrap_used)] + use arrow_array::types::UInt32Type; use arrow_array::UInt32Array; use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; diff --git a/vortex-array/benches/iter.rs b/vortex-array/benches/iter.rs index a919e7b18..c2d8682d9 100644 --- a/vortex-array/benches/iter.rs +++ b/vortex-array/benches/iter.rs @@ -1,3 +1,5 @@ +#![allow(clippy::unwrap_used)] + use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; use itertools::Itertools; use vortex::array::PrimitiveArray; diff --git a/vortex-datafusion/src/datatype.rs b/vortex-datafusion/src/datatype.rs index 774b19edb..ba642091f 100644 --- a/vortex-datafusion/src/datatype.rs +++ b/vortex-datafusion/src/datatype.rs @@ -14,6 +14,7 @@ use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, SchemaBuilder}; use vortex_datetime_dtype::arrow::make_arrow_temporal_dtype; use vortex_datetime_dtype::is_temporal_ext_type; use vortex_dtype::{DType, Nullability, PType}; +use vortex_error::vortex_panic; /// Convert a Vortex [struct DType][DType] to an Arrow [Schema]. 
/// @@ -94,7 +95,7 @@ pub(crate) fn infer_data_type(dtype: &DType) -> DataType { if is_temporal_ext_type(ext_dtype.id()) { make_arrow_temporal_dtype(ext_dtype) } else { - vortex_panic!("unsupported extension type \"{}\"", ext_dtype.id()) + vortex_panic!("Unsupported extension type \"{}\"", ext_dtype.id()) } } } diff --git a/vortex-datafusion/src/lib.rs b/vortex-datafusion/src/lib.rs index 7e09bc495..c14d4813e 100644 --- a/vortex-datafusion/src/lib.rs +++ b/vortex-datafusion/src/lib.rs @@ -22,7 +22,7 @@ use persistent::config::VortexTableOptions; use persistent::provider::VortexFileTableProvider; use vortex::array::ChunkedArray; use vortex::{Array, ArrayDType, IntoArrayVariant}; -use vortex_error::{vortex_err, VortexError, VortexResult}; +use vortex_error::vortex_err; use vortex_dtype::field::Field; pub mod memory; diff --git a/vortex-datafusion/src/memory.rs b/vortex-datafusion/src/memory.rs index e2538b411..7cf3fce64 100644 --- a/vortex-datafusion/src/memory.rs +++ b/vortex-datafusion/src/memory.rs @@ -14,6 +14,7 @@ use datafusion_physical_plan::{ExecutionMode, ExecutionPlan, Partitioning, PlanP use itertools::Itertools; use vortex::array::ChunkedArray; use vortex::{Array, ArrayDType as _}; +use vortex_error::VortexError; use vortex_expr::datafusion::convert_expr_to_vortex; use vortex_expr::VortexExpr; @@ -115,12 +116,7 @@ impl TableProvider for VortexMemTable { let output_schema = Arc::new( self.schema_ref .project(output_projection.as_slice()) - .unwrap_or_else(|err| { - panic!( - "Failed to project output schema: {}", - VortexError::from(err) - ) - }), + .map_err(VortexError::from)? 
); let plan_properties = PlanProperties::new( EquivalenceProperties::new(output_schema), diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index 19f200562..38a33ae23 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use arrow_array::RecordBatch; use arrow_schema::SchemaRef; use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener}; -use datafusion_common::Result as DFResult; +use datafusion_common::{DataFusionError, Result as DFResult}; use datafusion_physical_expr::PhysicalExpr; use futures::{FutureExt as _, TryStreamExt}; use object_store::ObjectStore; @@ -55,8 +55,7 @@ impl FileOpener for VortexFileOpener { builder .build() .await? - .map_ok(RecordBatch::from) - .map_err(|e| e.into()), + .map_ok(|array: vortex::Array| RecordBatch::try_from(array).unwrap()) ) as _) } .boxed()) diff --git a/vortex-expr/src/expr.rs b/vortex-expr/src/expr.rs index 7e4b47fa1..3000c6c7c 100644 --- a/vortex-expr/src/expr.rs +++ b/vortex-expr/src/expr.rs @@ -22,6 +22,7 @@ pub trait VortexExpr: Debug + Send + Sync + PartialEq { } // Taken from apache-datafusion, necessary since you can't require VortexExpr implement PartialEq +#[allow(clippy::unwrap_used)] fn unbox_any(any: &dyn Any) -> &dyn Any { if any.is::>() { any.downcast_ref::>().unwrap().as_any() @@ -32,7 +33,7 @@ fn unbox_any(any: &dyn Any) -> &dyn Any { } } -#[derive(Debug, PartialEq, Hash, Clone)] +#[derive(Debug, PartialEq, Hash, Clone, Eq)] pub struct NoOp; #[derive(Debug, Clone)] @@ -44,7 +45,7 @@ pub struct BinaryExpr { impl BinaryExpr { pub fn new(lhs: Arc, operator: Operator, rhs: Arc) -> Self { - Self { lhs, rhs, operator } + Self { lhs, operator, rhs } } pub fn lhs(&self) -> &Arc { @@ -60,7 +61,7 @@ impl BinaryExpr { } } -#[derive(Debug, PartialEq, Hash, Clone)] +#[derive(Debug, PartialEq, Hash, Clone, Eq)] pub struct Column { field: Field, } diff --git 
a/vortex-sampling-compressor/src/compressors/bitpacked.rs b/vortex-sampling-compressor/src/compressors/bitpacked.rs index 0e895820c..cef069f29 100644 --- a/vortex-sampling-compressor/src/compressors/bitpacked.rs +++ b/vortex-sampling-compressor/src/compressors/bitpacked.rs @@ -30,7 +30,7 @@ impl EncodingCompressor for BitPackedCompressor { return None; } - let bit_width = find_best_bit_width(&parray)?; + let bit_width = find_best_bit_width(&parray).ok()?; // Check that the bit width is less than the type's bit width if bit_width == parray.ptype().bit_width() { @@ -52,8 +52,7 @@ impl EncodingCompressor for BitPackedCompressor { .compute_bit_width_freq() .ok_or_else(|| vortex_err!(ComputeError: "missing bit width frequency"))?; - let bit_width = find_best_bit_width(&parray) - .ok_or_else(|| vortex_err!(ComputeError: "missing bit width frequency"))?; + let bit_width = find_best_bit_width(&parray)?; let num_exceptions = count_exceptions(bit_width, &bit_width_freq); if bit_width == parray.ptype().bit_width() { diff --git a/vortex-sampling-compressor/src/compressors/fsst.rs b/vortex-sampling-compressor/src/compressors/fsst.rs index 13e645db0..577ac0008 100644 --- a/vortex-sampling-compressor/src/compressors/fsst.rs +++ b/vortex-sampling-compressor/src/compressors/fsst.rs @@ -64,7 +64,8 @@ impl EncodingCompressor for FSSTCompressor { let compressor = like .and_then(|mut tree| tree.metadata()) - .unwrap_or_else(|| Arc::new(fsst_train_compressor(array))); + .map(VortexResult::Ok) + .unwrap_or_else(|| Ok(Arc::new(fsst_train_compressor(array)?)))?; let Some(fsst_compressor) = compressor.as_any().downcast_ref::() else { vortex_bail!("Could not downcast metadata as FSST Compressor") @@ -73,12 +74,9 @@ impl EncodingCompressor for FSSTCompressor { let result_array = if array.encoding().id() == VarBin::ID || array.encoding().id() == VarBinView::ID { // For a VarBinArray or VarBinViewArray, compress directly. 
- fsst_compress(array, fsst_compressor).into_array() + fsst_compress(array, fsst_compressor)?.into_array() } else { - vortex_bail!( - InvalidArgument: "unsupported encoding for FSSTCompressor {:?}", - array.encoding().id() - ) + vortex_bail!("Unsupported encoding for FSSTCompressor: {}", array.encoding().id()) }; Ok(CompressedArray::new( diff --git a/vortex-serde/src/chunked_reader/mod.rs b/vortex-serde/src/chunked_reader/mod.rs index 83156ff7b..fc1b6585d 100644 --- a/vortex-serde/src/chunked_reader/mod.rs +++ b/vortex-serde/src/chunked_reader/mod.rs @@ -5,7 +5,7 @@ use vortex::compute::unary::scalar_at; use vortex::stream::ArrayStream; use vortex::{Array, Context}; use vortex_dtype::DType; -use vortex_error::{vortex_bail, VortexResult}; +use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use crate::io::VortexReadAt; use crate::stream_reader::StreamArrayReader; @@ -56,15 +56,12 @@ impl ChunkedArrayReader { let mut cursor = Cursor::new(&self.read); let byte_offset = scalar_at(&self.byte_offsets, 0) .and_then(|s| u64::try_from(&s)) - .unwrap_or_else(|err| { - panic!("Failed to convert byte_offset to u64: {err}"); - }); + .vortex_expect("Failed to convert byte_offset to u64"); + cursor.set_position(byte_offset); StreamArrayReader::try_new(cursor, self.context.clone()) .await - .unwrap_or_else(|err| { - panic!("Failed to create stream array reader: {err}"); - }) + .vortex_expect("Failed to create stream array reader") .with_dtype(self.dtype.clone()) .into_array_stream() } diff --git a/vortex-serde/src/io/object_store.rs b/vortex-serde/src/io/object_store.rs index 91f471177..f10520887 100644 --- a/vortex-serde/src/io/object_store.rs +++ b/vortex-serde/src/io/object_store.rs @@ -11,7 +11,7 @@ use object_store::path::Path; use object_store::{ObjectStore, WriteMultipart}; use vortex_buffer::io_buf::IoBuf; use vortex_buffer::Buffer; -use vortex_error::{VortexError, VortexResult}; +use vortex_error::{vortex_panic, VortexError, VortexResult}; use 
crate::io::{VortexRead, VortexReadAt, VortexWrite}; @@ -83,8 +83,9 @@ impl VortexReadAt for ObjectStoreReadAt { .await .map_err(VortexError::ObjectStore) .unwrap_or_else(|err| { - panic!( - "Failed to get size of object at location {}: {err}", + vortex_panic!( + err, + "Failed to get size of object at location {}", self.location ) }) diff --git a/vortex-serde/src/io/tokio.rs b/vortex-serde/src/io/tokio.rs index 265d0e277..d78320062 100644 --- a/vortex-serde/src/io/tokio.rs +++ b/vortex-serde/src/io/tokio.rs @@ -7,7 +7,7 @@ use bytes::BytesMut; use tokio::fs::File; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use vortex_buffer::io_buf::IoBuf; -use vortex_error::VortexError; +use vortex_error::{VortexError, VortexUnwrap as _}; use crate::io::{VortexRead, VortexReadAt, VortexWrite}; @@ -50,7 +50,7 @@ impl VortexReadAt for File { } async fn size(&self) -> u64 { - self.metadata().await.unwrap().len() + self.metadata().await.map_err(|err| VortexError::IOError(err).with_context("Failed to get file metadata")).vortex_unwrap().len() } } diff --git a/vortex-serde/src/layouts/read/cache.rs b/vortex-serde/src/layouts/read/cache.rs index c3dca0fba..e8d2c105f 100644 --- a/vortex-serde/src/layouts/read/cache.rs +++ b/vortex-serde/src/layouts/read/cache.rs @@ -3,6 +3,7 @@ use std::sync::{Arc, RwLock}; use ahash::HashMap; use bytes::Bytes; use vortex_dtype::DType; +use vortex_error::vortex_panic; use crate::layouts::read::{LayoutPartId, MessageId}; @@ -54,10 +55,10 @@ impl RelativeLayoutCache { pub fn get(&self, path: &[LayoutPartId]) -> Option { self.root .read() - .unwrap_or_else(|err| { - panic!( + .unwrap_or_else(|poison| { + vortex_panic!( "Failed to read from layout cache at path {:?} with error {}", - path, err + path, poison ); }) .get(&self.absolute_id(path)) @@ -66,10 +67,10 @@ impl RelativeLayoutCache { pub fn remove(&mut self, path: &[LayoutPartId]) -> Option { self.root .write() - .unwrap_or_else(|err| { - panic!( + .unwrap_or_else(|poison| { + 
vortex_panic!( "Failed to write to layout cache at path {:?} with error {}", - path, err + path, poison ) }) .remove(&self.absolute_id(path)) diff --git a/vortex-serde/src/layouts/read/layouts.rs b/vortex-serde/src/layouts/read/layouts.rs index 59e563c63..1e79c82f8 100644 --- a/vortex-serde/src/layouts/read/layouts.rs +++ b/vortex-serde/src/layouts/read/layouts.rs @@ -6,7 +6,7 @@ use flatbuffers::{ForwardsUOffset, Vector}; use vortex::Context; use vortex_dtype::field::Field; use vortex_dtype::DType; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexExpect as _, VortexResult}; use vortex_flatbuffers::footer as fb; use super::projections::Projection; @@ -152,7 +152,9 @@ impl ColumnLayout { }; fb_layout .layout_as_nested_layout() - .unwrap_or_else(|| panic!("ColumnLayout: Failed to read nested layout from flatbuffer")) + .vortex_expect( + "ColumnLayout: Failed to read nested layout from flatbuffer" + ) } fn read_child( @@ -290,9 +292,11 @@ impl ChunkedLayout { let tab = flatbuffers::Table::new(&self.fb_bytes, self.fb_loc); fb::Layout::init_from_table(tab) }; - fb_layout.layout_as_nested_layout().unwrap_or_else(|| { - panic!("ChunkedLayout: Failed to read nested layout from flatbuffer") - }) + fb_layout + .layout_as_nested_layout() + .vortex_expect( + "ChunkedLayout: Failed to read nested layout from flatbuffer" + ) } } diff --git a/vortex-serde/src/layouts/read/stream.rs b/vortex-serde/src/layouts/read/stream.rs index 098c677e7..f8409bbca 100644 --- a/vortex-serde/src/layouts/read/stream.rs +++ b/vortex-serde/src/layouts/read/stream.rs @@ -13,7 +13,7 @@ use vortex::compute::{filter, search_sorted, slice, take, SearchSortedSide}; use vortex::validity::Validity; use vortex::{Array, IntoArray, IntoArrayVariant}; use vortex_dtype::{match_each_integer_ptype, DType}; -use vortex_error::{vortex_err, VortexError, VortexResult}; +use vortex_error::{vortex_err, vortex_panic, VortexError, VortexResult}; use 
vortex_scalar::Scalar; use crate::io::VortexReadAt; @@ -138,8 +138,8 @@ impl Stream for LayoutBatchStream { } StreamingState::Reading(f) => match ready!(f.poll_unpin(cx)) { Ok((read, buffers)) => { - let mut write_cache = self.messages_cache.write().unwrap_or_else(|err| { - panic!("Failed to write to message cache: {err}") + let mut write_cache = self.messages_cache.write().unwrap_or_else(|poison| { + vortex_panic!("Failed to write to message cache: {poison}") }); for (id, buf) in buffers { write_cache.set(id, buf) diff --git a/vortex-serde/src/messages.rs b/vortex-serde/src/messages.rs index 51b201e5d..2397fde98 100644 --- a/vortex-serde/src/messages.rs +++ b/vortex-serde/src/messages.rs @@ -4,7 +4,7 @@ use vortex::stats::ArrayStatistics; use vortex::{flatbuffers as fba, Array}; use vortex_buffer::Buffer; use vortex_dtype::DType; -use vortex_error::{vortex_err, VortexError}; +use vortex_error::{vortex_err, VortexError, VortexExpect as _}; use vortex_flatbuffers::message::Compression; use vortex_flatbuffers::{message as fb, FlatBufferRoot, ReadFlatBuffer, WriteFlatBuffer}; @@ -135,13 +135,10 @@ impl<'a> WriteFlatBuffer for IPCArray<'a> { .metadata() .try_serialize_metadata() // TODO(ngates): should we serialize externally to here? - .unwrap_or_else(|err| panic!("Failed to serialize metadata: {}", err)); + .vortex_expect("Failed to serialize array metadata"); Some(fbb.create_vector(metadata.as_ref())) } - Array::View(v) => Some(fbb.create_vector(v.metadata().unwrap_or_else(|| { - // TODO(wmanning): should this just return None? why does this panic?
- panic!("ArrayView is missing metadata during serialization") - }))), + Array::View(v) => Some(fbb.create_vector(v.metadata().vortex_expect("ArrayView is missing metadata during serialization"))), }; let children = column_data diff --git a/vortex-serde/src/stream_reader/mod.rs b/vortex-serde/src/stream_reader/mod.rs index 3b61a35cb..622d21fc0 100644 --- a/vortex-serde/src/stream_reader/mod.rs +++ b/vortex-serde/src/stream_reader/mod.rs @@ -7,7 +7,7 @@ use vortex::stream::ArrayStream; use vortex::Context; use vortex_buffer::Buffer; use vortex_dtype::DType; -use vortex_error::VortexResult; +use vortex_error::{VortexExpect as _, VortexResult}; use crate::io::VortexRead; use crate::MessageReader; @@ -44,7 +44,7 @@ impl StreamArrayReader { let dtype = self .dtype .as_ref() - .unwrap_or_else(|| panic!("DType not set")) + .vortex_expect("Cannot read array from stream: DType not set") .deref() .clone(); self.msgs.array_stream(self.ctx.clone(), dtype) @@ -54,7 +54,7 @@ impl StreamArrayReader { let dtype = self .dtype .as_ref() - .unwrap_or_else(|| panic!("DType not set")) + .vortex_expect("Cannot read array from stream: DType not set") .deref() .clone(); self.msgs.into_array_stream(self.ctx.clone(), dtype) From 697fa092355ac6b3344db4f3eb466b55066ab3f3 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Fri, 6 Sep 2024 15:17:35 -0400 Subject: [PATCH 31/39] no errors --- pyvortex/src/array.rs | 9 ++++----- pyvortex/src/encode.rs | 2 +- pyvortex/src/error.rs | 6 ++++++ pyvortex/src/io.rs | 3 ++- vortex-datafusion/src/datatype.rs | 4 ++-- vortex-datafusion/src/memory.rs | 8 ++++---- vortex-datafusion/src/persistent/opener.rs | 8 +++++--- vortex-datafusion/src/plans.rs | 4 ++-- vortex-serde/src/layouts/tests.rs | 2 ++ vortex-serde/src/stream_writer/tests.rs | 4 ++-- 10 files changed, 30 insertions(+), 20 deletions(-) diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs index 01baa833a..4d8732d20 100644 --- a/pyvortex/src/array.rs +++ b/pyvortex/src/array.rs @@ -56,10 +56,9 
@@ impl PyArray { let chunks: Vec = chunked_array .chunks() .map(|chunk| -> PyResult { - Ok(chunk - .into_canonical() - .map_err(PyVortexError::map_err)? - .into_arrow()) + chunk.into_canonical() + .and_then(|arr| arr.into_arrow()) + .map_err(PyVortexError::map_err) }) .collect::>>()?; if chunks.is_empty() { @@ -81,8 +80,8 @@ impl PyArray { Ok(vortex .clone() .into_canonical() + .and_then(|arr| arr.into_arrow()) .map_err(PyVortexError::map_err)? - .into_arrow() .into_data() .to_pyarrow(py)? .into_bound(py)) diff --git a/pyvortex/src/encode.rs b/pyvortex/src/encode.rs index c7d84ae0f..79ec11233 100644 --- a/pyvortex/src/encode.rs +++ b/pyvortex/src/encode.rs @@ -55,7 +55,7 @@ pub fn _encode<'py>(obj: &Bound<'py, PyAny>) -> PyResult> { let chunks = array_stream .into_iter() .map(|b| b.map_err(VortexError::ArrowError)) - .map(|b| b.and_then(Array::try_from).map_err(map_to_pyerr)) + .map(|b| b.and_then(Array::try_from).map_err(PyVortexError::map_err)) .collect::>>()?; Bound::new( obj.py(), diff --git a/pyvortex/src/error.rs b/pyvortex/src/error.rs index ae2769590..0c8113645 100644 --- a/pyvortex/src/error.rs +++ b/pyvortex/src/error.rs @@ -14,6 +14,12 @@ impl PyVortexError { } } +impl From for PyVortexError { + fn from(val: VortexError) -> Self { + PyVortexError::new(val) + } +} + impl From for PyErr { fn from(value: PyVortexError) -> Self { PyValueError::new_err(value.0.to_string()) diff --git a/pyvortex/src/io.rs b/pyvortex/src/io.rs index 36709b880..b4b214490 100644 --- a/pyvortex/src/io.rs +++ b/pyvortex/src/io.rs @@ -9,6 +9,7 @@ use tokio::fs::File; use vortex::array::ChunkedArray; use vortex::{Array, Context}; use vortex_dtype::field::Field; +use vortex_error::vortex_panic; use vortex_error::VortexResult; use vortex_serde::layouts::{ LayoutContext, LayoutDeserializer, LayoutReaderBuilder, LayoutWriter, Projection, RowFilter, @@ -157,7 +158,7 @@ pub fn read<'py>( let vecs: Vec = stream.try_collect().await?; if vecs.len() == 1 { - 
Ok(vecs.into_iter().next().unwrap()) + vecs.into_iter().next().ok_or_else(|| vortex_panic!("Should be impossible: vecs.len() == 1 but couldn't get first element")) } else { ChunkedArray::try_new(vecs, dtype).map(|e| e.into()) } diff --git a/vortex-datafusion/src/datatype.rs b/vortex-datafusion/src/datatype.rs index ba642091f..684885005 100644 --- a/vortex-datafusion/src/datatype.rs +++ b/vortex-datafusion/src/datatype.rs @@ -24,11 +24,11 @@ use vortex_error::vortex_panic; /// has top-level nullability. pub(crate) fn infer_schema(dtype: &DType) -> Schema { let DType::Struct(struct_dtype, nullable) = dtype else { - panic!("only DType::Struct can be converted to arrow schema"); + vortex_panic!("only DType::Struct can be converted to arrow schema"); }; if *nullable != Nullability::NonNullable { - panic!("top-level struct in Schema must be NonNullable"); + vortex_panic!("top-level struct in Schema must be NonNullable"); } let mut builder = SchemaBuilder::with_capacity(struct_dtype.names().len()); diff --git a/vortex-datafusion/src/memory.rs b/vortex-datafusion/src/memory.rs index 7cf3fce64..6b422b346 100644 --- a/vortex-datafusion/src/memory.rs +++ b/vortex-datafusion/src/memory.rs @@ -14,7 +14,7 @@ use datafusion_physical_plan::{ExecutionMode, ExecutionPlan, Partitioning, PlanP use itertools::Itertools; use vortex::array::ChunkedArray; use vortex::{Array, ArrayDType as _}; -use vortex_error::VortexError; +use vortex_error::{VortexError, VortexExpect as _}; use vortex_expr::datafusion::convert_expr_to_vortex; use vortex_expr::VortexExpr; @@ -47,9 +47,9 @@ impl VortexMemTable { Ok(a) => a, _ => { let dtype = array.dtype().clone(); - ChunkedArray::try_new(vec![array], dtype).unwrap_or_else(|err| { - panic!("Failed to wrap array as a ChunkedArray with 1 chunk: {err}") - }) + ChunkedArray::try_new(vec![array], dtype).vortex_expect( + "Failed to wrap array as a ChunkedArray with 1 chunk", + ) } }; diff --git a/vortex-datafusion/src/persistent/opener.rs 
b/vortex-datafusion/src/persistent/opener.rs index 38a33ae23..653dc0d50 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -3,9 +3,9 @@ use std::sync::Arc; use arrow_array::RecordBatch; use arrow_schema::SchemaRef; use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener}; -use datafusion_common::{DataFusionError, Result as DFResult}; +use datafusion_common::Result as DFResult; use datafusion_physical_expr::PhysicalExpr; -use futures::{FutureExt as _, TryStreamExt}; +use futures::{FutureExt as _, StreamExt, TryStreamExt}; use object_store::ObjectStore; use vortex::Context; use vortex_expr::datafusion::convert_expr_to_vortex; @@ -55,7 +55,9 @@ impl FileOpener for VortexFileOpener { builder .build() .await? - .map_ok(|array: vortex::Array| RecordBatch::try_from(array).unwrap()) + .map_ok(RecordBatch::try_from) + .map(|r| r.and_then(|inner| inner)) + .map_err(|e| e.into()) ) as _) } .boxed()) diff --git a/vortex-datafusion/src/plans.rs b/vortex-datafusion/src/plans.rs index ce8fbb838..25c3ff9ef 100644 --- a/vortex-datafusion/src/plans.rs +++ b/vortex-datafusion/src/plans.rs @@ -24,7 +24,7 @@ use vortex::arrow::FromArrowArray; use vortex::compute::take; use vortex::{Array, AsArray as _, IntoArray, IntoArrayVariant, IntoCanonical}; use vortex_dtype::field::Field; -use vortex_error::{vortex_err, VortexError}; +use vortex_error::{vortex_err, vortex_panic, VortexError}; use vortex_expr::VortexExpr; /// Physical plan operator that applies a set of [filters][Expr] against the input, producing a @@ -219,7 +219,7 @@ impl TakeRowsExec { ) -> Self { let output_schema = Arc::new(schema_ref.project(projection).unwrap_or_else(|err| { - panic!("Failed to project schema: {}", VortexError::from(err)) + vortex_panic!("Failed to project schema: {}", VortexError::from(err)) })); let plan_properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), diff --git 
a/vortex-serde/src/layouts/tests.rs b/vortex-serde/src/layouts/tests.rs index 9122da488..b80ed68a9 100644 --- a/vortex-serde/src/layouts/tests.rs +++ b/vortex-serde/src/layouts/tests.rs @@ -1,3 +1,5 @@ +#![allow(clippy::panic)] + use futures::StreamExt; use vortex::array::{ChunkedArray, PrimitiveArray, StructArray, VarBinArray}; use vortex::{ArrayDType, IntoArray, IntoArrayVariant}; diff --git a/vortex-serde/src/stream_writer/tests.rs b/vortex-serde/src/stream_writer/tests.rs index a7632e570..f23fc7434 100644 --- a/vortex-serde/src/stream_writer/tests.rs +++ b/vortex-serde/src/stream_writer/tests.rs @@ -1,7 +1,7 @@ use std::io::Cursor; use std::sync::Arc; -use arrow_array::cast::AsArray; +use arrow_array::cast::AsArray as _; use arrow_array::types::Int32Type; use arrow_array::PrimitiveArray; use vortex::arrow::FromArrowArray; @@ -31,6 +31,6 @@ async fn broken_data() { .collect_chunked() .await .unwrap(); - let round_tripped = arr.into_canonical().unwrap().into_arrow(); + let round_tripped = arr.into_canonical().unwrap().into_arrow().unwrap(); assert_eq!(&arrow_arr, round_tripped.as_primitive::()); } From 97a0b67a94fe50bc503cc73bca523e735edd80c4 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Fri, 6 Sep 2024 15:22:59 -0400 Subject: [PATCH 32/39] format the world --- encodings/dict/src/compress.rs | 2 +- encodings/dict/src/dict.rs | 5 +- .../fastlanes/src/bitpacking/compress.rs | 16 +++--- .../src/bitpacking/compute/scalar_at.rs | 3 +- .../src/bitpacking/compute/search_sorted.rs | 5 +- encodings/fastlanes/src/bitpacking/mod.rs | 14 +++-- encodings/fastlanes/src/delta/mod.rs | 11 ++-- encodings/fastlanes/src/for/compute.rs | 6 +- encodings/fastlanes/src/for/mod.rs | 10 +++- encodings/fsst/src/compute.rs | 5 +- encodings/fsst/tests/fsst_tests.rs | 4 +- encodings/runend-bool/src/compress.rs | 6 +- encodings/runend-bool/src/compute.rs | 3 +- encodings/zigzag/src/compress.rs | 10 +++- encodings/zigzag/src/compute.rs | 5 +- encodings/zigzag/src/zigzag.rs | 7 +-- 
pyvortex/src/array.rs | 3 +- pyvortex/src/encode.rs | 4 +- pyvortex/src/io.rs | 9 ++- vortex-array/src/array/bool/mod.rs | 3 +- vortex-array/src/array/chunked/canonical.rs | 4 +- .../src/array/chunked/compute/take.rs | 4 +- vortex-array/src/array/chunked/mod.rs | 4 +- vortex-array/src/array/chunked/variants.rs | 8 ++- vortex-array/src/array/constant/variants.rs | 16 ++++-- vortex-array/src/array/primitive/mod.rs | 14 +++-- vortex-array/src/array/sparse/mod.rs | 9 +-- vortex-array/src/array/varbin/builder.rs | 4 +- .../src/array/varbin/compute/filter.rs | 12 ++-- vortex-array/src/array/varbin/compute/take.rs | 6 +- vortex-array/src/array/varbin/mod.rs | 5 +- vortex-array/src/array/varbinview/compute.rs | 3 +- vortex-array/src/array/varbinview/mod.rs | 16 +++--- vortex-array/src/arrow/array.rs | 11 ++-- vortex-array/src/arrow/recordbatch.rs | 7 +-- vortex-array/src/canonical.rs | 55 ++++++++++--------- vortex-array/src/data.rs | 3 +- vortex-array/src/implementation.rs | 3 +- vortex-array/src/iter/mod.rs | 4 +- vortex-array/src/lib.rs | 2 +- vortex-array/src/stats/mod.rs | 18 ++---- vortex-array/src/validity.rs | 31 +++++++---- vortex-array/src/variants.rs | 18 ++---- vortex-datafusion/src/lib.rs | 2 +- vortex-datafusion/src/memory.rs | 7 +-- vortex-datafusion/src/persistent/opener.rs | 2 +- vortex-datafusion/src/plans.rs | 11 ++-- vortex-datetime-dtype/src/arrow.rs | 12 +++- vortex-error/src/lib.rs | 2 +- .../src/compressors/fsst.rs | 5 +- vortex-scalar/src/arrow.rs | 23 ++------ vortex-scalar/src/datafusion.rs | 10 +--- vortex-scalar/src/primitive.rs | 14 +++-- vortex-serde/src/io/tokio.rs | 6 +- vortex-serde/src/layouts/read/cache.rs | 6 +- vortex-serde/src/layouts/read/layouts.rs | 8 +-- vortex-serde/src/layouts/read/stream.rs | 7 ++- vortex-serde/src/layouts/write/writer.rs | 7 ++- vortex-serde/src/messages.rs | 7 ++- 59 files changed, 286 insertions(+), 231 deletions(-) diff --git a/encodings/dict/src/compress.rs b/encodings/dict/src/compress.rs index 
98182519c..60a05606b 100644 --- a/encodings/dict/src/compress.rs +++ b/encodings/dict/src/compress.rs @@ -166,7 +166,7 @@ where dtype, values_validity, ) - .vortex_expect("Failed to create VarBinArray dictionary during encoding") + .vortex_expect("Failed to create VarBinArray dictionary during encoding"), ) } diff --git a/encodings/dict/src/dict.rs b/encodings/dict/src/dict.rs index 9df0091d2..d30fe314f 100644 --- a/encodings/dict/src/dict.rs +++ b/encodings/dict/src/dict.rs @@ -77,7 +77,10 @@ impl ArrayValidity for DictArray { fn logical_validity(&self) -> LogicalValidity { if self.dtype().is_nullable() { - let primitive_codes = self.codes().into_primitive().vortex_expect("Failed to convert DictArray codes to primitive array"); + let primitive_codes = self + .codes() + .into_primitive() + .vortex_expect("Failed to convert DictArray codes to primitive array"); match_each_integer_ptype!(primitive_codes.ptype(), |$P| { ArrayAccessor::<$P>::with_iterator(&primitive_codes, |iter| { LogicalValidity::Array( diff --git a/encodings/fastlanes/src/bitpacking/compress.rs b/encodings/fastlanes/src/bitpacking/compress.rs index 4bbe301c0..1338f528a 100644 --- a/encodings/fastlanes/src/bitpacking/compress.rs +++ b/encodings/fastlanes/src/bitpacking/compress.rs @@ -258,13 +258,12 @@ pub unsafe fn unpack_single_primitive( } pub fn find_best_bit_width(array: &PrimitiveArray) -> VortexResult { - let bit_width_freq = array.statistics().compute_bit_width_freq() + let bit_width_freq = array + .statistics() + .compute_bit_width_freq() .ok_or_else(|| vortex_err!(ComputeError: "Failed to compute bit width frequency"))?; - best_bit_width( - &bit_width_freq, - bytes_per_exception(array.ptype()), - ) + best_bit_width(&bit_width_freq, bytes_per_exception(array.ptype())) } /// Assuming exceptions cost 1 value + 1 u32 index, figure out the best bit-width to use. 
@@ -278,7 +277,7 @@ fn best_bit_width(bit_width_freq: &[usize], bytes_per_exception: usize) -> Vorte let mut num_packed = 0; let mut best_cost = len * bytes_per_exception; let mut best_width = 0; - for (bit_width, freq) in bit_width_freq.iter().enumerate() { + for (bit_width, freq) in bit_width_freq.iter().enumerate() { let packed_cost = ((bit_width * len) + 7) / 8; // round up to bytes num_packed += *freq; @@ -317,7 +316,10 @@ mod test { // 10 1-bit values, 20 2-bit, etc. let freq = vec![0, 10, 20, 15, 1, 0, 0, 0]; // 3-bits => (46 * 3) + (8 * 1 * 5) => 178 bits => 23 bytes and zero exceptions - assert_eq!(best_bit_width(&freq, bytes_per_exception(PType::U8)).unwrap(), 3); + assert_eq!( + best_bit_width(&freq, bytes_per_exception(PType::U8)).unwrap(), + 3 + ); } #[test] diff --git a/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs b/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs index 97e5c57cc..d67829e0f 100644 --- a/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs +++ b/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs @@ -18,7 +18,8 @@ impl ScalarAtFn for BitPackedArray { } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - self.scalar_at(index).unwrap_or_else(|err| vortex_panic!(err)) + self.scalar_at(index) + .unwrap_or_else(|err| vortex_panic!(err)) } } diff --git a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs index 73e09a05a..04c79671f 100644 --- a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs +++ b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs @@ -67,7 +67,10 @@ struct BitPackedSearch { impl BitPackedSearch { pub fn new(array: &BitPackedArray) -> Self { Self { - packed: array.packed().into_primitive().vortex_expect("Failed to get packed bytes as PrimitiveArray"), + packed: array + .packed() + .into_primitive() + .vortex_expect("Failed to get packed bytes as PrimitiveArray"), offset: array.offset(), length: 
array.len(), bit_width: array.bit_width(), diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index a63488c82..f83a6e4e1 100644 --- a/encodings/fastlanes/src/bitpacking/mod.rs +++ b/encodings/fastlanes/src/bitpacking/mod.rs @@ -7,7 +7,9 @@ use vortex::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical}; use vortex_dtype::{Nullability, PType}; -use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect as _, VortexResult}; +use vortex_error::{ + vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect as _, VortexResult, +}; mod compress; mod compute; @@ -159,9 +161,13 @@ impl BitPackedArray { #[inline] pub fn ptype(&self) -> PType { - self.dtype() - .try_into() - .unwrap_or_else(|err: VortexError| vortex_panic!(err, "Failed to convert BitpackedArray DType {} to PType", self.dtype())) + self.dtype().try_into().unwrap_or_else(|err: VortexError| { + vortex_panic!( + err, + "Failed to convert BitpackedArray DType {} to PType", + self.dtype() + ) + }) } #[inline] diff --git a/encodings/fastlanes/src/delta/mod.rs b/encodings/fastlanes/src/delta/mod.rs index 32345d506..a091446d5 100644 --- a/encodings/fastlanes/src/delta/mod.rs +++ b/encodings/fastlanes/src/delta/mod.rs @@ -73,10 +73,13 @@ impl DeltaArray { #[inline] fn lanes(&self) -> usize { - let ptype = self - .dtype() - .try_into() - .unwrap_or_else(|err| vortex_panic!(err, "Failed to convert DeltaArray DType {} to PType", self.dtype())); + let ptype = self.dtype().try_into().unwrap_or_else(|err| { + vortex_panic!( + err, + "Failed to convert DeltaArray DType {} to PType", + self.dtype() + ) + }); match_each_unsigned_integer_ptype!(ptype, |$T| { <$T as fastlanes::FastLanes>::LANES }) diff --git a/encodings/fastlanes/src/for/compute.rs b/encodings/fastlanes/src/for/compute.rs index 
3ab1d625e..948ac7059 100644 --- a/encodings/fastlanes/src/for/compute.rs +++ b/encodings/fastlanes/src/for/compute.rs @@ -50,8 +50,10 @@ impl ScalarAtFn for FoRArray { fn scalar_at_unchecked(&self, index: usize) -> Scalar { let encoded_scalar = scalar_at_unchecked(&self.encoded(), index).reinterpret_cast(self.ptype()); - let encoded = PrimitiveScalar::try_from(&encoded_scalar).vortex_expect("Invalid encoded scalar"); - let reference = PrimitiveScalar::try_from(self.reference()).vortex_expect("Invalid reference scalar"); + let encoded = + PrimitiveScalar::try_from(&encoded_scalar).vortex_expect("Invalid encoded scalar"); + let reference = + PrimitiveScalar::try_from(self.reference()).vortex_expect("Invalid reference scalar"); match_each_integer_ptype!(encoded.ptype(), |$P| { encoded.typed_value::<$P>().map(|v| (v << self.shift()).wrapping_add(reference.typed_value::<$P>().unwrap())) diff --git a/encodings/fastlanes/src/for/mod.rs b/encodings/fastlanes/src/for/mod.rs index b5a0fe4c1..d1246339c 100644 --- a/encodings/fastlanes/src/for/mod.rs +++ b/encodings/fastlanes/src/for/mod.rs @@ -65,9 +65,13 @@ impl FoRArray { #[inline] pub fn ptype(&self) -> PType { - self.dtype() - .try_into() - .unwrap_or_else(|err| vortex_panic!(err, "Failed to convert FoRArray DType {} to PType", self.dtype())) + self.dtype().try_into().unwrap_or_else(|err| { + vortex_panic!( + err, + "Failed to convert FoRArray DType {} to PType", + self.dtype() + ) + }) } } diff --git a/encodings/fsst/src/compute.rs b/encodings/fsst/src/compute.rs index 7a3aeccb2..1f0c47ff4 100644 --- a/encodings/fsst/src/compute.rs +++ b/encodings/fsst/src/compute.rs @@ -58,7 +58,10 @@ impl TakeFn for FSSTArray { impl ScalarAtFn for FSSTArray { fn scalar_at(&self, index: usize) -> VortexResult { let compressed = scalar_at_unchecked(&self.codes(), index); - let binary_datum = compressed.value().as_buffer()?.ok_or_else(|| vortex_err!("Expected a binary scalar, found {}", compressed.dtype()))?; + let binary_datum = 
compressed + .value() + .as_buffer()? + .ok_or_else(|| vortex_err!("Expected a binary scalar, found {}", compressed.dtype()))?; self.with_decompressor(|decompressor| { let decoded_buffer: Buffer = decompressor.decompress(binary_datum.as_slice()).into(); diff --git a/encodings/fsst/tests/fsst_tests.rs b/encodings/fsst/tests/fsst_tests.rs index 01bf75264..9029dbf02 100644 --- a/encodings/fsst/tests/fsst_tests.rs +++ b/encodings/fsst/tests/fsst_tests.rs @@ -30,7 +30,9 @@ fn fsst_array() -> Array { let compressor = fsst_train_compressor(&input_array).unwrap(); - fsst_compress(&input_array, &compressor).unwrap().into_array() + fsst_compress(&input_array, &compressor) + .unwrap() + .into_array() } #[rstest] diff --git a/encodings/runend-bool/src/compress.rs b/encodings/runend-bool/src/compress.rs index 5ba4ed86f..1ecfb4cbc 100644 --- a/encodings/runend-bool/src/compress.rs +++ b/encodings/runend-bool/src/compress.rs @@ -29,9 +29,9 @@ pub fn runend_bool_encode_slice(elements: &BooleanBuffer) -> (Vec, bool) { ends.push(e as u64); } - let last_end = ends - .last() - .vortex_expect("RunEndBoolArray cannot have empty run ends (by construction); this should be impossible"); + let last_end = ends.last().vortex_expect( + "RunEndBoolArray cannot have empty run ends (by construction); this should be impossible", + ); if *last_end != elements.len() as u64 { ends.push(elements.len() as u64) } diff --git a/encodings/runend-bool/src/compute.rs b/encodings/runend-bool/src/compute.rs index 67dbdbe22..ef5b89cfc 100644 --- a/encodings/runend-bool/src/compute.rs +++ b/encodings/runend-bool/src/compute.rs @@ -35,7 +35,8 @@ impl ScalarAtFn for RunEndBoolArray { fn scalar_at_unchecked(&self, index: usize) -> Scalar { let start = self.start(); Scalar::from(value_at_index( - self.find_physical_index(index).vortex_expect("Search must be implemented for the underlying index array"), + self.find_physical_index(index) + .vortex_expect("Search must be implemented for the underlying index array"), 
start, )) } diff --git a/encodings/zigzag/src/compress.rs b/encodings/zigzag/src/compress.rs index ad2f43cd6..5a138951d 100644 --- a/encodings/zigzag/src/compress.rs +++ b/encodings/zigzag/src/compress.rs @@ -13,7 +13,10 @@ pub fn zigzag_encode(parray: &PrimitiveArray) -> VortexResult { PType::I16 => zigzag_encode_primitive::(parray.maybe_null_slice(), parray.validity()), PType::I32 => zigzag_encode_primitive::(parray.maybe_null_slice(), parray.validity()), PType::I64 => zigzag_encode_primitive::(parray.maybe_null_slice(), parray.validity()), - _ => vortex_bail!("ZigZag can only encode signed integers, got {}", parray.ptype()), + _ => vortex_bail!( + "ZigZag can only encode signed integers, got {}", + parray.ptype() + ), }; ZigZagArray::try_new(encoded.into_array()) } @@ -36,7 +39,10 @@ pub fn zigzag_decode(parray: &PrimitiveArray) -> VortexResult { PType::U16 => zigzag_decode_primitive::(parray.maybe_null_slice(), parray.validity()), PType::U32 => zigzag_decode_primitive::(parray.maybe_null_slice(), parray.validity()), PType::U64 => zigzag_decode_primitive::(parray.maybe_null_slice(), parray.validity()), - _ => vortex_bail!("ZigZag can only decode unsigned integers, got {}", parray.ptype()), + _ => vortex_bail!( + "ZigZag can only decode unsigned integers, got {}", + parray.ptype() + ), }) } diff --git a/encodings/zigzag/src/compute.rs b/encodings/zigzag/src/compute.rs index 65bf68603..8b5ee0278 100644 --- a/encodings/zigzag/src/compute.rs +++ b/encodings/zigzag/src/compute.rs @@ -55,7 +55,10 @@ impl ScalarAtFn for ZigZagArray { ) })?) 
.into()), - _ => vortex_bail!("ZigZag can only decode unsigned integers, got {}", pscalar.ptype()), + _ => vortex_bail!( + "ZigZag can only decode unsigned integers, got {}", + pscalar.ptype() + ), } } diff --git a/encodings/zigzag/src/zigzag.rs b/encodings/zigzag/src/zigzag.rs index 99e23f31a..0098f3533 100644 --- a/encodings/zigzag/src/zigzag.rs +++ b/encodings/zigzag/src/zigzag.rs @@ -21,8 +21,7 @@ pub struct ZigZagMetadata; impl ZigZagArray { pub fn new(encoded: Array) -> Self { - Self::try_new(encoded) - .vortex_expect("Failed to construct ZigZagArray") + Self::try_new(encoded).vortex_expect("Failed to construct ZigZagArray") } pub fn try_new(encoded: Array) -> VortexResult { @@ -88,8 +87,6 @@ impl ArrayStatisticsCompute for ZigZagArray {} impl IntoCanonical for ZigZagArray { fn into_canonical(self) -> VortexResult { - zigzag_decode( - &self.encoded().into_primitive()?, - ).map(Canonical::Primitive) + zigzag_decode(&self.encoded().into_primitive()?).map(Canonical::Primitive) } } diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs index 4d8732d20..3802edfcd 100644 --- a/pyvortex/src/array.rs +++ b/pyvortex/src/array.rs @@ -56,7 +56,8 @@ impl PyArray { let chunks: Vec = chunked_array .chunks() .map(|chunk| -> PyResult { - chunk.into_canonical() + chunk + .into_canonical() .and_then(|arr| arr.into_arrow()) .map_err(PyVortexError::map_err) }) diff --git a/pyvortex/src/encode.rs b/pyvortex/src/encode.rs index 79ec11233..028ba9552 100644 --- a/pyvortex/src/encode.rs +++ b/pyvortex/src/encode.rs @@ -66,6 +66,8 @@ pub fn _encode<'py>(obj: &Bound<'py, PyAny>) -> PyResult> { ), ) } else { - Err(PyValueError::new_err("Cannot convert object to Vortex array")) + Err(PyValueError::new_err( + "Cannot convert object to Vortex array", + )) } } diff --git a/pyvortex/src/io.rs b/pyvortex/src/io.rs index b4b214490..95eee0057 100644 --- a/pyvortex/src/io.rs +++ b/pyvortex/src/io.rs @@ -9,8 +9,7 @@ use tokio::fs::File; use vortex::array::ChunkedArray; use vortex::{Array, 
Context}; use vortex_dtype::field::Field; -use vortex_error::vortex_panic; -use vortex_error::VortexResult; +use vortex_error::{vortex_panic, VortexResult}; use vortex_serde::layouts::{ LayoutContext, LayoutDeserializer, LayoutReaderBuilder, LayoutWriter, Projection, RowFilter, }; @@ -158,7 +157,11 @@ pub fn read<'py>( let vecs: Vec = stream.try_collect().await?; if vecs.len() == 1 { - vecs.into_iter().next().ok_or_else(|| vortex_panic!("Should be impossible: vecs.len() == 1 but couldn't get first element")) + vecs.into_iter().next().ok_or_else(|| { + vortex_panic!( + "Should be impossible: vecs.len() == 1 but couldn't get first element" + ) + }) } else { ChunkedArray::try_new(vecs, dtype).map(|e| e.into()) } diff --git a/vortex-array/src/array/bool/mod.rs b/vortex-array/src/array/bool/mod.rs index 744fc69f1..34c9ccb87 100644 --- a/vortex-array/src/array/bool/mod.rs +++ b/vortex-array/src/array/bool/mod.rs @@ -74,8 +74,7 @@ impl BoolArray { pub fn from_vec(bools: Vec, validity: Validity) -> Self { let buffer = BooleanBuffer::from(bools); - Self::try_new(buffer, validity) - .vortex_expect("Failed to create BoolArray from vec") + Self::try_new(buffer, validity).vortex_expect("Failed to create BoolArray from vec") } } diff --git a/vortex-array/src/array/chunked/canonical.rs b/vortex-array/src/array/chunked/canonical.rs index 772d99be4..a4bf4d38d 100644 --- a/vortex-array/src/array/chunked/canonical.rs +++ b/vortex-array/src/array/chunked/canonical.rs @@ -230,7 +230,9 @@ fn pack_varbin(chunks: &[Array], validity: Validity, dtype: &DType) -> VortexRes slice(&chunk.bytes(), first_offset_value, last_offset_value)?.into_primitive()?; data_bytes.extend_from_slice(primitive_bytes.buffer()); - let adjustment_from_previous = *offsets.last().ok_or_else(|| vortex_err!("VarBinArray offsets must have at least one element"))?; + let adjustment_from_previous = *offsets + .last() + .ok_or_else(|| vortex_err!("VarBinArray offsets must have at least one element"))?; offsets.extend( 
offsets_arr .maybe_null_slice::() diff --git a/vortex-array/src/array/chunked/compute/take.rs b/vortex-array/src/array/chunked/compute/take.rs index f294c0bdd..cb595f120 100644 --- a/vortex-array/src/array/chunked/compute/take.rs +++ b/vortex-array/src/array/chunked/compute/take.rs @@ -111,7 +111,9 @@ fn take_strict_sorted(chunked: &ChunkedArray, indices: &Array) -> VortexResult impl Iterator + '_ { (0..self.nchunks()).map(|c| { - self.chunk(c).unwrap_or_else(|| + self.chunk(c).unwrap_or_else(|| { vortex_panic!( "Chunk should {} exist but doesn't (nchunks: {})", c, self.nchunks() ) - ) + }) }) } diff --git a/vortex-array/src/array/chunked/variants.rs b/vortex-array/src/array/chunked/variants.rs index d229c36bc..1d820f072 100644 --- a/vortex-array/src/array/chunked/variants.rs +++ b/vortex-array/src/array/chunked/variants.rs @@ -71,7 +71,13 @@ impl StructArrayTrait for ChunkedArray { let projected_dtype = self.dtype().as_struct().and_then(|s| s.dtypes().get(idx))?; let chunked = ChunkedArray::try_new(chunks, projected_dtype.clone()) - .unwrap_or_else(|err| vortex_panic!(err, "Failed to create new chunked array with dtype {}", projected_dtype)) + .unwrap_or_else(|err| { + vortex_panic!( + err, + "Failed to create new chunked array with dtype {}", + projected_dtype + ) + }) .into_array(); Some(chunked) } diff --git a/vortex-array/src/array/constant/variants.rs b/vortex-array/src/array/constant/variants.rs index 29082e909..39a8d5687 100644 --- a/vortex-array/src/array/constant/variants.rs +++ b/vortex-array/src/array/constant/variants.rs @@ -53,9 +53,11 @@ impl NullArrayTrait for ConstantArray {} impl BoolArrayTrait for ConstantArray { fn maybe_null_indices_iter(&self) -> Box> { - let value = self.scalar().value().as_bool().vortex_expect( - "Failed to get bool value from constant array" - ); + let value = self + .scalar() + .value() + .as_bool() + .vortex_expect("Failed to get bool value from constant array"); if value.unwrap_or(false) { Box::new(0..self.len()) } else 
{ @@ -65,9 +67,11 @@ impl BoolArrayTrait for ConstantArray { fn maybe_null_slices_iter(&self) -> Box> { // Must be a boolean scalar - let value = self.scalar().value().as_bool().vortex_expect( - "Failed to get bool value from constant array" - ); + let value = self + .scalar() + .value() + .as_bool() + .vortex_expect("Failed to get bool value from constant array"); if value.unwrap_or(false) { Box::new(iter::once((0, self.len()))) diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index fb12bf824..568fbbcff 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -92,9 +92,9 @@ impl PrimitiveArray { pub fn ptype(&self) -> PType { // TODO(ngates): we can't really cache this anywhere? - self.dtype() - .try_into() - .unwrap_or_else(|err: VortexError| vortex_panic!(err, "Failed to convert dtype {} to ptype", self.dtype())) + self.dtype().try_into().unwrap_or_else(|err: VortexError| { + vortex_panic!(err, "Failed to convert dtype {} to ptype", self.dtype()) + }) } pub fn buffer(&self) -> &Buffer { @@ -316,8 +316,7 @@ impl AcceptArrayVisitor for PrimitiveArray { impl Array { pub fn as_primitive(&self) -> PrimitiveArray { - PrimitiveArray::try_from(self) - .vortex_expect("Expected primitive array") + PrimitiveArray::try_from(self).vortex_expect("Expected primitive array") } } @@ -432,7 +431,10 @@ fn process_batch let lhs: [I; ITER_BATCH_SIZE] = lhs.try_into().unwrap(); let rhs: [U; ITER_BATCH_SIZE] = batch.data().try_into().unwrap(); // We know output is of the same length and lhs/rhs - let mut output_slice: [_; ITER_BATCH_SIZE] = output[idx_offset..idx_offset + ITER_BATCH_SIZE].try_into().unwrap(); + let mut output_slice: [_; ITER_BATCH_SIZE] = output + [idx_offset..idx_offset + ITER_BATCH_SIZE] + .try_into() + .unwrap(); for idx in 0..ITER_BATCH_SIZE { unsafe { diff --git a/vortex-array/src/array/sparse/mod.rs b/vortex-array/src/array/sparse/mod.rs index 35d7107e8..ed723cc65 100644 
--- a/vortex-array/src/array/sparse/mod.rs +++ b/vortex-array/src/array/sparse/mod.rs @@ -127,7 +127,10 @@ impl SparseArray { /// Return indices as a vector of usize with the indices_offset applied. pub fn resolved_indices(&self) -> Vec { - let flat_indices = self.indices().into_primitive().vortex_expect("Failed to convert SparseArray indices to primitive array"); + let flat_indices = self + .indices() + .into_primitive() + .vortex_expect("Failed to convert SparseArray indices to primitive array"); match_each_integer_ptype!(flat_indices.ptype(), |$P| { flat_indices .maybe_null_slice::<$P>() @@ -188,9 +191,7 @@ impl ArrayValidity for SparseArray { false.into(), ) } - .vortex_expect( - "Error determining logical validity for sparse array" - ); + .vortex_expect("Error determining logical validity for sparse array"); LogicalValidity::Array(validity.into_array()) } } diff --git a/vortex-array/src/array/varbin/builder.rs b/vortex-array/src/array/varbin/builder.rs index 41064e5c4..4be72d0d6 100644 --- a/vortex-array/src/array/varbin/builder.rs +++ b/vortex-array/src/array/varbin/builder.rs @@ -38,14 +38,14 @@ impl VarBinBuilder { pub fn push_value(&mut self, value: impl AsRef<[u8]>) { let slice = value.as_ref(); self.offsets - .push(O::from(self.data.len() + slice.len()).unwrap_or_else(|| + .push(O::from(self.data.len() + slice.len()).unwrap_or_else(|| { vortex_panic!( "Failed to convert sum of {} and {} to offset of type {}", self.data.len(), slice.len(), std::any::type_name::() ) - )); + })); self.data.extend_from_slice(slice); self.validity.append_non_null(); } diff --git a/vortex-array/src/array/varbin/compute/filter.rs b/vortex-array/src/array/varbin/compute/filter.rs index 838af36ad..4d220d652 100644 --- a/vortex-array/src/array/varbin/compute/filter.rs +++ b/vortex-array/src/array/varbin/compute/filter.rs @@ -117,12 +117,12 @@ fn update_non_nullable_slice( usize: AsPrimitive, { let new_data = { - let offset_start = offsets[start] - .to_usize() - .unwrap_or_else(|| 
vortex_panic!("Failed to convert offset to usize: {}", offsets[start])); - let offset_end = offsets[end] - .to_usize() - .unwrap_or_else(|| vortex_panic!("Failed to convert offset to usize: {}", offsets[end])); + let offset_start = offsets[start].to_usize().unwrap_or_else(|| { + vortex_panic!("Failed to convert offset to usize: {}", offsets[start]) + }); + let offset_end = offsets[end].to_usize().unwrap_or_else(|| { + vortex_panic!("Failed to convert offset to usize: {}", offsets[end]) + }); &data[offset_start..offset_end] }; let new_offsets = offsets[start..end + 1] diff --git a/vortex-array/src/array/varbin/compute/take.rs b/vortex-array/src/array/varbin/compute/take.rs index c1ef74db9..8f4282d87 100644 --- a/vortex-array/src/array/varbin/compute/take.rs +++ b/vortex-array/src/array/varbin/compute/take.rs @@ -76,9 +76,9 @@ fn take_nullable( .to_usize() .unwrap_or_else(|| vortex_panic!("Failed to convert index to usize: {}", idx)); if null_buffer.is_valid(idx) { - let start = offsets[idx] - .to_usize() - .unwrap_or_else(|| vortex_panic!("Failed to convert offset to usize: {}", offsets[idx])); + let start = offsets[idx].to_usize().unwrap_or_else(|| { + vortex_panic!("Failed to convert offset to usize: {}", offsets[idx]) + }); let stop = offsets[idx + 1].to_usize().unwrap_or_else(|| { vortex_panic!("Failed to convert offset to usize: {}", offsets[idx + 1]) }); diff --git a/vortex-array/src/array/varbin/mod.rs b/vortex-array/src/array/varbin/mod.rs index a3ed3100a..99e3f40b2 100644 --- a/vortex-array/src/array/varbin/mod.rs +++ b/vortex-array/src/array/varbin/mod.rs @@ -5,7 +5,10 @@ use serde::{Deserialize, Serialize}; pub use stats::compute_stats; use vortex_buffer::Buffer; use vortex_dtype::{match_each_native_ptype, DType, NativePType, Nullability}; -use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect as _, VortexResult, VortexUnwrap as _}; +use vortex_error::{ + vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect as _, 
VortexResult, + VortexUnwrap as _, +}; use vortex_scalar::Scalar; use crate::array::primitive::PrimitiveArray; diff --git a/vortex-array/src/array/varbinview/compute.rs b/vortex-array/src/array/varbinview/compute.rs index fca7a6812..f0c086913 100644 --- a/vortex-array/src/array/varbinview/compute.rs +++ b/vortex-array/src/array/varbinview/compute.rs @@ -25,8 +25,7 @@ impl ScalarAtFn for VarBinViewArray { } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - ::scalar_at(self, index) - .unwrap_or_else(|err| vortex_panic!(err)) + ::scalar_at(self, index).unwrap_or_else(|err| vortex_panic!(err)) } } diff --git a/vortex-array/src/array/varbinview/mod.rs b/vortex-array/src/array/varbinview/mod.rs index 52de4ce39..1b213b126 100644 --- a/vortex-array/src/array/varbinview/mod.rs +++ b/vortex-array/src/array/varbinview/mod.rs @@ -201,8 +201,8 @@ impl VarBinViewArray { builder.append_value(s); } let array = Array::from_arrow(&builder.finish(), false); - VarBinViewArray::try_from(array).vortex_expect( - "Failed to convert iterator of nullable strings to VarBinViewArray") + VarBinViewArray::try_from(array) + .vortex_expect("Failed to convert iterator of nullable strings to VarBinViewArray") } pub fn from_iter_nullable_str, I: IntoIterator>>( @@ -213,8 +213,8 @@ impl VarBinViewArray { builder.extend(iter); let array = Array::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array).vortex_expect( - "Failed to convert iterator of nullable strings to VarBinViewArray") + VarBinViewArray::try_from(array) + .vortex_expect("Failed to convert iterator of nullable strings to VarBinViewArray") } pub fn from_iter_bin, I: IntoIterator>(iter: I) -> Self { @@ -224,8 +224,8 @@ impl VarBinViewArray { builder.append_value(b); } let array = Array::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array).vortex_expect( - "Failed to convert iterator of bytes to VarBinViewArray") + VarBinViewArray::try_from(array) + .vortex_expect("Failed to convert iterator of 
bytes to VarBinViewArray") } pub fn from_iter_nullable_bin, I: IntoIterator>>( @@ -235,8 +235,8 @@ impl VarBinViewArray { let mut builder = BinaryViewBuilder::with_capacity(iter.size_hint().0); builder.extend(iter); let array = Array::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array).vortex_expect( - "Failed to convert iterator of nullable bytes to VarBinViewArray") + VarBinViewArray::try_from(array) + .vortex_expect("Failed to convert iterator of nullable bytes to VarBinViewArray") } pub fn bytes_at(&self, index: usize) -> VortexResult> { diff --git a/vortex-array/src/arrow/array.rs b/vortex-array/src/arrow/array.rs index 511ee463e..0a459ffd5 100644 --- a/vortex-array/src/arrow/array.rs +++ b/vortex-array/src/arrow/array.rs @@ -174,8 +174,7 @@ impl FromArrowArray<&ArrowStructArray> for Array { value.len(), nulls(value.nulls(), nullable), ) - .vortex_expect( - "Failed to convert Arrow StructArray to Vortex StructArray") + .vortex_expect("Failed to convert Arrow StructArray to Vortex StructArray") .into() } } @@ -234,8 +233,7 @@ impl FromArrowArray for Array { array .as_any() .downcast_ref::() - .vortex_expect("Expected Arrow StringViewArray for DataType::Utf8View") - , + .vortex_expect("Expected Arrow StringViewArray for DataType::Utf8View"), nullable, ), DataType::Struct(_) => Self::from_arrow(array.as_struct(), nullable), @@ -288,7 +286,10 @@ impl FromArrowArray for Array { Self::from_arrow(array.as_primitive::(), nullable) } }, - _ => vortex_panic!("Missing array encoding for Arrow data type {}", array.data_type().clone()), + _ => vortex_panic!( + "Missing array encoding for Arrow data type {}", + array.data_type().clone() + ), } } } diff --git a/vortex-array/src/arrow/recordbatch.rs b/vortex-array/src/arrow/recordbatch.rs index 082747d75..286e68e3e 100644 --- a/vortex-array/src/arrow/recordbatch.rs +++ b/vortex-array/src/arrow/recordbatch.rs @@ -37,8 +37,7 @@ impl TryFrom for RecordBatch { type Error = VortexError; fn try_from(value: 
Array) -> VortexResult { - let struct_arr = value.into_struct() - .map_err(|err| { + let struct_arr = value.into_struct().map_err(|err| { vortex_err!("RecordBatch can only be constructed from a Vortex StructArray: {err}") })?; @@ -50,9 +49,7 @@ impl TryFrom for RecordBatch { type Error = VortexError; fn try_from(value: StructArray) -> VortexResult { - let array_ref = value - .into_canonical()? - .into_arrow()?; + let array_ref = value.into_canonical()?.into_arrow()?; let struct_array = as_struct_array(array_ref.as_ref()); Ok(Self::from(struct_array)) } diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 7e9be6fdb..bc213924e 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -139,9 +139,7 @@ fn null_to_arrow(null_array: NullArray) -> VortexResult { fn bool_to_arrow(bool_array: BoolArray) -> VortexResult { Ok(Arc::new(ArrowBoolArray::new( bool_array.boolean_buffer(), - bool_array - .logical_validity() - .to_null_buffer()?, + bool_array.logical_validity().to_null_buffer()?, ))) } @@ -151,9 +149,7 @@ fn primitive_to_arrow(primitive_array: PrimitiveArray) -> VortexResult ) -> VortexResult>> { Ok(Arc::new(ArrowPrimitiveArray::new( ScalarBuffer::::new(array.buffer().clone().into_arrow(), 0, array.len()), - array - .logical_validity() - .to_null_buffer()? 
+ array.logical_validity().to_null_buffer()?, ))) } @@ -173,18 +169,24 @@ fn primitive_to_arrow(primitive_array: PrimitiveArray) -> VortexResult } fn struct_to_arrow(struct_array: StructArray) -> VortexResult { - let field_arrays: Vec = Iterator::zip(struct_array.names().iter(), struct_array.children()) - .map(|(name, f)| { - let canonical = f - .into_canonical() - .map_err(|err| err.with_context(format!("Failed to canonicalize field {}", name)))?; - match canonical { - // visit nested structs recursively - Canonical::Struct(a) => struct_to_arrow(a), - _ => canonical.into_arrow().map_err(|err| err.with_context(format!("Failed to convert canonicalized field {} to arrow", name))), - } - }) - .collect::>>()?; + let field_arrays: Vec = + Iterator::zip(struct_array.names().iter(), struct_array.children()) + .map(|(name, f)| { + let canonical = f.into_canonical().map_err(|err| { + err.with_context(format!("Failed to canonicalize field {}", name)) + })?; + match canonical { + // visit nested structs recursively + Canonical::Struct(a) => struct_to_arrow(a), + _ => canonical.into_arrow().map_err(|err| { + err.with_context(format!( + "Failed to convert canonicalized field {} to arrow", + name + )) + }), + } + }) + .collect::>>()?; let arrow_fields: Fields = struct_array .names() @@ -201,11 +203,13 @@ fn struct_to_arrow(struct_array: StructArray) -> VortexResult { .map(Arc::new) .collect(); - let nulls = struct_array - .logical_validity() - .to_null_buffer()?; + let nulls = struct_array.logical_validity().to_null_buffer()?; - Ok(Arc::new(ArrowStructArray::try_new(arrow_fields, field_arrays, nulls)?)) + Ok(Arc::new(ArrowStructArray::try_new( + arrow_fields, + field_arrays, + nulls, + )?)) } fn varbin_to_arrow(varbin_array: VarBinArray) -> VortexResult { @@ -282,11 +286,10 @@ fn varbin_to_arrow(varbin_array: VarBinArray) -> VortexResult { fn temporal_to_arrow(temporal_array: TemporalArray) -> VortexResult { macro_rules! 
extract_temporal_values { ($values:expr, $prim:ty) => {{ - let temporal_values = try_cast($values, <$prim as NativePType>::PTYPE.into())?.into_primitive()?; + let temporal_values = + try_cast($values, <$prim as NativePType>::PTYPE.into())?.into_primitive()?; let len = temporal_values.len(); - let nulls = temporal_values - .logical_validity() - .to_null_buffer()?; + let nulls = temporal_values.logical_validity().to_null_buffer()?; let scalars = ScalarBuffer::<$prim>::new(temporal_values.into_buffer().into_arrow(), 0, len); diff --git a/vortex-array/src/data.rs b/vortex-array/src/data.rs index 3ab76326f..e97b6192f 100644 --- a/vortex-array/src/data.rs +++ b/vortex-array/src/data.rs @@ -158,7 +158,8 @@ impl Statistics for ArrayData { .unwrap_or_else(|_| { vortex_panic!( "Failed to acquire write lock on stats map while setting {} to {}", - stat, value + stat, + value ) }) .set(stat, value); diff --git a/vortex-array/src/implementation.rs b/vortex-array/src/implementation.rs index 8db14a94a..12be23b7f 100644 --- a/vortex-array/src/implementation.rs +++ b/vortex-array/src/implementation.rs @@ -243,7 +243,8 @@ where children: vec![], }; array.with_dyn(|a| { - a.accept(&mut visitor).vortex_expect("Error while visiting Array View children") + a.accept(&mut visitor) + .vortex_expect("Error while visiting Array View children") }); ArrayData::try_new( encoding, diff --git a/vortex-array/src/iter/mod.rs b/vortex-array/src/iter/mod.rs index daa365a3d..e4f7b7093 100644 --- a/vortex-array/src/iter/mod.rs +++ b/vortex-array/src/iter/mod.rs @@ -200,9 +200,7 @@ impl Iterator for VectorizedArrayIter { let validity = self .validity .slice(self.current_idx, self.current_idx + data.len()) - .vortex_expect( - "The slice bounds should always be within the array's limits", - ); + .vortex_expect("The slice bounds should always be within the array's limits"); self.current_idx += data.len(); let batch = Batch::new_from_vec(data, validity); diff --git a/vortex-array/src/lib.rs 
b/vortex-array/src/lib.rs index b47b0fdef..aef62fd14 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -194,7 +194,7 @@ impl Array { }) .unwrap_or_else(|err| { vortex_panic!( - err, + err, "Failed to convert Array to {}", std::any::type_name::() ) diff --git a/vortex-array/src/stats/mod.rs b/vortex-array/src/stats/mod.rs index 785eff941..f33836758 100644 --- a/vortex-array/src/stats/mod.rs +++ b/vortex-array/src/stats/mod.rs @@ -97,7 +97,7 @@ impl dyn Statistics + '_ { .transpose() .unwrap_or_else(|err| { vortex_panic!( - err, + err, "Failed to cast stat {} to {}", stat, std::any::type_name::() @@ -114,12 +114,7 @@ impl dyn Statistics + '_ { .transpose() .and_then(|maybe| maybe.as_ref().map(U::try_from).transpose()) .unwrap_or_else(|err| { - vortex_panic!( - err, - "Failed to cast stat {} to {}", - stat, - U::PTYPE - ) + vortex_panic!(err, "Failed to cast stat {} to {}", stat, U::PTYPE) }) } @@ -132,7 +127,7 @@ impl dyn Statistics + '_ { .transpose() .unwrap_or_else(|err| { vortex_panic!( - err, + err, "Failed to compute stat {} as {}", stat, std::any::type_name::() @@ -149,12 +144,7 @@ impl dyn Statistics + '_ { .transpose() .and_then(|maybe| maybe.as_ref().map(U::try_from).transpose()) .unwrap_or_else(|err| { - vortex_panic!( - err, - "Failed to compute stat {} as cast {}", - stat, - U::PTYPE - ) + vortex_panic!(err, "Failed to compute stat {} as cast {}", stat, U::PTYPE) }) } diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 6c94a9ee7..74e446fbb 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -1,7 +1,9 @@ use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer}; use serde::{Deserialize, Serialize}; use vortex_dtype::{DType, Nullability}; -use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect as _, VortexResult}; +use vortex_error::{ + vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect as _, VortexResult, +}; use 
crate::array::BoolArray; use crate::compute::unary::scalar_at_unchecked; @@ -93,13 +95,15 @@ impl Validity { match self { Self::NonNullable | Self::AllValid => true, Self::AllInvalid => false, - Self::Array(a) => bool::try_from(&scalar_at_unchecked(a, index)).unwrap_or_else(|err| { - vortex_panic!( - err, - "Failed to get bool from Validity Array at index {}", - index - ) - }), + Self::Array(a) => { + bool::try_from(&scalar_at_unchecked(a, index)).unwrap_or_else(|err| { + vortex_panic!( + err, + "Failed to get bool from Validity Array at index {}", + index + ) + }) + } } } @@ -177,11 +181,13 @@ impl PartialEq for Validity { (Self::AllValid, Self::AllValid) => true, (Self::AllInvalid, Self::AllInvalid) => true, (Self::Array(a), Self::Array(b)) => { - let a_buffer = a.clone() + let a_buffer = a + .clone() .into_bool() .vortex_expect("Failed to get Validity Array as BoolArray") .boolean_buffer(); - let b_buffer = b.clone() + let b_buffer = b + .clone() .into_bool() .vortex_expect("Failed to get Validity Array as BoolArray") .boolean_buffer(); @@ -275,7 +281,10 @@ impl LogicalValidity { vortex_bail!("Expected a non-nullable boolean array"); } - let true_count = array.statistics().compute_true_count().ok_or_else(|| vortex_err!("Failed to compute true count from validity array"))?; + let true_count = array + .statistics() + .compute_true_count() + .ok_or_else(|| vortex_err!("Failed to compute true count from validity array"))?; if true_count == array.len() { return Ok(Self::AllValid(array.len())); } else if true_count == 0 { diff --git a/vortex-array/src/variants.rs b/vortex-array/src/variants.rs index c6f59529b..1811768de 100644 --- a/vortex-array/src/variants.rs +++ b/vortex-array/src/variants.rs @@ -15,8 +15,7 @@ pub trait ArrayVariants { } fn as_null_array_unchecked(&self) -> &dyn NullArrayTrait { - self.as_null_array() - .vortex_expect("Expected NullArray") + self.as_null_array().vortex_expect("Expected NullArray") } fn as_bool_array(&self) -> Option<&dyn 
BoolArrayTrait> { @@ -24,8 +23,7 @@ pub trait ArrayVariants { } fn as_bool_array_unchecked(&self) -> &dyn BoolArrayTrait { - self.as_bool_array() - .vortex_expect("Expected BoolArray") + self.as_bool_array().vortex_expect("Expected BoolArray") } fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { @@ -42,8 +40,7 @@ pub trait ArrayVariants { } fn as_utf8_array_unchecked(&self) -> &dyn Utf8ArrayTrait { - self.as_utf8_array() - .vortex_expect("Expected Utf8Array") + self.as_utf8_array().vortex_expect("Expected Utf8Array") } fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { @@ -51,8 +48,7 @@ pub trait ArrayVariants { } fn as_binary_array_unchecked(&self) -> &dyn BinaryArrayTrait { - self.as_binary_array() - .vortex_expect("Expected BinaryArray") + self.as_binary_array().vortex_expect("Expected BinaryArray") } fn as_struct_array(&self) -> Option<&dyn StructArrayTrait> { @@ -60,8 +56,7 @@ pub trait ArrayVariants { } fn as_struct_array_unchecked(&self) -> &dyn StructArrayTrait { - self.as_struct_array() - .vortex_expect("Expected StructArray") + self.as_struct_array().vortex_expect("Expected StructArray") } fn as_list_array(&self) -> Option<&dyn ListArrayTrait> { @@ -69,8 +64,7 @@ pub trait ArrayVariants { } fn as_list_array_unchecked(&self) -> &dyn ListArrayTrait { - self.as_list_array() - .vortex_expect("Expected ListArray") + self.as_list_array().vortex_expect("Expected ListArray") } fn as_extension_array(&self) -> Option<&dyn ExtensionArrayTrait> { diff --git a/vortex-datafusion/src/lib.rs b/vortex-datafusion/src/lib.rs index c14d4813e..71a1d916c 100644 --- a/vortex-datafusion/src/lib.rs +++ b/vortex-datafusion/src/lib.rs @@ -22,8 +22,8 @@ use persistent::config::VortexTableOptions; use persistent::provider::VortexFileTableProvider; use vortex::array::ChunkedArray; use vortex::{Array, ArrayDType, IntoArrayVariant}; -use vortex_error::vortex_err; use vortex_dtype::field::Field; +use vortex_error::vortex_err; pub mod memory; pub mod persistent; 
diff --git a/vortex-datafusion/src/memory.rs b/vortex-datafusion/src/memory.rs index 6b422b346..61380793b 100644 --- a/vortex-datafusion/src/memory.rs +++ b/vortex-datafusion/src/memory.rs @@ -47,9 +47,8 @@ impl VortexMemTable { Ok(a) => a, _ => { let dtype = array.dtype().clone(); - ChunkedArray::try_new(vec![array], dtype).vortex_expect( - "Failed to wrap array as a ChunkedArray with 1 chunk", - ) + ChunkedArray::try_new(vec![array], dtype) + .vortex_expect("Failed to wrap array as a ChunkedArray with 1 chunk") } }; @@ -116,7 +115,7 @@ impl TableProvider for VortexMemTable { let output_schema = Arc::new( self.schema_ref .project(output_projection.as_slice()) - .map_err(VortexError::from)? + .map_err(VortexError::from)?, ); let plan_properties = PlanProperties::new( EquivalenceProperties::new(output_schema), diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index 653dc0d50..56f871d8e 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -57,7 +57,7 @@ impl FileOpener for VortexFileOpener { .await? .map_ok(RecordBatch::try_from) .map(|r| r.and_then(|inner| inner)) - .map_err(|e| e.into()) + .map_err(|e| e.into()), ) as _) } .boxed()) diff --git a/vortex-datafusion/src/plans.rs b/vortex-datafusion/src/plans.rs index 25c3ff9ef..82cadace1 100644 --- a/vortex-datafusion/src/plans.rs +++ b/vortex-datafusion/src/plans.rs @@ -162,9 +162,7 @@ impl Stream for RowIndicesStream { // Get the unfiltered record batch. // Since this is a one-shot, we only want to poll the inner future once, to create the // initial batch for us to process. - let vortex_struct = next_chunk - .into_struct()? 
- .project(&this.filter_projection)?; + let vortex_struct = next_chunk.into_struct()?.project(&this.filter_projection)?; let selection = this .conjunction_expr @@ -217,10 +215,9 @@ impl TakeRowsExec { row_indices: Arc, table: &ChunkedArray, ) -> Self { - let output_schema = - Arc::new(schema_ref.project(projection).unwrap_or_else(|err| { - vortex_panic!("Failed to project schema: {}", VortexError::from(err)) - })); + let output_schema = Arc::new(schema_ref.project(projection).unwrap_or_else(|err| { + vortex_panic!("Failed to project schema: {}", VortexError::from(err)) + })); let plan_properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), diff --git a/vortex-datetime-dtype/src/arrow.rs b/vortex-datetime-dtype/src/arrow.rs index 2ee839515..7af9d2c7f 100644 --- a/vortex-datetime-dtype/src/arrow.rs +++ b/vortex-datetime-dtype/src/arrow.rs @@ -59,21 +59,27 @@ pub fn make_arrow_temporal_dtype(ext_dtype: &ExtDType) -> DataType { TemporalMetadata::Date(time_unit) => match time_unit { TimeUnit::D => DataType::Date32, TimeUnit::Ms => DataType::Date64, - _ => vortex_panic!(InvalidArgument: "Invalid TimeUnit {} for {}", time_unit, ext_dtype.id()), + _ => { + vortex_panic!(InvalidArgument: "Invalid TimeUnit {} for {}", time_unit, ext_dtype.id()) + } }, TemporalMetadata::Time(time_unit) => match time_unit { TimeUnit::S => DataType::Time32(ArrowTimeUnit::Second), TimeUnit::Ms => DataType::Time32(ArrowTimeUnit::Millisecond), TimeUnit::Us => DataType::Time64(ArrowTimeUnit::Microsecond), TimeUnit::Ns => DataType::Time64(ArrowTimeUnit::Nanosecond), - _ => vortex_panic!(InvalidArgument: "Invalid TimeUnit {} for {}", time_unit, ext_dtype.id()), + _ => { + vortex_panic!(InvalidArgument: "Invalid TimeUnit {} for {}", time_unit, ext_dtype.id()) + } }, TemporalMetadata::Timestamp(time_unit, tz) => match time_unit { TimeUnit::Ns => DataType::Timestamp(ArrowTimeUnit::Nanosecond, tz.map(|t| t.into())), TimeUnit::Us => 
DataType::Timestamp(ArrowTimeUnit::Microsecond, tz.map(|t| t.into())), TimeUnit::Ms => DataType::Timestamp(ArrowTimeUnit::Millisecond, tz.map(|t| t.into())), TimeUnit::S => DataType::Timestamp(ArrowTimeUnit::Second, tz.map(|t| t.into())), - _ => vortex_panic!(InvalidArgument: "Invalid TimeUnit {} for {}", time_unit, ext_dtype.id()), + _ => { + vortex_panic!(InvalidArgument: "Invalid TimeUnit {} for {}", time_unit, ext_dtype.id()) + } }, } } diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 4eef784ed..930d1441c 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -295,7 +295,7 @@ macro_rules! vortex_panic { }}; ($variant:ident: $fmt:literal $(, $arg:expr)* $(,)?) => { $crate::vortex_panic!($crate::vortex_err!($variant: $fmt, $($arg),*)) - }; + }; ($err:expr, $fmt:literal $(, $arg:expr)* $(,)?) => {{ use $crate::VortexPanic; use $crate::VortexError; diff --git a/vortex-sampling-compressor/src/compressors/fsst.rs b/vortex-sampling-compressor/src/compressors/fsst.rs index 577ac0008..b5d8d2834 100644 --- a/vortex-sampling-compressor/src/compressors/fsst.rs +++ b/vortex-sampling-compressor/src/compressors/fsst.rs @@ -76,7 +76,10 @@ impl EncodingCompressor for FSSTCompressor { // For a VarBinArray or VarBinViewArray, compress directly. 
fsst_compress(array, fsst_compressor)?.into_array() } else { - vortex_bail!("Unsupported encoding for FSSTCompressor: {}", array.encoding().id()) + vortex_bail!( + "Unsupported encoding for FSSTCompressor: {}", + array.encoding().id() + ) }; Ok(CompressedArray::new( diff --git a/vortex-scalar/src/arrow.rs b/vortex-scalar/src/arrow.rs index cfab8c49f..32a3a4677 100644 --- a/vortex-scalar/src/arrow.rs +++ b/vortex-scalar/src/arrow.rs @@ -22,14 +22,9 @@ impl TryFrom<&Scalar> for Arc { fn try_from(value: &Scalar) -> Result, Self::Error> { match value.dtype() { DType::Null => Ok(Arc::new(NullArray::new(1))), - DType::Bool(_) => value_to_arrow_scalar!( - value.value.as_bool()?, - BooleanArray - ), + DType::Bool(_) => value_to_arrow_scalar!(value.value.as_bool()?, BooleanArray), DType::Primitive(ptype, _) => { - let pvalue = value - .value - .as_pvalue()?; + let pvalue = value.value.as_pvalue()?; Ok(match pvalue { None => match ptype { PType::U8 => Arc::new(UInt8Array::new_null(1)), @@ -60,20 +55,10 @@ impl TryFrom<&Scalar> for Arc { }) } DType::Utf8(_) => { - value_to_arrow_scalar!( - value - .value - .as_buffer_string()?, - StringArray - ) + value_to_arrow_scalar!(value.value.as_buffer_string()?, StringArray) } DType::Binary(_) => { - value_to_arrow_scalar!( - value - .value - .as_buffer()?, - BinaryArray - ) + value_to_arrow_scalar!(value.value.as_buffer()?, BinaryArray) } DType::Struct(..) 
=> { todo!("struct scalar conversion") diff --git a/vortex-scalar/src/datafusion.rs b/vortex-scalar/src/datafusion.rs index f50cc1cb3..c5edb6ac4 100644 --- a/vortex-scalar/src/datafusion.rs +++ b/vortex-scalar/src/datafusion.rs @@ -14,15 +14,9 @@ impl TryFrom for ScalarValue { fn try_from(value: Scalar) -> Result { Ok(match value.dtype { DType::Null => ScalarValue::Null, - DType::Bool(_) => ScalarValue::Boolean( - value - .value - .as_bool()?, - ), + DType::Bool(_) => ScalarValue::Boolean(value.value.as_bool()?), DType::Primitive(ptype, _) => { - let pvalue = value - .value - .as_pvalue()?; + let pvalue = value.value.as_pvalue()?; match pvalue { None => match ptype { PType::U8 => ScalarValue::UInt8(None), diff --git a/vortex-scalar/src/primitive.rs b/vortex-scalar/src/primitive.rs index d222c7a66..135b65df2 100644 --- a/vortex-scalar/src/primitive.rs +++ b/vortex-scalar/src/primitive.rs @@ -1,8 +1,9 @@ - use num_traits::NumCast; use vortex_dtype::half::f16; use vortex_dtype::{match_each_native_ptype, DType, NativePType, Nullability, PType}; -use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexError, VortexResult, VortexUnwrap}; +use vortex_error::{ + vortex_bail, vortex_err, vortex_panic, VortexError, VortexResult, VortexUnwrap, +}; use crate::pvalue::PValue; use crate::value::ScalarValue; @@ -35,7 +36,9 @@ impl<'a> PrimitiveScalar<'a> { T::PTYPE ); - self.pvalue.as_ref().map(|pv| T::try_from(*pv).vortex_unwrap()) + self.pvalue + .as_ref() + .map(|pv| T::try_from(*pv).vortex_unwrap()) } pub fn cast(&self, dtype: &DType) -> VortexResult { @@ -89,8 +92,9 @@ impl Scalar { } pub fn reinterpret_cast(&self, ptype: PType) -> Self { - let primitive = PrimitiveScalar::try_from(self) - .unwrap_or_else(|e| vortex_panic!(e, "Failed to reinterpret cast {} to {}", self.dtype, ptype)); + let primitive = PrimitiveScalar::try_from(self).unwrap_or_else(|e| { + vortex_panic!(e, "Failed to reinterpret cast {} to {}", self.dtype, ptype) + }); if primitive.ptype() == ptype { 
return self.clone(); } diff --git a/vortex-serde/src/io/tokio.rs b/vortex-serde/src/io/tokio.rs index d78320062..065eb84f7 100644 --- a/vortex-serde/src/io/tokio.rs +++ b/vortex-serde/src/io/tokio.rs @@ -50,7 +50,11 @@ impl VortexReadAt for File { } async fn size(&self) -> u64 { - self.metadata().await.map_err(|err| VortexError::IOError(err).with_context("Failed to get file metadata")).vortex_unwrap().len() + self.metadata() + .await + .map_err(|err| VortexError::IOError(err).with_context("Failed to get file metadata")) + .vortex_unwrap() + .len() } } diff --git a/vortex-serde/src/layouts/read/cache.rs b/vortex-serde/src/layouts/read/cache.rs index e8d2c105f..9b93ade44 100644 --- a/vortex-serde/src/layouts/read/cache.rs +++ b/vortex-serde/src/layouts/read/cache.rs @@ -58,7 +58,8 @@ impl RelativeLayoutCache { .unwrap_or_else(|poison| { vortex_panic!( "Failed to read from layout cache at path {:?} with error {}", - path, poison + path, + poison ); }) .get(&self.absolute_id(path)) @@ -70,7 +71,8 @@ impl RelativeLayoutCache { .unwrap_or_else(|poison| { vortex_panic!( "Failed to write to layout cache at path {:?} with error {}", - path, poison + path, + poison ) }) .remove(&self.absolute_id(path)) diff --git a/vortex-serde/src/layouts/read/layouts.rs b/vortex-serde/src/layouts/read/layouts.rs index 1e79c82f8..0e3d556c6 100644 --- a/vortex-serde/src/layouts/read/layouts.rs +++ b/vortex-serde/src/layouts/read/layouts.rs @@ -152,9 +152,7 @@ impl ColumnLayout { }; fb_layout .layout_as_nested_layout() - .vortex_expect( - "ColumnLayout: Failed to read nested layout from flatbuffer" - ) + .vortex_expect("ColumnLayout: Failed to read nested layout from flatbuffer") } fn read_child( @@ -294,9 +292,7 @@ impl ChunkedLayout { }; fb_layout .layout_as_nested_layout() - .vortex_expect( - "ChunkedLayout: Failed to read nested layout from flatbuffer" - ) + .vortex_expect("ChunkedLayout: Failed to read nested layout from flatbuffer") } } diff --git 
a/vortex-serde/src/layouts/read/stream.rs b/vortex-serde/src/layouts/read/stream.rs index f8409bbca..9b1539547 100644 --- a/vortex-serde/src/layouts/read/stream.rs +++ b/vortex-serde/src/layouts/read/stream.rs @@ -138,9 +138,10 @@ impl Stream for LayoutBatchStream { } StreamingState::Reading(f) => match ready!(f.poll_unpin(cx)) { Ok((read, buffers)) => { - let mut write_cache = self.messages_cache.write().unwrap_or_else(|poison| { - vortex_panic!("Failed to write to message cache: {poison}") - }); + let mut write_cache = + self.messages_cache.write().unwrap_or_else(|poison| { + vortex_panic!("Failed to write to message cache: {poison}") + }); for (id, buf) in buffers { write_cache.set(id, buf) } diff --git a/vortex-serde/src/layouts/write/writer.rs b/vortex-serde/src/layouts/write/writer.rs index a71ceb6d3..1b5d2d756 100644 --- a/vortex-serde/src/layouts/write/writer.rs +++ b/vortex-serde/src/layouts/write/writer.rs @@ -154,7 +154,12 @@ impl LayoutWriter { async fn write_footer(&mut self, footer: Footer) -> VortexResult<(u64, u64)> { let dtype_offset = self.msgs.tell(); self.msgs - .write_dtype(&self.dtype.take().ok_or_else(|| vortex_err!("Schema should be written by now"))?) 
+ .write_dtype( + &self + .dtype + .take() + .ok_or_else(|| vortex_err!("Schema should be written by now"))?, + ) .await?; let footer_offset = self.msgs.tell(); self.msgs.write_message(footer).await?; diff --git a/vortex-serde/src/messages.rs b/vortex-serde/src/messages.rs index 2397fde98..1913f0c91 100644 --- a/vortex-serde/src/messages.rs +++ b/vortex-serde/src/messages.rs @@ -138,7 +138,12 @@ impl<'a> WriteFlatBuffer for IPCArray<'a> { .vortex_expect("ArrayView is missing metadata during serialization"); Some(fbb.create_vector(metadata.as_ref())) } - Array::View(v) => Some(fbb.create_vector(v.metadata().vortex_expect("ArrayView is missing metadata during serialization"))), + Array::View(v) => Some( + fbb.create_vector( + v.metadata() + .vortex_expect("ArrayView is missing metadata during serialization"), + ), + ), }; let children = column_data From c8edc66026162bbe3e847e6f851a2f63440c017a Mon Sep 17 00:00:00 2001 From: Will Manning Date: Fri, 6 Sep 2024 15:28:14 -0400 Subject: [PATCH 33/39] moar --- Cargo.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 69184e2b8..8e044c81a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -171,13 +171,14 @@ unsafe_op_in_unsafe_fn = "deny" [workspace.lints.clippy] all = { level = "deny", priority = -1 } -#cargo = { level = "deny", priority = -1 } as_ptr_cast_mut = { level = "deny" } borrow_as_ptr = { level = "deny" } collection_is_never_read = { level = "deny" } cognitive_complexity = { level = "deny" } debug_assert_with_mut_call = { level = "deny" } derive_partial_eq_without_eq = { level = "deny" } +exit = { level = "deny" } +expect_fun_call = { level = "deny" } expect_used = { level = "deny" } equatable_if_let = { level = "deny" } fallible_impl_from = { level = "deny" } @@ -185,6 +186,7 @@ get_unwrap = { level = "deny" } host_endian_bytes = { level = "deny" } if_then_some_else_none = { level = "deny" } inconsistent_struct_constructor = { level = "deny" } +manual_assert = { 
level = "deny" } manual_is_variant_and = { level = "deny" } mem_forget = { level = "deny" } or_fun_call = "deny" @@ -192,6 +194,8 @@ panic_in_result_fn = { level = "deny" } panic = { level = "deny" } same_name_method = { level = "deny" } tests_outside_test_module = { level = "deny" } +# todo = { level = "deny" } +# unimplemented = { level = "deny" } unwrap_in_result = { level = "deny" } unwrap_used = { level = "deny" } use_debug = { level = "deny" } From 3b48fd430d64dda99dc1aae354d7db7d48e46a55 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Fri, 6 Sep 2024 15:59:49 -0400 Subject: [PATCH 34/39] quick self CR --- encodings/alp/benches/alp_compress.rs | 1 - encodings/dict/src/compress.rs | 4 ++-- encodings/dict/src/compute.rs | 6 +++--- .../fastlanes/src/bitpacking/compute/scalar_at.rs | 5 ++--- encodings/fastlanes/src/bitpacking/compute/slice.rs | 4 ++-- encodings/fastlanes/src/for/compute.rs | 6 +++--- encodings/fsst/src/compute.rs | 2 +- encodings/roaring/src/boolean/mod.rs | 4 ++-- encodings/roaring/src/integer/compute.rs | 4 ++-- encodings/zigzag/src/zigzag.rs | 12 +++++++----- vortex-sampling-compressor/src/compressors/runend.rs | 2 +- 11 files changed, 25 insertions(+), 25 deletions(-) diff --git a/encodings/alp/benches/alp_compress.rs b/encodings/alp/benches/alp_compress.rs index 27aae12c7..d88728557 100644 --- a/encodings/alp/benches/alp_compress.rs +++ b/encodings/alp/benches/alp_compress.rs @@ -14,7 +14,6 @@ fn main() { divan::main(); } -#[allow(clippy::unwrap_used)] #[divan::bench(types = [f32, f64], args = [100_000, 10_000_000])] fn alp_compress(n: usize) -> (Exponents, Vec, Vec, Vec) { let values: Vec = vec![T::from(1.234).unwrap(); n]; diff --git a/encodings/dict/src/compress.rs b/encodings/dict/src/compress.rs index 60a05606b..549154ced 100644 --- a/encodings/dict/src/compress.rs +++ b/encodings/dict/src/compress.rs @@ -66,7 +66,7 @@ pub fn dict_encode_typed_primitive( } } }) - .vortex_expect("Failed to iterate over primitive array during dictionary 
encoding"); + .vortex_expect("Failed to dictionary encode primitive array"); let values_validity = if array.dtype().is_nullable() { let mut validity = vec![true; values.len()]; @@ -87,7 +87,7 @@ pub fn dict_encode_typed_primitive( pub fn dict_encode_varbin(array: &VarBinArray) -> (PrimitiveArray, VarBinArray) { array .with_iterator(|iter| dict_encode_typed_varbin(array.dtype().clone(), iter)) - .vortex_expect("Failed to iterate over varbin array during dictionary encoding") + .vortex_expect("Failed to dictionary encode varbin array") } fn lookup_bytes<'a, T: NativePType + AsPrimitive>( diff --git a/encodings/dict/src/compute.rs b/encodings/dict/src/compute.rs index 164ea1839..1bde4a548 100644 --- a/encodings/dict/src/compute.rs +++ b/encodings/dict/src/compute.rs @@ -1,6 +1,6 @@ use vortex::compute::unary::{scalar_at, scalar_at_unchecked, ScalarAtFn}; use vortex::compute::{slice, take, ArrayCompute, SliceFn, TakeFn}; -use vortex::Array; +use vortex::{Array, IntoArray}; use vortex_error::{VortexExpect, VortexResult}; use vortex_scalar::Scalar; @@ -42,7 +42,7 @@ impl TakeFn for DictArray { // codes: 0 0 1 // dict: a b c d e f g h let codes = take(&self.codes(), indices)?; - Self::try_new(codes, self.values()).map(vortex::IntoArray::into_array) + Self::try_new(codes, self.values()).map(|a| a.into_array()) } } @@ -50,7 +50,7 @@ impl SliceFn for DictArray { // TODO(robert): Add function to trim the dictionary fn slice(&self, start: usize, stop: usize) -> VortexResult { Self::try_new(slice(&self.codes(), start, stop)?, self.values()) - .map(vortex::IntoArray::into_array) + .map(|a| a.into_array()) } } diff --git a/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs b/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs index d67829e0f..841850b3d 100644 --- a/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs +++ b/encodings/fastlanes/src/bitpacking/compute/scalar_at.rs @@ -1,6 +1,6 @@ use vortex::compute::unary::{scalar_at_unchecked, ScalarAtFn}; use 
vortex::ArrayDType; -use vortex_error::{vortex_panic, VortexResult}; +use vortex_error::{VortexResult, VortexUnwrap as _}; use vortex_scalar::Scalar; use crate::{unpack_single, BitPackedArray}; @@ -18,8 +18,7 @@ impl ScalarAtFn for BitPackedArray { } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - self.scalar_at(index) - .unwrap_or_else(|err| vortex_panic!(err)) + self.scalar_at(index).vortex_unwrap() } } diff --git a/encodings/fastlanes/src/bitpacking/compute/slice.rs b/encodings/fastlanes/src/bitpacking/compute/slice.rs index d2a870878..fdb11e62b 100644 --- a/encodings/fastlanes/src/bitpacking/compute/slice.rs +++ b/encodings/fastlanes/src/bitpacking/compute/slice.rs @@ -1,7 +1,7 @@ use std::cmp::max; use vortex::compute::{slice, SliceFn}; -use vortex::Array; +use vortex::{Array, IntoArray}; use vortex_error::VortexResult; use crate::BitPackedArray; @@ -22,7 +22,7 @@ impl SliceFn for BitPackedArray { stop - start, offset, ) - .map(vortex::IntoArray::into_array) + .map(|a| a.into_array()) } } diff --git a/encodings/fastlanes/src/for/compute.rs b/encodings/fastlanes/src/for/compute.rs index 948ac7059..6ae56cf63 100644 --- a/encodings/fastlanes/src/for/compute.rs +++ b/encodings/fastlanes/src/for/compute.rs @@ -6,7 +6,7 @@ use vortex::compute::{ search_sorted, slice, take, ArrayCompute, SearchResult, SearchSortedFn, SearchSortedSide, SliceFn, TakeFn, }; -use vortex::{Array, ArrayDType}; +use vortex::{Array, ArrayDType, IntoArray}; use vortex_dtype::{match_each_integer_ptype, NativePType}; use vortex_error::{VortexError, VortexExpect as _, VortexResult}; use vortex_scalar::{PValue, PrimitiveScalar, Scalar}; @@ -38,7 +38,7 @@ impl TakeFn for FoRArray { self.reference().clone(), self.shift(), ) - .map(vortex::IntoArray::into_array) + .map(|a| a.into_array()) } } @@ -70,7 +70,7 @@ impl SliceFn for FoRArray { self.reference().clone(), self.shift(), ) - .map(vortex::IntoArray::into_array) + .map(|a| a.into_array()) } } diff --git a/encodings/fsst/src/compute.rs 
b/encodings/fsst/src/compute.rs index 1f0c47ff4..cb86442aa 100644 --- a/encodings/fsst/src/compute.rs +++ b/encodings/fsst/src/compute.rs @@ -70,7 +70,7 @@ impl ScalarAtFn for FSSTArray { } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - ScalarAtFn::scalar_at(self, index).vortex_unwrap() + ::scalar_at(self, index).vortex_unwrap() } } diff --git a/encodings/roaring/src/boolean/mod.rs b/encodings/roaring/src/boolean/mod.rs index 5971b306a..edf5f9600 100644 --- a/encodings/roaring/src/boolean/mod.rs +++ b/encodings/roaring/src/boolean/mod.rs @@ -10,7 +10,7 @@ use vortex::validity::{ArrayValidity, LogicalValidity, Validity}; use vortex::variants::{ArrayVariants, BoolArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{ - impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoCanonical, TypedArray, + impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoCanonical, TypedArray }; use vortex_buffer::Buffer; use vortex_dtype::DType; @@ -57,7 +57,7 @@ impl RoaringBoolArray { pub fn encode(array: Array) -> VortexResult { if array.encoding().id() == Bool::ID { - roaring_bool_encode(BoolArray::try_from(array)?).map(vortex::IntoArray::into_array) + roaring_bool_encode(BoolArray::try_from(array)?).map(|a| a.into_array()) } else { vortex_bail!("RoaringInt can only encode boolean arrays") } diff --git a/encodings/roaring/src/integer/compute.rs b/encodings/roaring/src/integer/compute.rs index 1ae852145..8c7699c3b 100644 --- a/encodings/roaring/src/integer/compute.rs +++ b/encodings/roaring/src/integer/compute.rs @@ -1,7 +1,7 @@ use vortex::compute::unary::ScalarAtFn; use vortex::compute::ArrayCompute; use vortex_dtype::PType; -use vortex_error::{vortex_err, vortex_panic, VortexResult}; +use vortex_error::{vortex_err, VortexResult, VortexUnwrap as _}; use vortex_scalar::Scalar; use crate::RoaringIntArray; @@ -29,6 +29,6 @@ impl ScalarAtFn for RoaringIntArray { } fn scalar_at_unchecked(&self, index: 
usize) -> Scalar { - ::scalar_at(self, index).unwrap_or_else(|err| vortex_panic!(err)) + ::scalar_at(self, index).vortex_unwrap() } } diff --git a/encodings/zigzag/src/zigzag.rs b/encodings/zigzag/src/zigzag.rs index 0098f3533..9b5307d4d 100644 --- a/encodings/zigzag/src/zigzag.rs +++ b/encodings/zigzag/src/zigzag.rs @@ -5,8 +5,7 @@ use vortex::validity::{ArrayValidity, LogicalValidity}; use vortex::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{ - impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArrayVariant, - IntoCanonical, + impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoArrayVariant, IntoCanonical }; use vortex_dtype::{DType, PType}; use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexExpect as _, VortexResult}; @@ -21,7 +20,8 @@ pub struct ZigZagMetadata; impl ZigZagArray { pub fn new(encoded: Array) -> Self { - Self::try_new(encoded).vortex_expect("Failed to construct ZigZagArray") + Self::try_new(encoded) + .vortex_expect("Failed to construct ZigZagArray") } pub fn try_new(encoded: Array) -> VortexResult { @@ -43,7 +43,7 @@ impl ZigZagArray { PrimitiveArray::try_from(array) .map_err(|_| vortex_err!("ZigZag can only encoding primitive arrays")) .map(|parray| zigzag_encode(&parray))? 
- .map(vortex::IntoArray::into_array) + .map(|a| a.into_array()) } pub fn encoded(&self) -> Array { @@ -87,6 +87,8 @@ impl ArrayStatisticsCompute for ZigZagArray {} impl IntoCanonical for ZigZagArray { fn into_canonical(self) -> VortexResult { - zigzag_decode(&self.encoded().into_primitive()?).map(Canonical::Primitive) + zigzag_decode( + &self.encoded().into_primitive()?, + ).map(Canonical::Primitive) } } diff --git a/vortex-sampling-compressor/src/compressors/runend.rs b/vortex-sampling-compressor/src/compressors/runend.rs index e15a0c92d..4d97f3d36 100644 --- a/vortex-sampling-compressor/src/compressors/runend.rs +++ b/vortex-sampling-compressor/src/compressors/runend.rs @@ -67,7 +67,7 @@ impl EncodingCompressor for RunEndCompressor { compressed_values.array, ctx.compress_validity(primitive_array.validity())?, ) - .map(vortex::IntoArray::into_array)?, + .map(|a| a.into_array())?, Some(CompressionTree::new( self, vec![compressed_ends.path, compressed_values.path], From 876cff572f0e381c0cae7dc24b699eb2b52644f5 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Fri, 6 Sep 2024 16:59:35 -0400 Subject: [PATCH 35/39] format --- encodings/dict/src/compute.rs | 3 +-- encodings/roaring/src/boolean/mod.rs | 3 ++- encodings/zigzag/src/zigzag.rs | 10 ++++------ 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/encodings/dict/src/compute.rs b/encodings/dict/src/compute.rs index 1bde4a548..0b31b42c8 100644 --- a/encodings/dict/src/compute.rs +++ b/encodings/dict/src/compute.rs @@ -49,8 +49,7 @@ impl TakeFn for DictArray { impl SliceFn for DictArray { // TODO(robert): Add function to trim the dictionary fn slice(&self, start: usize, stop: usize) -> VortexResult { - Self::try_new(slice(&self.codes(), start, stop)?, self.values()) - .map(|a| a.into_array()) + Self::try_new(slice(&self.codes(), start, stop)?, self.values()).map(|a| a.into_array()) } } diff --git a/encodings/roaring/src/boolean/mod.rs b/encodings/roaring/src/boolean/mod.rs index edf5f9600..3b7a7883b 
100644 --- a/encodings/roaring/src/boolean/mod.rs +++ b/encodings/roaring/src/boolean/mod.rs @@ -10,7 +10,8 @@ use vortex::validity::{ArrayValidity, LogicalValidity, Validity}; use vortex::variants::{ArrayVariants, BoolArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{ - impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoCanonical, TypedArray + impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoCanonical, + TypedArray, }; use vortex_buffer::Buffer; use vortex_dtype::DType; diff --git a/encodings/zigzag/src/zigzag.rs b/encodings/zigzag/src/zigzag.rs index 9b5307d4d..262e8de5d 100644 --- a/encodings/zigzag/src/zigzag.rs +++ b/encodings/zigzag/src/zigzag.rs @@ -5,7 +5,8 @@ use vortex::validity::{ArrayValidity, LogicalValidity}; use vortex::variants::{ArrayVariants, PrimitiveArrayTrait}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{ - impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoArrayVariant, IntoCanonical + impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArray, IntoArrayVariant, + IntoCanonical, }; use vortex_dtype::{DType, PType}; use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexExpect as _, VortexResult}; @@ -20,8 +21,7 @@ pub struct ZigZagMetadata; impl ZigZagArray { pub fn new(encoded: Array) -> Self { - Self::try_new(encoded) - .vortex_expect("Failed to construct ZigZagArray") + Self::try_new(encoded).vortex_expect("Failed to construct ZigZagArray") } pub fn try_new(encoded: Array) -> VortexResult { @@ -87,8 +87,6 @@ impl ArrayStatisticsCompute for ZigZagArray {} impl IntoCanonical for ZigZagArray { fn into_canonical(self) -> VortexResult { - zigzag_decode( - &self.encoded().into_primitive()?, - ).map(Canonical::Primitive) + zigzag_decode(&self.encoded().into_primitive()?).map(Canonical::Primitive) } } From e78a6a84cf53a07b625e2246175864ecf9d3c89d Mon Sep 17 00:00:00 2001 
From: Will Manning Date: Mon, 9 Sep 2024 11:34:52 -0400 Subject: [PATCH 36/39] AssertionFailed for expect failures --- vortex-error/src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 930d1441c..591a23ea6 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -58,6 +58,8 @@ pub enum VortexError { NotImplemented(ErrString, ErrString, Backtrace), #[error("expected type: {0} but instead got {1}\nBacktrace:\n{2}")] MismatchedTypes(ErrString, ErrString, Backtrace), + #[error("{0}\nBacktrace:\n{1}")] + AssertionFailed(ErrString, Backtrace), #[error("{0}: {1}")] Context(ErrString, Box), #[error(transparent)] @@ -216,7 +218,7 @@ impl VortexExpect for Option { #[inline(always)] fn vortex_expect(self, msg: &str) -> Self::Output { self.unwrap_or_else(|| { - let err = VortexError::InvalidArgument(msg.to_string().into(), Backtrace::capture()); + let err = VortexError::AssertionFailed(msg.to_string().into(), Backtrace::capture()); vortex_panic!(err) }) } From e0a7437364bc4d5d8c96cecbe321b9cd5c7c2438 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 9 Sep 2024 16:38:21 -0400 Subject: [PATCH 37/39] self CR --- bench-vortex/src/reader.rs | 5 ++-- bench-vortex/src/tpch/dbgen.rs | 2 +- encodings/alp/src/array.rs | 4 +-- encodings/datetime-parts/src/compute.rs | 6 ++--- encodings/roaring/src/boolean/compute.rs | 4 +-- vortex-array/src/array/sparse/compute/mod.rs | 8 +++--- vortex-array/src/array/varbin/compute/mod.rs | 5 ++-- vortex-array/src/arrow/array.rs | 2 +- vortex-datafusion/src/persistent/provider.rs | 2 +- vortex-datetime-dtype/src/temporal.rs | 2 +- vortex-dtype/src/dtype.rs | 8 +++--- vortex-dtype/src/field.rs | 2 +- vortex-error/src/lib.rs | 26 +++++--------------- vortex-expr/src/expr.rs | 13 +++++++--- vortex-scalar/src/list.rs | 2 +- vortex-serde/src/layouts/read/batch.rs | 2 +- vortex-serde/src/layouts/read/buffered.rs | 2 +- 17 files changed, 43 insertions(+), 52 
deletions(-) diff --git a/bench-vortex/src/reader.rs b/bench-vortex/src/reader.rs index 58f3f8d56..90fbb5318 100644 --- a/bench-vortex/src/reader.rs +++ b/bench-vortex/src/reader.rs @@ -20,6 +20,7 @@ use object_store::ObjectStore; use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder}; use parquet::arrow::async_reader::{AsyncFileReader, ParquetObjectReader}; use parquet::arrow::ParquetRecordBatchStreamBuilder; +use parquet::file::metadata::RowGroupMetaData; use serde::{Deserialize, Serialize}; use stream::StreamExt; use vortex::array::{ChunkedArray, PrimitiveArray}; @@ -58,7 +59,7 @@ pub async fn open_vortex(path: &Path) -> VortexResult { .into_array_stream() .collect_chunked() .await - .map(vortex::IntoArray::into_array) + .map(IntoArray::into_array) } pub async fn rewrite_parquet_as_vortex( @@ -219,7 +220,7 @@ async fn parquet_take_from_stream( .metadata() .row_groups() .iter() - .map(parquet::file::metadata::RowGroupMetaData::num_rows) + .map(RowGroupMetaData::num_rows) .scan(0i64, |acc, x| { *acc += x; Some(*acc) diff --git a/bench-vortex/src/tpch/dbgen.rs b/bench-vortex/src/tpch/dbgen.rs index fdbef554a..1c66ec947 100644 --- a/bench-vortex/src/tpch/dbgen.rs +++ b/bench-vortex/src/tpch/dbgen.rs @@ -175,7 +175,7 @@ fn get_or_cache_toolchain( zip_file .url() .path_segments() - .and_then(std::iter::Iterator::last) + .and_then(Iterator::last) .unwrap(), ); diff --git a/encodings/alp/src/array.rs b/encodings/alp/src/array.rs index 0901ae545..09ba1d0ae 100644 --- a/encodings/alp/src/array.rs +++ b/encodings/alp/src/array.rs @@ -199,7 +199,7 @@ impl PrimitiveArrayTrait for ALPArray { let encoded = self .encoded() .with_dyn(|a| a.as_primitive_array_unchecked().i32_accessor()) - .vortex_expect("Failed to get underlying encoded i32 array for ALP-encoded f32 array; this violates an invariant of the ALP algorithm"); + .vortex_expect("Failed to get underlying encoded i32 array for ALP-encoded f32 array"); Some(Arc::new(ALPAccessor::new( 
encoded, @@ -222,7 +222,7 @@ impl PrimitiveArrayTrait for ALPArray { let encoded = self .encoded() .with_dyn(|a| a.as_primitive_array_unchecked().i64_accessor()) - .vortex_expect("Failed to get underlying encoded i64 array for ALP-encoded f64 array; this violates an invariant of the ALP algorithm"); + .vortex_expect("Failed to get underlying encoded i64 array for ALP-encoded f64 array"); Some(Arc::new(ALPAccessor::new( encoded, patches, diff --git a/encodings/datetime-parts/src/compute.rs b/encodings/datetime-parts/src/compute.rs index b8422532e..2b00bb6e0 100644 --- a/encodings/datetime-parts/src/compute.rs +++ b/encodings/datetime-parts/src/compute.rs @@ -5,7 +5,7 @@ use vortex::validity::ArrayValidity; use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant}; use vortex_datetime_dtype::{TemporalMetadata, TimeUnit}; use vortex_dtype::DType; -use vortex_error::{vortex_bail, vortex_panic, VortexResult}; +use vortex_error::{vortex_bail, VortexResult, VortexUnwrap as _}; use vortex_scalar::Scalar; use crate::DateTimePartsArray; @@ -79,7 +79,7 @@ impl ScalarAtFn for DateTimePartsArray { } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - ::scalar_at(self, index).unwrap_or_else(|err| vortex_panic!(err)) + ::scalar_at(self, index).vortex_unwrap() } } @@ -120,7 +120,7 @@ pub fn decode_to_temporal(array: &DateTimePartsArray) -> VortexResult Scalar { match self .find_index(index) - .unwrap_or_else(|err| vortex_panic!(err)) + .vortex_unwrap() { None => self .fill_value() .clone() .cast(self.dtype()) - .unwrap_or_else(|err| vortex_panic!(err)), + .vortex_unwrap(), Some(idx) => scalar_at_unchecked(&self.values(), idx) .cast(self.dtype()) - .unwrap_or_else(|err| vortex_panic!(err)), + .vortex_unwrap(), } } } diff --git a/vortex-array/src/array/varbin/compute/mod.rs b/vortex-array/src/array/varbin/compute/mod.rs index 20f000a16..58f4922cd 100644 --- a/vortex-array/src/array/varbin/compute/mod.rs +++ b/vortex-array/src/array/varbin/compute/mod.rs @@ -1,4 +1,4 @@ -use 
vortex_error::{vortex_panic, VortexResult}; +use vortex_error::{VortexResult, VortexUnwrap as _}; use vortex_scalar::Scalar; use crate::array::varbin::{varbin_scalar, VarBinArray}; @@ -31,8 +31,7 @@ impl ScalarAtFn for VarBinArray { fn scalar_at_unchecked(&self, index: usize) -> Scalar { varbin_scalar( - self.bytes_at(index) - .unwrap_or_else(|err| vortex_panic!(err)), + self.bytes_at(index).vortex_unwrap(), self.dtype(), ) } diff --git a/vortex-array/src/arrow/array.rs b/vortex-array/src/arrow/array.rs index 0a459ffd5..fe0e0f11e 100644 --- a/vortex-array/src/arrow/array.rs +++ b/vortex-array/src/arrow/array.rs @@ -287,7 +287,7 @@ impl FromArrowArray for Array { } }, _ => vortex_panic!( - "Missing array encoding for Arrow data type {}", + "Array encoding not implementedfor Arrow data type {}", array.data_type().clone() ), } diff --git a/vortex-datafusion/src/persistent/provider.rs b/vortex-datafusion/src/persistent/provider.rs index 48531373b..f98b9217e 100644 --- a/vortex-datafusion/src/persistent/provider.rs +++ b/vortex-datafusion/src/persistent/provider.rs @@ -81,7 +81,7 @@ impl TableProvider for VortexFileTableProvider { .data_files .iter() .cloned() - .map(std::convert::Into::into) + .map(Into::into) .collect(), ) .with_projection(projection.cloned()); diff --git a/vortex-datetime-dtype/src/temporal.rs b/vortex-datetime-dtype/src/temporal.rs index 6a68ca750..e824c1952 100644 --- a/vortex-datetime-dtype/src/temporal.rs +++ b/vortex-datetime-dtype/src/temporal.rs @@ -175,7 +175,7 @@ impl From for ExtMetadata { Some(tz) => { let tz_bytes = tz.as_bytes(); let tz_len = u16::try_from(tz_bytes.len()) - .unwrap_or_else(|err| vortex_panic!("tz did not fit in u16: {err}")); + .unwrap_or_else(|err| vortex_panic!("tz did not fit in u16: {}", err)); meta.extend_from_slice(tz_len.to_le_bytes().as_slice()); meta.extend_from_slice(tz_bytes); } diff --git a/vortex-dtype/src/dtype.rs b/vortex-dtype/src/dtype.rs index 563c6e72d..2aeefd4d1 100644 --- a/vortex-dtype/src/dtype.rs 
+++ b/vortex-dtype/src/dtype.rs @@ -87,19 +87,19 @@ impl DType { } pub fn is_unsigned_int(&self) -> bool { - PType::try_from(self).is_ok_and(super::ptype::PType::is_unsigned_int) + PType::try_from(self).is_ok_and(PType::is_unsigned_int) } pub fn is_signed_int(&self) -> bool { - PType::try_from(self).is_ok_and(super::ptype::PType::is_signed_int) + PType::try_from(self).is_ok_and(PType::is_signed_int) } pub fn is_int(&self) -> bool { - PType::try_from(self).is_ok_and(super::ptype::PType::is_int) + PType::try_from(self).is_ok_and(PType::is_int) } pub fn is_float(&self) -> bool { - PType::try_from(self).is_ok_and(super::ptype::PType::is_float) + PType::try_from(self).is_ok_and(PType::is_float) } pub fn is_boolean(&self) -> bool { diff --git a/vortex-dtype/src/field.rs b/vortex-dtype/src/field.rs index a5fbfbbc3..044dfe18d 100644 --- a/vortex-dtype/src/field.rs +++ b/vortex-dtype/src/field.rs @@ -59,7 +59,7 @@ impl FieldPath { assert_eq!(self.0.len(), 1); match &self.0[0] { Field::Name(name) => name.as_str(), - _ => vortex_panic!("FieldPath is not a name: {self}"), + _ => vortex_panic!("FieldPath is not a name: {}", self), } } } diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 591a23ea6..6e76ea2fe 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -170,18 +170,6 @@ impl Debug for VortexError { pub type VortexResult = Result; -pub trait VortexPanic { - fn panic(self) -> !; -} - -impl VortexPanic for VortexError { - #[inline(always)] - #[allow(clippy::panic)] - fn panic(self) -> ! { - panic!("{}", self) - } -} - pub trait VortexUnwrap { type Output; @@ -283,8 +271,8 @@ macro_rules! 
vortex_panic { (OutOfBounds: $idx:expr, $start:expr, $stop:expr) => {{ $crate::vortex_panic!($crate::vortex_err!(OutOfBounds: $idx, $start, $stop)) }}; - (NotImplemented: $func:expr, $by_whom:expr) => {{ - $crate::vortex_panic!($crate::vortex_err!(NotImplemented: $func, $by_whom)) + (NotImplemented: $func:expr, $for_whom:expr) => {{ + $crate::vortex_panic!($crate::vortex_err!(NotImplemented: $func, $for_whom)) }}; (MismatchedTypes: $expected:literal, $actual:expr) => {{ $crate::vortex_panic!($crate::vortex_err!(MismatchedTypes: $expected, $actual)) @@ -299,17 +287,15 @@ macro_rules! vortex_panic { $crate::vortex_panic!($crate::vortex_err!($variant: $fmt, $($arg),*)) }; ($err:expr, $fmt:literal $(, $arg:expr)* $(,)?) => {{ - use $crate::VortexPanic; - use $crate::VortexError; - let err: VortexError = $err; - err.with_context(format!($fmt, $($arg),*)).panic() + let err: $crate::VortexError = $err; + panic!("{}", err.with_context(format!($fmt, $($arg),*))) }}; ($fmt:literal $(, $arg:expr)* $(,)?) 
=> { $crate::vortex_panic!($crate::vortex_err!($fmt, $($arg),*)) }; ($err:expr) => {{ - use $crate::VortexPanic; - ($err).panic() + let err: $crate::VortexError = $err; + panic!("{}", err) }}; } diff --git a/vortex-expr/src/expr.rs b/vortex-expr/src/expr.rs index 3000c6c7c..ca4565454 100644 --- a/vortex-expr/src/expr.rs +++ b/vortex-expr/src/expr.rs @@ -8,7 +8,7 @@ use vortex::compute::{compare, Operator as ArrayOperator}; use vortex::variants::StructArrayTrait; use vortex::{Array, IntoArray}; use vortex_dtype::field::Field; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::Operator; @@ -22,12 +22,17 @@ pub trait VortexExpr: Debug + Send + Sync + PartialEq { } // Taken from apache-datafusion, necessary since you can't require VortexExpr implement PartialEq -#[allow(clippy::unwrap_used)] fn unbox_any(any: &dyn Any) -> &dyn Any { if any.is::>() { - any.downcast_ref::>().unwrap().as_any() + any + .downcast_ref::>() + .vortex_expect("any.is::> returned true but downcast_ref failed") + .as_any() } else if any.is::>() { - any.downcast_ref::>().unwrap().as_any() + any + .downcast_ref::>() + .vortex_expect("any.is::> returned true but downcast_ref failed") + .as_any() } else { any } diff --git a/vortex-scalar/src/list.rs b/vortex-scalar/src/list.rs index 03af1a398..2ecac732a 100644 --- a/vortex-scalar/src/list.rs +++ b/vortex-scalar/src/list.rs @@ -53,7 +53,7 @@ impl<'a> ListScalar<'a> { pub fn elements(&self) -> impl Iterator + '_ { self.elements .as_ref() - .map(std::convert::AsRef::as_ref) + .map(AsRef::as_ref) .unwrap_or_else(|| &[] as &[ScalarValue]) .iter() .map(|e| Scalar { diff --git a/vortex-serde/src/layouts/read/batch.rs b/vortex-serde/src/layouts/read/batch.rs index 9b9be2ff5..c48b0c2ff 100644 --- a/vortex-serde/src/layouts/read/batch.rs +++ b/vortex-serde/src/layouts/read/batch.rs @@ -41,7 +41,7 @@ impl BatchReader { }, None => { 
debug_assert!( - self.arrays.iter().all(std::option::Option::is_none), + self.arrays.iter().all(Option::is_none), "Expected layout to produce an array but it was empty" ); return Ok(None); diff --git a/vortex-serde/src/layouts/read/buffered.rs b/vortex-serde/src/layouts/read/buffered.rs index db79b7853..61e342ca3 100644 --- a/vortex-serde/src/layouts/read/buffered.rs +++ b/vortex-serde/src/layouts/read/buffered.rs @@ -28,7 +28,7 @@ impl BufferedReader { } fn buffered_row_count(&self) -> usize { - self.arrays.iter().map(vortex::Array::len).sum() + self.arrays.iter().map(Array::len).sum() } fn buffer(&mut self) -> VortexResult> { From e57e8a77215787da2a790ebeb1643bdf5e3f87c1 Mon Sep 17 00:00:00 2001 From: Will Manning Date: Mon, 9 Sep 2024 16:45:03 -0400 Subject: [PATCH 38/39] format --- encodings/alp/src/array.rs | 8 ++++++-- encodings/datetime-parts/src/compute.rs | 4 +--- vortex-array/src/array/sparse/compute/mod.rs | 11 ++--------- vortex-array/src/array/varbin/compute/mod.rs | 5 +---- vortex-expr/src/expr.rs | 6 ++---- 5 files changed, 12 insertions(+), 22 deletions(-) diff --git a/encodings/alp/src/array.rs b/encodings/alp/src/array.rs index 09ba1d0ae..a51fea615 100644 --- a/encodings/alp/src/array.rs +++ b/encodings/alp/src/array.rs @@ -199,7 +199,9 @@ impl PrimitiveArrayTrait for ALPArray { let encoded = self .encoded() .with_dyn(|a| a.as_primitive_array_unchecked().i32_accessor()) - .vortex_expect("Failed to get underlying encoded i32 array for ALP-encoded f32 array"); + .vortex_expect( + "Failed to get underlying encoded i32 array for ALP-encoded f32 array", + ); Some(Arc::new(ALPAccessor::new( encoded, @@ -222,7 +224,9 @@ impl PrimitiveArrayTrait for ALPArray { let encoded = self .encoded() .with_dyn(|a| a.as_primitive_array_unchecked().i64_accessor()) - .vortex_expect("Failed to get underlying encoded i64 array for ALP-encoded f64 array"); + .vortex_expect( + "Failed to get underlying encoded i64 array for ALP-encoded f64 array", + ); 
Some(Arc::new(ALPAccessor::new( encoded, patches, diff --git a/encodings/datetime-parts/src/compute.rs b/encodings/datetime-parts/src/compute.rs index 2b00bb6e0..de9205c73 100644 --- a/encodings/datetime-parts/src/compute.rs +++ b/encodings/datetime-parts/src/compute.rs @@ -118,9 +118,7 @@ pub fn decode_to_temporal(array: &DateTimePartsArray) -> VortexResult Scalar { - match self - .find_index(index) - .vortex_unwrap() - { - None => self - .fill_value() - .clone() - .cast(self.dtype()) - .vortex_unwrap(), + match self.find_index(index).vortex_unwrap() { + None => self.fill_value().clone().cast(self.dtype()).vortex_unwrap(), Some(idx) => scalar_at_unchecked(&self.values(), idx) .cast(self.dtype()) .vortex_unwrap(), diff --git a/vortex-array/src/array/varbin/compute/mod.rs b/vortex-array/src/array/varbin/compute/mod.rs index 58f4922cd..16a74bfa1 100644 --- a/vortex-array/src/array/varbin/compute/mod.rs +++ b/vortex-array/src/array/varbin/compute/mod.rs @@ -30,9 +30,6 @@ impl ScalarAtFn for VarBinArray { } fn scalar_at_unchecked(&self, index: usize) -> Scalar { - varbin_scalar( - self.bytes_at(index).vortex_unwrap(), - self.dtype(), - ) + varbin_scalar(self.bytes_at(index).vortex_unwrap(), self.dtype()) } } diff --git a/vortex-expr/src/expr.rs b/vortex-expr/src/expr.rs index ca4565454..116434920 100644 --- a/vortex-expr/src/expr.rs +++ b/vortex-expr/src/expr.rs @@ -24,13 +24,11 @@ pub trait VortexExpr: Debug + Send + Sync + PartialEq { // Taken from apache-datafusion, necessary since you can't require VortexExpr implement PartialEq fn unbox_any(any: &dyn Any) -> &dyn Any { if any.is::>() { - any - .downcast_ref::>() + any.downcast_ref::>() .vortex_expect("any.is::> returned true but downcast_ref failed") .as_any() } else if any.is::>() { - any - .downcast_ref::>() + any.downcast_ref::>() .vortex_expect("any.is::> returned true but downcast_ref failed") .as_any() } else { From d8d59676d6eed68d1dcc6ad5123afe42a3e6662d Mon Sep 17 00:00:00 2001 From: Will Manning Date: 
Tue, 10 Sep 2024 09:49:57 -0400 Subject: [PATCH 39/39] fix semantic merge conflict --- encodings/zigzag/src/compress.rs | 10 ++++++++-- encodings/zigzag/src/compute.rs | 2 +- encodings/zigzag/src/zigzag.rs | 6 ++++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/encodings/zigzag/src/compress.rs b/encodings/zigzag/src/compress.rs index f7d7d28e0..41bd53e35 100644 --- a/encodings/zigzag/src/compress.rs +++ b/encodings/zigzag/src/compress.rs @@ -14,7 +14,10 @@ pub fn zigzag_encode(parray: PrimitiveArray) -> VortexResult { PType::I16 => zigzag_encode_primitive::(parray.into_maybe_null_slice(), validity), PType::I32 => zigzag_encode_primitive::(parray.into_maybe_null_slice(), validity), PType::I64 => zigzag_encode_primitive::(parray.into_maybe_null_slice(), validity), - _ => vortex_bail!("ZigZag can only encode signed integers, got {}", parray.ptype()), + _ => vortex_bail!( + "ZigZag can only encode signed integers, got {}", + parray.ptype() + ), }; ZigZagArray::try_new(encoded.into_array()) } @@ -36,7 +39,10 @@ pub fn zigzag_decode(parray: PrimitiveArray) -> VortexResult { PType::U16 => zigzag_decode_primitive::(parray.into_maybe_null_slice(), validity), PType::U32 => zigzag_decode_primitive::(parray.into_maybe_null_slice(), validity), PType::U64 => zigzag_decode_primitive::(parray.into_maybe_null_slice(), validity), - _ => vortex_bail!("ZigZag can only decode unsigned integers, got {}", parray.ptype()), + _ => vortex_bail!( + "ZigZag can only decode unsigned integers, got {}", + parray.ptype() + ), }) } diff --git a/encodings/zigzag/src/compute.rs b/encodings/zigzag/src/compute.rs index 325077eea..6eb851767 100644 --- a/encodings/zigzag/src/compute.rs +++ b/encodings/zigzag/src/compute.rs @@ -2,7 +2,7 @@ use vortex::compute::unary::{scalar_at_unchecked, ScalarAtFn}; use vortex::compute::{slice, ArrayCompute, SliceFn}; use vortex::{Array, ArrayDType, IntoArray}; use vortex_dtype::match_each_unsigned_integer_ptype; -use vortex_error::{vortex_bail, 
vortex_err, VortexResult, VortexUnwrap}; +use vortex_error::{vortex_err, VortexResult, VortexUnwrap as _}; use vortex_scalar::{PrimitiveScalar, Scalar}; use zigzag::{ZigZag as ExternalZigZag, ZigZag}; diff --git a/encodings/zigzag/src/zigzag.rs b/encodings/zigzag/src/zigzag.rs index fc17d818c..86826ffa8 100644 --- a/encodings/zigzag/src/zigzag.rs +++ b/encodings/zigzag/src/zigzag.rs @@ -9,7 +9,9 @@ use vortex::{ IntoCanonical, }; use vortex_dtype::{DType, PType}; -use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexExpect as _, VortexResult}; +use vortex_error::{ + vortex_bail, vortex_err, vortex_panic, VortexExpect as _, VortexResult, VortexUnwrap as _, +}; use crate::compress::zigzag_encode; use crate::zigzag_decode; @@ -53,7 +55,7 @@ impl ZigZagArray { } pub fn ptype(&self) -> PType { - PType::try_from(self.dtype()).expect("must be a ptype") + PType::try_from(self.dtype()).vortex_unwrap() } }