diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 12b50b72f..c890e0f7d 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -1,4 +1,6 @@ +#[cfg(test)] mod assertions; + mod bool; mod chunked; mod constant; diff --git a/vortex-array/src/arrow/dtype.rs b/vortex-array/src/arrow/dtype.rs index 0bc425322..704f9a592 100644 --- a/vortex-array/src/arrow/dtype.rs +++ b/vortex-array/src/arrow/dtype.rs @@ -131,6 +131,7 @@ pub fn infer_schema(dtype: &DType) -> VortexResult { Ok(builder.finish()) } +/// Try to convert a Vortex [`DType`] into an Arrow [`DataType`] pub fn infer_data_type(dtype: &DType) -> VortexResult { Ok(match dtype { DType::Null => DataType::Null, diff --git a/vortex-array/src/arrow/mod.rs b/vortex-array/src/arrow/mod.rs index 4db9618e5..12a607313 100644 --- a/vortex-array/src/arrow/mod.rs +++ b/vortex-array/src/arrow/mod.rs @@ -1,3 +1,5 @@ +//! Utilities to work with `Arrow` data and types + use vortex_error::VortexResult; pub use crate::arrow::dtype::{infer_data_type, infer_schema}; diff --git a/vortex-array/src/context.rs b/vortex-array/src/context.rs index 404f99536..9d30eca4b 100644 --- a/vortex-array/src/context.rs +++ b/vortex-array/src/context.rs @@ -6,6 +6,7 @@ use crate::array::{ }; use crate::encoding::EncodingRef; +/// A mapping from an encoding's ID to an [`EncodingRef`], used to have a shared view of all available encoding schemes. #[derive(Debug, Clone)] pub struct Context { encodings: HashMap, diff --git a/vortex-array/src/data.rs b/vortex-array/src/data.rs index b645d6735..9deb5979c 100644 --- a/vortex-array/src/data.rs +++ b/vortex-array/src/data.rs @@ -9,6 +9,7 @@ use crate::encoding::EncodingRef; use crate::stats::{Stat, Statistics, StatsSet}; use crate::{Array, ArrayDType, ArrayMetadata, ToArray}; +/// Owned [`Array`] with serialized metadata, backed by heap-allocated memory.
#[derive(Clone, Debug)] pub struct ArrayData { encoding: EncodingRef, diff --git a/vortex-array/src/encoding.rs b/vortex-array/src/encoding.rs index a0991d085..804be0011 100644 --- a/vortex-array/src/encoding.rs +++ b/vortex-array/src/encoding.rs @@ -1,3 +1,5 @@ +//! Traits and types to define shared unique encoding identifiers + use std::fmt::{Debug, Display, Formatter}; use std::hash::{Hash, Hasher}; @@ -7,6 +9,7 @@ use crate::canonical::{Canonical, IntoCanonical}; use crate::{Array, ArrayDef, ArrayTrait}; // TODO(robert): Outline how you create a well known encoding id + /// EncodingId is a unique name and numerical code of the array /// /// 0x0000 - reserved marker encoding diff --git a/vortex-array/src/implementation.rs b/vortex-array/src/implementation.rs index 5c8ec3e4d..2235d9508 100644 --- a/vortex-array/src/implementation.rs +++ b/vortex-array/src/implementation.rs @@ -21,6 +21,10 @@ pub trait ArrayDef { type Encoding: ArrayEncoding + ArrayEncodingExt; } +/// Macro to generate all the necessary code for a new type of array encoding. Including: +/// 1. New Array type that implements `AsRef`, `GetArrayMetadata`, `ToArray`, `IntoArray`, and multiple useful `From`/`TryFrom` implementations. +/// 1. New Encoding type that implements `ArrayEncoding`. +/// 1. New metadata type that implements `ArrayMetadata`. #[macro_export] macro_rules! impl_encoding { ($id:literal, $code:expr, $Name:ident) => { diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index 99fd2e585..00e66c9c3 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -12,7 +12,6 @@ use std::fmt::{Debug, Display, Formatter}; use std::future::ready; -pub use ::paste; pub use canonical::*; pub use context::*; pub use data::*; @@ -62,9 +61,14 @@ pub mod flatbuffers { pub use vortex_flatbuffers::array::*; } +/// A central type for all Vortex arrays, which are known length sequences of compressed data. 
+/// +/// This is the main entrypoint for working with in-memory Vortex data, and dispatches work over the underlying encoding or memory representations. #[derive(Debug, Clone)] pub enum Array { + /// Owned [`Array`] with serialized metadata, backed by heap-allocated memory. Data(ArrayData), + /// Zero-copy view over flatbuffer-encoded [`Array`] data, created without eager serialization. View(ArrayView), } @@ -76,6 +80,7 @@ impl Array { } } + /// Returns the number of logical elements in the array. #[allow(clippy::same_name_method)] pub fn len(&self) -> usize { match self { @@ -91,6 +96,7 @@ impl Array { } } + /// Total size of the array in bytes, including all children and buffers. pub fn nbytes(&self) -> usize { self.with_dyn(|a| a.nbytes()) } @@ -102,6 +108,7 @@ impl Array { } } + /// Returns a Vec of Arrays with all of the array's child arrays. pub fn children(&self) -> Vec { match self { Array::Data(d) => d.children().iter().cloned().collect_vec(), @@ -109,6 +116,7 @@ impl Array { } } + /// Returns the number of child arrays pub fn nchildren(&self) -> usize { match self { Self::Data(d) => d.nchildren(), @@ -174,7 +182,7 @@ impl Array { ) } - /// Checks whether array is of given encoding + /// Checks whether array is of a given encoding. pub fn is_encoding(&self, id: EncodingId) -> bool { self.encoding().id() == id } @@ -270,6 +278,7 @@ pub trait ArrayTrait: + ArrayStatisticsCompute + ToArrayData { + /// Total size of the array in bytes, including all children and buffers. 
fn nbytes(&self) -> usize { let mut visitor = NBytesVisitor(0); self.accept(&mut visitor) diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 38b9ab52c..3dec73ec9 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -47,24 +47,23 @@ impl ValidityMetadata { } } +/// Validity information for an array #[derive(Clone, Debug)] pub enum Validity { + /// Items *can't* be null NonNullable, + /// All items are valid AllValid, + /// All items are null AllInvalid, + /// Specified items are null Array(Array), } impl Validity { + /// The [`DType`] of the underlying validity array (if it exists). pub const DTYPE: DType = DType::Bool(Nullability::NonNullable); - pub fn into_array(self) -> Option { - match self { - Self::Array(a) => Some(a), - _ => None, - } - } - pub fn to_metadata(&self, length: usize) -> VortexResult { match self { Self::NonNullable => Ok(ValidityMetadata::NonNullable), @@ -85,6 +84,15 @@ impl Validity { } } + /// If Validity is [`Validity::Array`], returns the array, otherwise returns `None`. + pub fn into_array(self) -> Option { + match self { + Self::Array(a) => Some(a), + _ => None, + } + } + + /// If Validity is [`Validity::Array`], returns a reference to the array, otherwise returns `None`. pub fn as_array(&self) -> Option<&Array> { match self { Self::Array(a) => Some(a), @@ -99,6 +107,7 @@ impl Validity { } } + /// Returns whether the `index` item is valid. #[inline] pub fn is_valid(&self, index: usize) -> bool { match self { diff --git a/vortex-array/src/view.rs b/vortex-array/src/view.rs index e2b50fef9..341fb74a6 100644 --- a/vortex-array/src/view.rs +++ b/vortex-array/src/view.rs @@ -15,6 +15,7 @@ use crate::stats::{Stat, Statistics, StatsSet}; use crate::visitor::ArrayVisitor; use crate::{flatbuffers as fb, Array, Context, IntoArray, ToArray}; +/// Zero-copy view over flatbuffer-encoded array data, created without eager serialization.
#[derive(Clone)] pub struct ArrayView { encoding: EncodingRef, diff --git a/vortex-dtype/src/nullability.rs b/vortex-dtype/src/nullability.rs index 14a33dc93..4e3729505 100644 --- a/vortex-dtype/src/nullability.rs +++ b/vortex-dtype/src/nullability.rs @@ -1,5 +1,6 @@ use std::fmt::{Display, Formatter}; +/// Whether an item can contain a null value or not #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Ord, PartialOrd)] pub enum Nullability { #[default] diff --git a/vortex-dtype/src/ptype.rs b/vortex-dtype/src/ptype.rs index 7fb04bf58..6f7630540 100644 --- a/vortex-dtype/src/ptype.rs +++ b/vortex-dtype/src/ptype.rs @@ -1,3 +1,5 @@ +//! Physical type definitions and behavior. + use std::cmp::Ordering; use std::fmt::{Debug, Display, Formatter}; use std::hash::Hash; @@ -11,6 +13,7 @@ use crate::nullability::Nullability::NonNullable; use crate::DType; use crate::DType::*; +/// Physical type enum, represents the in-memory physical layout but might represent a different logical type. #[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Hash)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(feature = "serde", serde(rename_all = "lowercase"))] diff --git a/vortex-sampling-compressor/src/compressors/mod.rs b/vortex-sampling-compressor/src/compressors/mod.rs index 132f0cbba..8ffd68dde 100644 --- a/vortex-sampling-compressor/src/compressors/mod.rs +++ b/vortex-sampling-compressor/src/compressors/mod.rs @@ -217,6 +217,7 @@ impl<'a> CompressedArray<'a> { (self.array, self.path) } + /// Total size of the array in bytes, including all children and buffers. #[inline] pub fn nbytes(&self) -> usize { self.array.nbytes()