Skip to content

Commit

Permalink
More docs (#1104)
Browse files Browse the repository at this point in the history
More docs, trying to cleanup some of the main rustdoc page.
  • Loading branch information
AdamGS authored Oct 21, 2024
1 parent 90de5bd commit 008b3a7
Show file tree
Hide file tree
Showing 13 changed files with 47 additions and 9 deletions.
2 changes: 2 additions & 0 deletions vortex-array/src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#[cfg(test)]
mod assertions;

mod bool;
mod chunked;
mod constant;
Expand Down
1 change: 1 addition & 0 deletions vortex-array/src/arrow/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ pub fn infer_schema(dtype: &DType) -> VortexResult<Schema> {
Ok(builder.finish())
}

/// Try to convert a Vortex [`DType`] into an Arrow [`DataType`].
pub fn infer_data_type(dtype: &DType) -> VortexResult<DataType> {
Ok(match dtype {
DType::Null => DataType::Null,
Expand Down
2 changes: 2 additions & 0 deletions vortex-array/src/arrow/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! Utilities to work with `Arrow` data and types

use vortex_error::VortexResult;

pub use crate::arrow::dtype::{infer_data_type, infer_schema};
Expand Down
1 change: 1 addition & 0 deletions vortex-array/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::array::{
};
use crate::encoding::EncodingRef;

/// A mapping from an encoding's ID to its [`EncodingRef`], used to provide a shared view of all available encoding schemes.
#[derive(Debug, Clone)]
pub struct Context {
encodings: HashMap<u16, EncodingRef>,
Expand Down
1 change: 1 addition & 0 deletions vortex-array/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::encoding::EncodingRef;
use crate::stats::{Stat, Statistics, StatsSet};
use crate::{Array, ArrayDType, ArrayMetadata, ToArray};

/// Owned [`Array`] with serialized metadata, backed by heap-allocated memory.
#[derive(Clone, Debug)]
pub struct ArrayData {
encoding: EncodingRef,
Expand Down
3 changes: 3 additions & 0 deletions vortex-array/src/encoding.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! Traits and types to define shared unique encoding identifiers

use std::fmt::{Debug, Display, Formatter};
use std::hash::{Hash, Hasher};

Expand All @@ -7,6 +9,7 @@ use crate::canonical::{Canonical, IntoCanonical};
use crate::{Array, ArrayDef, ArrayTrait};

// TODO(robert): Outline how you create a well known encoding id

/// EncodingId is a unique name and numerical code of the array
///
/// 0x0000 - reserved marker encoding
Expand Down
4 changes: 4 additions & 0 deletions vortex-array/src/implementation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ pub trait ArrayDef {
type Encoding: ArrayEncoding + ArrayEncodingExt<D = Self>;
}

/// Macro to generate all the necessary code for a new type of array encoding, including:
/// 1. New Array type that implements `AsRef<Array>`, `GetArrayMetadata`, `ToArray`, `IntoArray`, and multiple useful `From`/`TryFrom` implementations.
/// 1. New Encoding type that implements `ArrayEncoding`.
/// 1. New metadata type that implements `ArrayMetadata`.
#[macro_export]
macro_rules! impl_encoding {
($id:literal, $code:expr, $Name:ident) => {
Expand Down
13 changes: 11 additions & 2 deletions vortex-array/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
use std::fmt::{Debug, Display, Formatter};
use std::future::ready;

pub use ::paste;
pub use canonical::*;
pub use context::*;
pub use data::*;
Expand Down Expand Up @@ -62,9 +61,14 @@ pub mod flatbuffers {
pub use vortex_flatbuffers::array::*;
}

/// The central type for all Vortex arrays, which are known-length sequences of compressed data.
///
/// This is the main entry point for working with in-memory Vortex data; it dispatches work over the underlying encoding or memory representation.
#[derive(Debug, Clone)]
pub enum Array {
/// Owned [`Array`] with serialized metadata, backed by heap-allocated memory.
Data(ArrayData),
/// Zero-copy view over flatbuffer-encoded [`Array`] data, created without eager serialization.
View(ArrayView),
}

Expand All @@ -76,6 +80,7 @@ impl Array {
}
}

/// Returns the number of logical elements in the array.
#[allow(clippy::same_name_method)]
pub fn len(&self) -> usize {
match self {
Expand All @@ -91,6 +96,7 @@ impl Array {
}
}

/// Reports the total size of the array in bytes, including all children and buffers.
///
/// The value is obtained by delegating to the dynamic array implementation via `with_dyn`.
pub fn nbytes(&self) -> usize {
    self.with_dyn(|inner| inner.nbytes())
}
Expand All @@ -102,13 +108,15 @@ impl Array {
}
}

/// Returns a Vec of Arrays with all of the array's child arrays.
pub fn children(&self) -> Vec<Array> {
match self {
Array::Data(d) => d.children().iter().cloned().collect_vec(),
Array::View(v) => v.children(),
}
}

/// Returns the number of child arrays
pub fn nchildren(&self) -> usize {
match self {
Self::Data(d) => d.nchildren(),
Expand Down Expand Up @@ -174,7 +182,7 @@ impl Array {
)
}

/// Checks whether the array is of the given encoding.
///
/// Compares the ID of the array's own encoding against `id`.
pub fn is_encoding(&self, id: EncodingId) -> bool {
    let encoding = self.encoding();
    encoding.id() == id
}
Expand Down Expand Up @@ -270,6 +278,7 @@ pub trait ArrayTrait:
+ ArrayStatisticsCompute
+ ToArrayData
{
/// Total size of the array in bytes, including all children and buffers.
fn nbytes(&self) -> usize {
let mut visitor = NBytesVisitor(0);
self.accept(&mut visitor)
Expand Down
23 changes: 16 additions & 7 deletions vortex-array/src/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,24 +47,23 @@ impl ValidityMetadata {
}
}

/// Validity information for an array, i.e. which of its items are null.
#[derive(Clone, Debug)]
pub enum Validity {
/// Items *can't* be null.
NonNullable,
/// Items may be null, but all of them are valid.
AllValid,
/// All items are null.
AllInvalid,
/// Validity is specified per item by the wrapped array.
///
/// NOTE(review): presumably a boolean array in which `true` marks a valid item — confirm.
Array(Array),
}

impl Validity {
/// The [`DType`] of the underlying validity array (if it exists).
pub const DTYPE: DType = DType::Bool(Nullability::NonNullable);

/// Consumes the validity, returning the wrapped array for [`Validity::Array`] and `None` otherwise.
pub fn into_array(self) -> Option<Array> {
    match self {
        Self::NonNullable | Self::AllValid | Self::AllInvalid => None,
        Self::Array(array) => Some(array),
    }
}

pub fn to_metadata(&self, length: usize) -> VortexResult<ValidityMetadata> {
match self {
Self::NonNullable => Ok(ValidityMetadata::NonNullable),
Expand All @@ -85,6 +84,15 @@ impl Validity {
}
}

/// Consumes the validity and yields the wrapped array when it is [`Validity::Array`]; every other variant yields `None`.
pub fn into_array(self) -> Option<Array> {
    if let Self::Array(array) = self {
        Some(array)
    } else {
        None
    }
}

/// If Validity is [`Validity::Array`], returns a reference to the array, otherwise returns `None`.
pub fn as_array(&self) -> Option<&Array> {
match self {
Self::Array(a) => Some(a),
Expand All @@ -99,6 +107,7 @@ impl Validity {
}
}

/// Returns whether the `index` item is valid.
#[inline]
pub fn is_valid(&self, index: usize) -> bool {
match self {
Expand Down
1 change: 1 addition & 0 deletions vortex-array/src/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use crate::stats::{Stat, Statistics, StatsSet};
use crate::visitor::ArrayVisitor;
use crate::{flatbuffers as fb, Array, Context, IntoArray, ToArray};

/// Zero-copy view over flatbuffer-encoded array data, created without eager serialization.
#[derive(Clone)]
pub struct ArrayView {
encoding: EncodingRef,
Expand Down
1 change: 1 addition & 0 deletions vortex-dtype/src/nullability.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::fmt::{Display, Formatter};

/// Whether an item can contain a null value or not
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum Nullability {
#[default]
Expand Down
3 changes: 3 additions & 0 deletions vortex-dtype/src/ptype.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! Physical type definitions and behavior.

use std::cmp::Ordering;
use std::fmt::{Debug, Display, Formatter};
use std::hash::Hash;
Expand All @@ -11,6 +13,7 @@ use crate::nullability::Nullability::NonNullable;
use crate::DType;
use crate::DType::*;

/// Physical type enum. It represents the in-memory physical layout, which may back a different logical type.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "lowercase"))]
Expand Down
1 change: 1 addition & 0 deletions vortex-sampling-compressor/src/compressors/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ impl<'a> CompressedArray<'a> {
(self.array, self.path)
}

/// Total size of the array in bytes, including all children and buffers.
#[inline]
pub fn nbytes(&self) -> usize {
self.array.nbytes()
Expand Down

0 comments on commit 008b3a7

Please sign in to comment.