Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added remaining scalars and improved API.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed May 16, 2021
1 parent 96de6cd commit a557bfc
Show file tree
Hide file tree
Showing 8 changed files with 468 additions and 218 deletions.
2 changes: 1 addition & 1 deletion src/scalar/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ There are three reasons:
* forward-compatibility: a new entry on an `enum` is backward-incompatible
* do not expose implementation details to users (reduce the surface of the public API)

### `Scalar` should contain nullability information
### `Scalar` MUST contain nullability information

This is to be aligned with the general notion of arrow's `Array`.

Expand Down
66 changes: 66 additions & 0 deletions src/scalar/binary.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
use crate::{array::*, buffer::Buffer, datatypes::DataType};

use super::Scalar;

/// A single, possibly-null, binary (byte string) value.
///
/// The offset type `O` selects which Arrow data type the scalar reports:
/// `DataType::LargeBinary` when `O::is_large()` and `DataType::Binary` otherwise.
#[derive(Debug, Clone)]
pub struct BinaryScalar<O: Offset> {
// The bytes of the value; an empty buffer when the scalar was built from `None`.
value: Buffer<u8>,
// Whether the scalar holds a value (`true`) or is null (`false`).
is_valid: bool,
// Zero-sized marker that ties the scalar to its offset type `O`.
phantom: std::marker::PhantomData<O>,
}

impl<O: Offset> BinaryScalar<O> {
    /// Creates a new [`BinaryScalar`] from optional bytes; `None` yields a null scalar
    /// that stores an empty buffer.
    ///
    /// # Panics
    /// Panics with "Too large" when the number of bytes is not representable in the
    /// offset type `O`.
    #[inline]
    pub fn new(v: Option<&[u8]>) -> Self {
        let bytes: &[u8] = v.unwrap_or(&[]);
        // Only the check matters here: the converted offset itself is not stored.
        O::from_usize(bytes.len()).expect("Too large");
        Self {
            value: Buffer::from(bytes),
            is_valid: v.is_some(),
            phantom: std::marker::PhantomData,
        }
    }

    /// Returns the stored bytes (an empty slice when the scalar was created from `None`).
    #[inline]
    pub fn value(&self) -> &[u8] {
        self.value.as_slice()
    }
}

impl<O: Offset> Scalar for BinaryScalar<O> {
    /// Returns `self` as [`std::any::Any`] so callers can downcast to the concrete scalar.
    #[inline]
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Whether this scalar holds a value (`true`) or is null (`false`).
    #[inline]
    fn is_valid(&self) -> bool {
        self.is_valid
    }

    /// `DataType::LargeBinary` when the offset type is large, `DataType::Binary` otherwise.
    #[inline]
    fn data_type(&self) -> &DataType {
        if O::is_large() {
            &DataType::LargeBinary
        } else {
            &DataType::Binary
        }
    }

    /// Builds a [`BinaryArray`] with `length` slots, each holding a copy of this value,
    /// or an all-null array of `length` slots when the scalar is null.
    ///
    /// # Panics
    /// Panics when `length * value.len()` overflows `usize` or is not representable in
    /// the offset type `O`. The product is validated up front so that offsets can never
    /// silently wrap around.
    fn to_boxed_array(&self, length: usize) -> Box<dyn Array> {
        if !self.is_valid {
            return Box::new(BinaryArray::<O>::new_null(length));
        }

        let item_length = self.value.len();
        // The last offset is the largest one: if it fits in `O`, all of them do.
        let total = length
            .checked_mul(item_length)
            .expect("total number of bytes overflows usize");
        O::from_usize(total).expect("total number of bytes does not fit in the offset type");

        // Multiply in `usize` (cannot overflow, bounded by `total`) and convert afterwards,
        // instead of multiplying in `O`, where an overflow could wrap unnoticed.
        let offsets = (0..=length).map(|i| O::from_usize(i * item_length).unwrap());
        // SAFETY: the iterator is a `map` over an inclusive range, which reports its exact
        // length. NOTE(review): this is the precondition suggested by the callee's name;
        // confirm against its documented contract.
        let offsets = unsafe { Buffer::from_trusted_len_iter_unchecked(offsets) };

        let values = std::iter::repeat(self.value.as_slice())
            .take(length)
            .flatten()
            .copied()
            .collect();
        Box::new(BinaryArray::<O>::from_data(offsets, values, None))
    }
}
51 changes: 51 additions & 0 deletions src/scalar/boolean.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
use crate::{array::*, bitmap::Bitmap, datatypes::DataType};

use super::Scalar;

/// A single, possibly-null, boolean value.
#[derive(Debug, Clone)]
pub struct BooleanScalar {
// The boolean value; `false` (the default) when the scalar was built from `None`.
value: bool,
// Whether the scalar holds a value (`true`) or is null (`false`).
is_valid: bool,
}

impl BooleanScalar {
    /// Creates a new [`BooleanScalar`]; `None` yields a null scalar whose stored value
    /// is `false`.
    #[inline]
    pub fn new(v: Option<bool>) -> Self {
        match v {
            Some(value) => Self {
                value,
                is_valid: true,
            },
            None => Self {
                value: false,
                is_valid: false,
            },
        }
    }

    /// Returns the stored boolean (`false` when the scalar was created from `None`).
    #[inline]
    pub fn value(&self) -> bool {
        self.value
    }
}

impl Scalar for BooleanScalar {
    /// Returns `self` as [`std::any::Any`] so callers can downcast to the concrete scalar.
    #[inline]
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Whether this scalar holds a value (`true`) or is null (`false`).
    #[inline]
    fn is_valid(&self) -> bool {
        self.is_valid
    }

    /// Always `DataType::Boolean`.
    #[inline]
    fn data_type(&self) -> &DataType {
        &DataType::Boolean
    }

    /// Builds a [`BooleanArray`] with `length` slots, each equal to this value, or an
    /// all-null array of `length` slots when the scalar is null.
    fn to_boxed_array(&self, length: usize) -> Box<dyn Array> {
        // Null scalar: nothing to repeat, hand back an all-null array.
        if !self.is_valid {
            return Box::new(BooleanArray::new_null(length));
        }

        let repeated = std::iter::repeat(self.value).take(length);
        let bits = Bitmap::from_trusted_len_iter(repeated);
        Box::new(BooleanArray::from_data(bits, None))
    }
}
83 changes: 83 additions & 0 deletions src/scalar/list.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use std::any::Any;
use std::sync::Arc;

use crate::{
array::*,
buffer::Buffer,
datatypes::{DataType, Field},
};

use super::Scalar;

/// The scalar equivalent of [`ListArray`]. Like [`ListArray`], this struct holds a dynamically-typed
/// [`Array`]. The only difference is that this has only one element.
#[derive(Debug, Clone)]
pub struct ListScalar<O: Offset> {
// The items of the single list held by this scalar (an empty array when null).
values: Arc<dyn Array>,
// Number of items in `values`, converted to the offset type `O` at construction.
length: O,
// Whether the scalar holds a list (`true`) or is null (`false`).
is_valid: bool,
// Zero-sized marker for the offset type `O`.
phantom: std::marker::PhantomData<O>,
// The list data type (`List` or `LargeList`, depending on `O`) wrapping an "item" field.
data_type: DataType,
}

/// Input accepted by `ListScalar::new`: either the items of a valid list, or only the
/// item data type, which produces a null list scalar.
#[derive(Debug, Clone)]
pub enum ListScalarNew {
    /// A valid list whose items are the given array.
    Array(Arc<dyn Array>),
    /// A null list whose items would have the given data type.
    DataType(DataType),
}

impl<O: Offset> ListScalar<O> {
#[inline]
pub fn new(v: ListScalarNew) -> Self {
let (data_type, values, is_valid) = match v {
ListScalarNew::Array(a) => (a.data_type().clone(), a, true),
ListScalarNew::DataType(d) => (d.clone(), new_empty_array(d).into(), false),
};
let field = Field::new("item", data_type, true);
let data_type = if O::is_large() {
DataType::LargeList(Box::new(field))
} else {
DataType::List(Box::new(field))
};
let length = O::from_usize(values.len()).unwrap();
Self {
values,
length,
is_valid,
phantom: std::marker::PhantomData,
data_type,
}
}
}

impl<O: Offset> Scalar for ListScalar<O> {
fn as_any(&self) -> &dyn Any {
self
}

fn is_valid(&self) -> bool {
self.is_valid
}

fn data_type(&self) -> &DataType {
&self.data_type
}

fn to_boxed_array(&self, length: usize) -> Box<dyn Array> {
if self.is_valid {
let offsets = (0..=length).map(|i| O::from_usize(i).unwrap() * self.length);
let offsets = unsafe { Buffer::from_trusted_len_iter_unchecked(offsets) };
let values = std::iter::repeat(self.values.as_ref())
.take(self.length.to_usize().unwrap())
.collect::<Vec<_>>();
let values = crate::compute::concat::concatenate(&values).unwrap();
Box::new(ListArray::<O>::from_data(
self.data_type.clone(),
offsets,
values.into(),
None,
))
} else {
Box::new(ListArray::<O>::new_null(self.data_type.clone(), length))
}
}
}
Loading

0 comments on commit a557bfc

Please sign in to comment.