Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Implements helper methods for vectors/values #703

Merged
merged 2 commits into from
Dec 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/common/recordbatch/src/recordbatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use snafu::ResultExt;

use crate::error::{self, Result};

// TODO(yingwen): We should hold vectors in the RecordBatch.
#[derive(Clone, Debug, PartialEq)]
pub struct RecordBatch {
pub schema: SchemaRef,
Expand Down Expand Up @@ -103,6 +104,7 @@ impl<'a> Iterator for RecordBatchRowIterator<'a> {
} else {
let mut row = Vec::with_capacity(self.columns);

// TODO(yingwen): Get from the vector if RecordBatch also holds vectors.
for col in 0..self.columns {
let column_array = self.record_batch.df_recordbatch.column(col);
match arrow_array_get(column_array.as_ref(), self.row_cursor)
Expand Down
304 changes: 141 additions & 163 deletions src/datatypes2/src/arrow_array.rs

Large diffs are not rendered by default.

213 changes: 145 additions & 68 deletions src/datatypes2/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,66 +61,67 @@ pub enum ConcreteDataType {
List(ListType),
}

// TODO(yingwen): Consider moving these methods to the DataType trait.
// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
// returning all these properties to the `DataType` trait
impl ConcreteDataType {
// pub fn is_float(&self) -> bool {
// matches!(
// self,
// ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
// )
// }

// pub fn is_boolean(&self) -> bool {
// matches!(self, ConcreteDataType::Boolean(_))
// }

// pub fn stringifiable(&self) -> bool {
// matches!(
// self,
// ConcreteDataType::String(_)
// | ConcreteDataType::Date(_)
// | ConcreteDataType::DateTime(_)
// | ConcreteDataType::Timestamp(_)
// )
// }

// pub fn is_signed(&self) -> bool {
// matches!(
// self,
// ConcreteDataType::Int8(_)
// | ConcreteDataType::Int16(_)
// | ConcreteDataType::Int32(_)
// | ConcreteDataType::Int64(_)
// | ConcreteDataType::Date(_)
// | ConcreteDataType::DateTime(_)
// | ConcreteDataType::Timestamp(_)
// )
// }

// pub fn is_unsigned(&self) -> bool {
// matches!(
// self,
// ConcreteDataType::UInt8(_)
// | ConcreteDataType::UInt16(_)
// | ConcreteDataType::UInt32(_)
// | ConcreteDataType::UInt64(_)
// )
// }

// pub fn numerics() -> Vec<ConcreteDataType> {
// vec![
// ConcreteDataType::int8_datatype(),
// ConcreteDataType::int16_datatype(),
// ConcreteDataType::int32_datatype(),
// ConcreteDataType::int64_datatype(),
// ConcreteDataType::uint8_datatype(),
// ConcreteDataType::uint16_datatype(),
// ConcreteDataType::uint32_datatype(),
// ConcreteDataType::uint64_datatype(),
// ConcreteDataType::float32_datatype(),
// ConcreteDataType::float64_datatype(),
// ]
// }
pub fn is_float(&self) -> bool {
matches!(
self,
ConcreteDataType::Float64(_) | ConcreteDataType::Float32(_)
)
}

pub fn is_boolean(&self) -> bool {
matches!(self, ConcreteDataType::Boolean(_))
}

pub fn is_stringifiable(&self) -> bool {
matches!(
self,
ConcreteDataType::String(_)
| ConcreteDataType::Date(_)
| ConcreteDataType::DateTime(_)
| ConcreteDataType::Timestamp(_)
)
}

pub fn is_signed(&self) -> bool {
matches!(
self,
ConcreteDataType::Int8(_)
| ConcreteDataType::Int16(_)
| ConcreteDataType::Int32(_)
| ConcreteDataType::Int64(_)
| ConcreteDataType::Date(_)
| ConcreteDataType::DateTime(_)
| ConcreteDataType::Timestamp(_)
)
}

pub fn is_unsigned(&self) -> bool {
matches!(
self,
ConcreteDataType::UInt8(_)
| ConcreteDataType::UInt16(_)
| ConcreteDataType::UInt32(_)
| ConcreteDataType::UInt64(_)
)
}

pub fn numerics() -> Vec<ConcreteDataType> {
vec![
ConcreteDataType::int8_datatype(),
ConcreteDataType::int16_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::uint8_datatype(),
ConcreteDataType::uint16_datatype(),
ConcreteDataType::uint32_datatype(),
ConcreteDataType::uint64_datatype(),
ConcreteDataType::float32_datatype(),
ConcreteDataType::float64_datatype(),
]
}

/// Convert arrow data type to [ConcreteDataType].
///
Expand All @@ -130,9 +131,9 @@ impl ConcreteDataType {
ConcreteDataType::try_from(dt).expect("Unimplemented type")
}

// pub fn is_null(&self) -> bool {
// matches!(self, ConcreteDataType::Null(NullType))
// }
pub fn is_null(&self) -> bool {
matches!(self, ConcreteDataType::Null(NullType))
}
}

impl TryFrom<&ArrowDataType> for ConcreteDataType {
Expand Down Expand Up @@ -261,7 +262,6 @@ pub trait DataType: std::fmt::Debug + Send + Sync {

pub type DataTypeRef = Arc<dyn DataType>;

// TODO(yingwen): Pass all tests.
#[cfg(test)]
mod tests {
use arrow::datatypes::Field;
Expand Down Expand Up @@ -401,9 +401,86 @@ mod tests {
assert!(!ConcreteDataType::uint64_datatype().is_timestamp_compatible());
}

// #[test]
// fn test_is_null() {
// assert!(ConcreteDataType::null_datatype().is_null());
// assert!(!ConcreteDataType::int32_datatype().is_null());
// }
// `is_null()` holds for the null data type and not for int32.
#[test]
fn test_is_null() {
assert!(ConcreteDataType::null_datatype().is_null());
assert!(!ConcreteDataType::int32_datatype().is_null());
}

// `is_float()` holds for float32/float64 and not for int32.
#[test]
fn test_is_float() {
assert!(!ConcreteDataType::int32_datatype().is_float());
assert!(ConcreteDataType::float32_datatype().is_float());
assert!(ConcreteDataType::float64_datatype().is_float());
}

// `is_boolean()` holds only for the boolean data type among those checked.
#[test]
fn test_is_boolean() {
assert!(!ConcreteDataType::int32_datatype().is_boolean());
assert!(!ConcreteDataType::float32_datatype().is_boolean());
assert!(ConcreteDataType::boolean_datatype().is_boolean());
}

// Numeric types are not stringifiable; string, date, datetime and every
// timestamp unit are.
#[test]
fn test_is_stringifiable() {
assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
assert!(ConcreteDataType::string_datatype().is_stringifiable());
assert!(ConcreteDataType::date_datatype().is_stringifiable());
assert!(ConcreteDataType::datetime_datatype().is_stringifiable());
assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
}

// Signed integers, date, datetime and every timestamp unit are signed;
// unsigned integers and floats are not.
#[test]
fn test_is_signed() {
// Types expected to be signed.
assert!(ConcreteDataType::int8_datatype().is_signed());
assert!(ConcreteDataType::int16_datatype().is_signed());
assert!(ConcreteDataType::int32_datatype().is_signed());
assert!(ConcreteDataType::int64_datatype().is_signed());
assert!(ConcreteDataType::date_datatype().is_signed());
assert!(ConcreteDataType::datetime_datatype().is_signed());
assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());

// Unsigned integers are not signed.
assert!(!ConcreteDataType::uint8_datatype().is_signed());
assert!(!ConcreteDataType::uint16_datatype().is_signed());
assert!(!ConcreteDataType::uint32_datatype().is_signed());
assert!(!ConcreteDataType::uint64_datatype().is_signed());

// Floats are not reported as signed by `is_signed()`.
assert!(!ConcreteDataType::float32_datatype().is_signed());
assert!(!ConcreteDataType::float64_datatype().is_signed());
}

// Only the unsigned integer types are unsigned; signed integers, date/time
// types and floats are not.
#[test]
fn test_is_unsigned() {
// Signed integers and date/time types are not unsigned.
assert!(!ConcreteDataType::int8_datatype().is_unsigned());
assert!(!ConcreteDataType::int16_datatype().is_unsigned());
assert!(!ConcreteDataType::int32_datatype().is_unsigned());
assert!(!ConcreteDataType::int64_datatype().is_unsigned());
assert!(!ConcreteDataType::date_datatype().is_unsigned());
assert!(!ConcreteDataType::datetime_datatype().is_unsigned());
assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());

// Unsigned integers.
assert!(ConcreteDataType::uint8_datatype().is_unsigned());
assert!(ConcreteDataType::uint16_datatype().is_unsigned());
assert!(ConcreteDataType::uint32_datatype().is_unsigned());
assert!(ConcreteDataType::uint64_datatype().is_unsigned());

// Floats are not unsigned either.
assert!(!ConcreteDataType::float32_datatype().is_unsigned());
assert!(!ConcreteDataType::float64_datatype().is_unsigned());
}

// `numerics()` is expected to list exactly ten data types.
#[test]
fn test_numerics() {
let nums = ConcreteDataType::numerics();
assert_eq!(10, nums.len());
}
}
2 changes: 2 additions & 0 deletions src/datatypes2/src/scalars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,8 @@ impl<'a> ScalarRef<'a> for DateTime {
}
}

// Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`.

impl Scalar for ListValue {
type VectorType = ListVector;
type RefType<'a> = ListValueRef<'a>;
Expand Down
21 changes: 21 additions & 0 deletions src/datatypes2/src/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,24 @@ define_timestamp_with_unit!(Second);
define_timestamp_with_unit!(Millisecond);
define_timestamp_with_unit!(Microsecond);
define_timestamp_with_unit!(Nanosecond);

#[cfg(test)]
mod tests {
use super::*;

// For each timestamp unit wrapper, a value built with `new(123)` must compare
// equal to both its `as_scalar_ref()` and its `to_owned_scalar()` result.
#[test]
fn test_timestamp_scalar() {
// Second precision.
let ts = TimestampSecond::new(123);
assert_eq!(ts, ts.as_scalar_ref());
assert_eq!(ts, ts.to_owned_scalar());
// Millisecond precision.
let ts = TimestampMillisecond::new(123);
assert_eq!(ts, ts.as_scalar_ref());
assert_eq!(ts, ts.to_owned_scalar());
// Microsecond precision.
let ts = TimestampMicrosecond::new(123);
assert_eq!(ts, ts.as_scalar_ref());
assert_eq!(ts, ts.to_owned_scalar());
// Nanosecond precision.
let ts = TimestampNanosecond::new(123);
assert_eq!(ts, ts.as_scalar_ref());
assert_eq!(ts, ts.to_owned_scalar());
}
}
54 changes: 30 additions & 24 deletions src/datatypes2/src/type_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,30 +58,36 @@ impl LogicalTypeId {
/// Panics if data type is not supported.
#[cfg(any(test, feature = "test"))]
pub fn data_type(&self) -> crate::data_type::ConcreteDataType {
unimplemented!()
// use crate::data_type::ConcreteDataType;
use crate::data_type::ConcreteDataType;

// match self {
// LogicalTypeId::Null => ConcreteDataType::null_datatype(),
// LogicalTypeId::Boolean => ConcreteDataType::boolean_datatype(),
// LogicalTypeId::Int8 => ConcreteDataType::int8_datatype(),
// LogicalTypeId::Int16 => ConcreteDataType::int16_datatype(),
// LogicalTypeId::Int32 => ConcreteDataType::int32_datatype(),
// LogicalTypeId::Int64 => ConcreteDataType::int64_datatype(),
// LogicalTypeId::UInt8 => ConcreteDataType::uint8_datatype(),
// LogicalTypeId::UInt16 => ConcreteDataType::uint16_datatype(),
// LogicalTypeId::UInt32 => ConcreteDataType::uint32_datatype(),
// LogicalTypeId::UInt64 => ConcreteDataType::uint64_datatype(),
// LogicalTypeId::Float32 => ConcreteDataType::float32_datatype(),
// LogicalTypeId::Float64 => ConcreteDataType::float64_datatype(),
// LogicalTypeId::String => ConcreteDataType::string_datatype(),
// LogicalTypeId::Binary => ConcreteDataType::binary_datatype(),
// LogicalTypeId::Date => ConcreteDataType::date_datatype(),
// LogicalTypeId::DateTime => ConcreteDataType::datetime_datatype(),
// LogicalTypeId::Timestamp => ConcreteDataType::timestamp_millis_datatype(), // to timestamp type with default time unit
// LogicalTypeId::List => {
// ConcreteDataType::list_datatype(ConcreteDataType::null_datatype())
// }
// }
match self {
LogicalTypeId::Null => ConcreteDataType::null_datatype(),
LogicalTypeId::Boolean => ConcreteDataType::boolean_datatype(),
LogicalTypeId::Int8 => ConcreteDataType::int8_datatype(),
LogicalTypeId::Int16 => ConcreteDataType::int16_datatype(),
LogicalTypeId::Int32 => ConcreteDataType::int32_datatype(),
LogicalTypeId::Int64 => ConcreteDataType::int64_datatype(),
LogicalTypeId::UInt8 => ConcreteDataType::uint8_datatype(),
LogicalTypeId::UInt16 => ConcreteDataType::uint16_datatype(),
LogicalTypeId::UInt32 => ConcreteDataType::uint32_datatype(),
LogicalTypeId::UInt64 => ConcreteDataType::uint64_datatype(),
LogicalTypeId::Float32 => ConcreteDataType::float32_datatype(),
LogicalTypeId::Float64 => ConcreteDataType::float64_datatype(),
LogicalTypeId::String => ConcreteDataType::string_datatype(),
LogicalTypeId::Binary => ConcreteDataType::binary_datatype(),
LogicalTypeId::Date => ConcreteDataType::date_datatype(),
LogicalTypeId::DateTime => ConcreteDataType::datetime_datatype(),
LogicalTypeId::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
LogicalTypeId::TimestampMillisecond => {
ConcreteDataType::timestamp_millisecond_datatype()
}
LogicalTypeId::TimestampMicrosecond => {
ConcreteDataType::timestamp_microsecond_datatype()
}
LogicalTypeId::TimestampNanosecond => ConcreteDataType::timestamp_nanosecond_datatype(),
LogicalTypeId::List => {
ConcreteDataType::list_datatype(ConcreteDataType::null_datatype())
}
evenyag marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
15 changes: 11 additions & 4 deletions src/datatypes2/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,9 +393,9 @@ impl Ord for ListValue {
impl TryFrom<ScalarValue> for Value {
type Error = error::Error;

// TODO(yingwen): Implement it.
fn try_from(v: ScalarValue) -> Result<Self> {
let v = match v {
ScalarValue::Null => Value::Null,
ScalarValue::Boolean(b) => Value::from(b),
ScalarValue::Float32(f) => Value::from(f),
ScalarValue::Float64(f) => Value::from(f),
Expand All @@ -410,7 +410,9 @@ impl TryFrom<ScalarValue> for Value {
ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) => {
Value::from(s.map(StringBytes::from))
}
ScalarValue::Binary(b) | ScalarValue::LargeBinary(b) => Value::from(b.map(Bytes::from)),
ScalarValue::Binary(b)
| ScalarValue::LargeBinary(b)
| ScalarValue::FixedSizeBinary(_, b) => Value::from(b.map(Bytes::from)),
ScalarValue::List(vs, field) => {
let items = if let Some(vs) = vs {
let vs = vs
Expand Down Expand Up @@ -440,7 +442,13 @@ impl TryFrom<ScalarValue> for Value {
ScalarValue::TimestampNanosecond(t, _) => t
.map(|x| Value::Timestamp(Timestamp::new(x, TimeUnit::Nanosecond)))
.unwrap_or(Value::Null),
_ => {
ScalarValue::Decimal128(_, _, _)
| ScalarValue::Time64(_)
| ScalarValue::IntervalYearMonth(_)
| ScalarValue::IntervalDayTime(_)
| ScalarValue::IntervalMonthDayNano(_)
| ScalarValue::Struct(_, _)
| ScalarValue::Dictionary(_, _) => {
return error::UnsupportedArrowTypeSnafu {
arrow_type: v.get_datatype(),
}
Expand Down Expand Up @@ -648,7 +656,6 @@ impl<'a> PartialOrd for ListValueRef<'a> {
}
}

// TODO(yingwen): Pass all tests.
#[cfg(test)]
mod tests {
use arrow::datatypes::DataType as ArrowDataType;
Expand Down
Loading