Skip to content

Commit

Permalink
add tests for string_view type
Browse files Browse the repository at this point in the history
  • Loading branch information
ariesdevil committed Feb 22, 2024
1 parent 5a57951 commit 122c8ab
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 2 deletions.
24 changes: 24 additions & 0 deletions src/common/arrow/src/arrow/array/binview/from.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright 2021 Datafuse Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::arrow::array::BinaryViewArrayGeneric;
use crate::arrow::array::MutableBinaryViewArray;
use crate::arrow::array::ViewType;

/// Collects an iterator of optional values into an immutable [`BinaryViewArrayGeneric`].
///
/// The items are first gathered by [`MutableBinaryViewArray`] and the result is then
/// converted into the immutable array through its `Into` conversion.
impl<T, P> FromIterator<Option<P>> for BinaryViewArrayGeneric<T>
where
    T: ViewType + ?Sized,
    P: AsRef<T>,
{
    #[inline]
    fn from_iter<I>(iter: I) -> Self
    where
        I: IntoIterator<Item = Option<P>>,
    {
        // Delegate the actual building to the mutable array, then freeze it.
        let builder = MutableBinaryViewArray::<T>::from_iter(iter);
        builder.into()
    }
}
11 changes: 10 additions & 1 deletion src/common/arrow/src/arrow/array/binview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

mod ffi;
pub(crate) mod fmt;
mod from;
mod iterator;
mod mutable;
mod view;
Expand All @@ -23,6 +24,7 @@ mod private {
pub trait Sealed: Send + Sync {}

impl Sealed for str {}

impl Sealed for [u8] {}
}

Expand Down Expand Up @@ -157,6 +159,7 @@ impl<T: ViewType + ?Sized> Clone for BinaryViewArrayGeneric<T> {
}

// NOTE(review): `Send` is asserted manually here. Presumably the compiler cannot derive it
// because the array keeps raw buffer pointers (see `buffers_into_raw`) — confirm that the
// pointed-to buffers are owned by the array and never mutated through those pointers.
unsafe impl<T: ViewType + ?Sized> Send for BinaryViewArrayGeneric<T> {}

// NOTE(review): `Sync` relies on the same assumption as `Send` above — TODO confirm and
// replace these notes with a proper `SAFETY:` justification.
unsafe impl<T: ViewType + ?Sized> Sync for BinaryViewArrayGeneric<T> {}

fn buffers_into_raw<T>(buffers: &[Buffer<T>]) -> Arc<[(*const T, usize)]> {
Expand Down Expand Up @@ -254,6 +257,12 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
}
}

/// Creates a new [`BinaryViewArrayGeneric`] from anything that can be viewed as a slice of
/// optional values, where each present value can be borrowed as `&T`.
///
/// The slice is handed to [`MutableBinaryViewArray::from`] and the resulting mutable array
/// is converted into the immutable one.
// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
pub fn from<V, P>(slice: P) -> Self
where
    V: AsRef<T>,
    P: AsRef<[Option<V>]>,
{
    let builder = MutableBinaryViewArray::<T>::from(slice);
    builder.into()
}

/// Creates an empty [`BinaryViewArrayGeneric`], i.e. whose `.len` is zero.
#[inline]
pub fn new_empty(data_type: DataType) -> Self {
Expand Down Expand Up @@ -323,7 +332,7 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
BinaryViewValueIter::new(self)
}

pub fn len_iter(&self) -> impl Iterator<Item = u32> + '_ {
pub fn len_iter(&self) -> impl Iterator<Item=u32> + '_ {
self.views.iter().map(|v| v.length)
}

Expand Down
12 changes: 11 additions & 1 deletion src/common/arrow/src/arrow/array/binview/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,16 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
payload[0..4].copy_from_slice(&len.to_le_bytes());

if len <= 12 {
// | len | prefix | remaining(zero-padded) |
// ^ ^ ^
// | 4 bytes | 4 bytes | 8 bytes |
payload[4..4 + bytes.len()].copy_from_slice(bytes);
} else {
// | len | prefix | buffer | offsets |
// ^ ^ ^ ^
// | 4 bytes | 4 bytes | 4 bytes | 4 bytes |
//
// buffer index + offset -> real binary data
self.total_buffer_len += bytes.len();
let required_cap = self.in_progress_buffer.len() + bytes.len();
if self.in_progress_buffer.capacity() < required_cap {
Expand All @@ -192,6 +200,7 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
let offset = self.in_progress_buffer.len() as u32;
self.in_progress_buffer.extend_from_slice(bytes);

// set prefix
unsafe { payload[4..8].copy_from_slice(bytes.get_unchecked(0..4)) };
let buffer_idx: u32 = self.completed_buffers.len().try_into().unwrap();
payload[8..12].copy_from_slice(&buffer_idx.to_le_bytes());
Expand Down Expand Up @@ -347,12 +356,13 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
let len = v.length;

// view layout:
// for non-inlined layout:
// length: 4 bytes
// prefix: 4 bytes
// buffer_index: 4 bytes
// offset: 4 bytes

// inlined layout:
// for inlined layout:
// length: 4 bytes
// data: 12 bytes
let bytes = if len <= 12 {
Expand Down
130 changes: 130 additions & 0 deletions src/common/arrow/tests/it/arrow/array/binview/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright 2021 Datafuse Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use databend_common_arrow::arrow::array::Array;
use databend_common_arrow::arrow::array::BinaryViewArray;
use databend_common_arrow::arrow::array::Utf8ViewArray;
use databend_common_arrow::arrow::bitmap::Bitmap;
use databend_common_arrow::arrow::datatypes::DataType;

/// Exercises the basic accessors of `Utf8ViewArray`: value lookup, validity,
/// reconstruction from raw parts and slicing.
#[test]
fn basics_string_view() {
    // Larger than 12 bytes, so it takes the non-inlined (buffer-backed) path.
    const LONG: &str = "Databend Cloud is a Cost-Effective alternative to Snowflake.";

    let array: Utf8ViewArray = vec![Some("hello"), None, Some(LONG)]
        .into_iter()
        .collect();

    // Value access; the null slot reads back as an empty string.
    assert_eq!(array.value(0), "hello");
    assert_eq!(array.value(1), "");
    assert_eq!(array.value(2), LONG);
    assert_eq!(unsafe { array.value_unchecked(2) }, LONG);

    // Validity: slots 0 and 2 are set, slot 1 is null.
    let expected_validity = Bitmap::from_u8_slice([0b0000_0101], 3);
    assert_eq!(array.validity(), Some(&expected_validity));
    for (index, expected) in [true, false, true].into_iter().enumerate() {
        assert_eq!(array.is_valid(index), expected);
    }

    // Rebuilding the array from its own parts must yield an equal array.
    let rebuilt = Utf8ViewArray::new_unchecked(
        DataType::Utf8View,
        array.views().clone(),
        array.data_buffers().clone(),
        array.validity().cloned(),
        array.total_bytes_len(),
        array.total_buffer_len(),
    );
    assert_eq!(array, rebuilt);

    // Slicing off the first element keeps the remaining two values intact.
    let tail = array.sliced(1, 2);
    assert_eq!(tail.value(0), "");
    assert_eq!(tail.value(1), LONG);
}

/// Exercises the basic accessors of `BinaryViewArray`: value lookup, validity,
/// reconstruction from raw parts and slicing.
#[test]
fn basics_binary_view() {
    let data = vec![
        Some(b"hello".to_vec()),
        None,
        // larger than 12 bytes.
        Some(b"Databend Cloud is a Cost-Effective alternative to Snowflake.".to_vec()),
    ];

    let array: BinaryViewArray = data.into_iter().collect();

    // Value access; the null slot reads back as an empty byte string.
    assert_eq!(array.value(0), b"hello");
    assert_eq!(array.value(1), b"");
    assert_eq!(
        array.value(2),
        b"Databend Cloud is a Cost-Effective alternative to Snowflake."
    );
    assert_eq!(
        unsafe { array.value_unchecked(2) },
        b"Databend Cloud is a Cost-Effective alternative to Snowflake."
    );

    // Validity: slots 0 and 2 are set, slot 1 is null.
    assert_eq!(
        array.validity(),
        Some(&Bitmap::from_u8_slice([0b00000101], 3))
    );
    assert!(array.is_valid(0));
    assert!(!array.is_valid(1));
    assert!(array.is_valid(2));

    // Rebuild the array from its own parts. A binary view array must carry the
    // `BinaryView` logical type, not `Utf8View` (which belongs to the string variant).
    let array2 = BinaryViewArray::new_unchecked(
        DataType::BinaryView,
        array.views().clone(),
        array.data_buffers().clone(),
        array.validity().cloned(),
        array.total_bytes_len(),
        array.total_buffer_len(),
    );

    assert_eq!(array, array2);

    // Slicing off the first element keeps the remaining two values intact.
    let array = array.sliced(1, 2);

    assert_eq!(array.value(0), b"");
    assert_eq!(
        array.value(1),
        b"Databend Cloud is a Cost-Effective alternative to Snowflake."
    );
}

/// Checks that the slice-based `from` constructors record `None` entries as null
/// in the validity bitmap for both the string and the binary view arrays.
#[test]
fn from() {
    let expected_validity = Bitmap::from([true, true, false]);

    let strings = Utf8ViewArray::from([Some("hello"), Some(" "), None]);
    assert_eq!(strings.validity().unwrap(), &expected_validity);

    let binaries = BinaryViewArray::from([Some(b"hello".to_vec()), Some(b" ".to_vec()), None]);
    assert_eq!(binaries.validity().unwrap(), &expected_validity);
}
1 change: 1 addition & 0 deletions src/common/arrow/tests/it/arrow/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// limitations under the License.

mod binary;
mod binview;
mod boolean;
mod dictionary;
mod equal;
Expand Down

0 comments on commit 122c8ab

Please sign in to comment.