Skip to content

Commit

Permalink
Add ArrayDataLayout (apache#1799)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Mar 8, 2023
1 parent d7561c8 commit e741c29
Show file tree
Hide file tree
Showing 16 changed files with 1,395 additions and 177 deletions.
5 changes: 5 additions & 0 deletions arrow-buffer/src/buffer/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,9 @@ impl BooleanBuffer {
pub fn inner(&self) -> &Buffer {
// Hand out a shared reference to the backing buffer; ownership stays with `self`
&self.buffer
}

/// Consumes `self`, returning the inner [`Buffer`] by value
pub fn into_inner(self) -> Buffer {
self.buffer
}
}
15 changes: 15 additions & 0 deletions arrow-buffer/src/buffer/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,21 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
let buffer = MutableBuffer::from_len_zeroed(std::mem::size_of::<O>());
Self(buffer.into_buffer().into())
}

/// Returns a reference to the inner [`ScalarBuffer`] wrapped by this [`OffsetBuffer`]
pub fn inner(&self) -> &ScalarBuffer<O> {
&self.0
}

/// Consumes this [`OffsetBuffer`], returning the inner [`ScalarBuffer`]
pub fn into_inner(self) -> ScalarBuffer<O> {
self.0
}

/// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
///
/// The wrapped [`ScalarBuffer`] is sliced to `len + 1` values (saturating at
/// `usize::MAX`), presumably because `len` elements are delimited by `len + 1` offsets.
pub fn slice(&self, offset: usize, len: usize) -> Self {
    // Saturate rather than wrap so an oversized `len` cannot overflow to a small count
    let value_count = len.saturating_add(1);
    let sliced = self.0.slice(offset, value_count);
    Self(sliced)
}
}

impl<T: ArrowNativeType> Deref for OffsetBuffer<T> {
Expand Down
10 changes: 10 additions & 0 deletions arrow-buffer/src/buffer/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,14 @@ where
len,
}
}

/// Returns a reference to the inner [`ScalarBuffer`] holding the run ends
pub fn inner(&self) -> &ScalarBuffer<E> {
&self.run_ends
}

/// Consumes `self`, returning the inner [`ScalarBuffer`] holding the run ends
pub fn into_inner(self) -> ScalarBuffer<E> {
self.run_ends
}
}
15 changes: 15 additions & 0 deletions arrow-buffer/src/buffer/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,21 @@ impl<T: ArrowNativeType> ScalarBuffer<T> {
let byte_len = len.checked_mul(size).expect("length overflow");
buffer.slice_with_length(byte_offset, byte_len).into()
}

/// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
///
/// The slice is built by handing a clone of the inner [`Buffer`] to [`Self::new`]
/// together with `offset` and `len`.
pub fn slice(&self, offset: usize, len: usize) -> Self {
    let buffer = self.buffer.clone();
    Self::new(buffer, offset, len)
}

/// Returns a reference to the inner [`Buffer`]
pub fn inner(&self) -> &Buffer {
&self.buffer
}

/// Consumes this [`ScalarBuffer`], returning the inner [`Buffer`]
pub fn into_inner(self) -> Buffer {
self.buffer
}
}

impl<T: ArrowNativeType> Deref for ScalarBuffer<T> {
Expand Down
139 changes: 139 additions & 0 deletions arrow-data/src/data/boolean.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::data::types::PhysicalType;
use crate::data::ArrayDataLayout;
use crate::{ArrayDataBuilder, Buffers};
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
use arrow_schema::DataType;

/// Array data for boolean arrays: a [`DataType`], a [`BooleanBuffer`] of values
/// and an optional [`NullBuffer`]
#[derive(Debug, Clone)]
pub struct BooleanArrayData {
/// Data type of the array; `new` asserts its physical type is `PhysicalType::Boolean`
data_type: DataType,
/// The boolean values
values: BooleanBuffer,
/// Optional null buffer; `new` asserts its length equals that of `values`
nulls: Option<NullBuffer>,
}

impl BooleanArrayData {
    /// Create a new [`BooleanArrayData`]
    ///
    /// # Panics
    ///
    /// Panics if
    /// - `nulls` and `values` are different lengths
    /// - the physical type of `data_type` is not [`PhysicalType::Boolean`]
    pub fn new(
        data_type: DataType,
        values: BooleanBuffer,
        nulls: Option<NullBuffer>,
    ) -> Self {
        let physical = PhysicalType::from(&data_type);
        assert_eq!(
            physical, PhysicalType::Boolean,
            "Illegal physical type for BooleanArrayData of datatype {:?}, expected {:?} got {:?}",
            data_type,
            PhysicalType::Boolean,
            physical
        );

        if let Some(n) = nulls.as_ref() {
            assert_eq!(
                values.len(),
                n.len(),
                "BooleanArrayData values and nulls must have the same length"
            );
        }

        Self {
            data_type,
            values,
            nulls,
        }
    }

    /// Create a new [`BooleanArrayData`] without validating its arguments
    ///
    /// # Safety
    ///
    /// - `nulls` and `values` are the same lengths
    /// - `PhysicalType::from(&data_type) == PhysicalType::Boolean`
    pub unsafe fn new_unchecked(
        data_type: DataType,
        values: BooleanBuffer,
        nulls: Option<NullBuffer>,
    ) -> Self {
        Self {
            data_type,
            values,
            nulls,
        }
    }

    /// Creates a new [`BooleanArrayData`] from raw buffers
    ///
    /// The first buffer of `builder` becomes the values, wrapped in a
    /// [`BooleanBuffer`] using the builder's `offset` and `len`. The builder's
    /// `nulls` and `data_type` are stored as provided.
    ///
    /// # Panics
    ///
    /// Panics if `builder` contains no buffers
    ///
    /// # Safety
    ///
    /// See [`BooleanArrayData::new_unchecked`]
    pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
        let values = builder
            .buffers
            .into_iter()
            .next()
            .expect("BooleanArrayData requires a values buffer");
        let values = BooleanBuffer::new(values, builder.offset, builder.len);
        Self {
            values,
            data_type: builder.data_type,
            nulls: builder.nulls,
        }
    }

    /// Returns the null buffer if any
    #[inline]
    pub fn nulls(&self) -> Option<&NullBuffer> {
        self.nulls.as_ref()
    }

    /// Returns the boolean values
    #[inline]
    pub fn values(&self) -> &BooleanBuffer {
        &self.values
    }

    /// Returns the data type of this array
    #[inline]
    pub fn data_type(&self) -> &DataType {
        &self.data_type
    }

    /// Returns the underlying parts of this [`BooleanArrayData`]
    ///
    /// The tuple is `(data_type, values, nulls)`
    pub fn into_parts(self) -> (DataType, BooleanBuffer, Option<NullBuffer>) {
        (self.data_type, self.values, self.nulls)
    }

    /// Returns a zero-copy slice of this array
    ///
    /// The values and, if present, the null buffer are both sliced with the
    /// same `offset` and `len`; the data type is cloned.
    pub fn slice(&self, offset: usize, len: usize) -> Self {
        Self {
            data_type: self.data_type.clone(),
            values: self.values.slice(offset, len),
            nulls: self.nulls.as_ref().map(|x| x.slice(offset, len)),
        }
    }

    /// Returns an [`ArrayDataLayout`] representation of this
    ///
    /// The length and offset are taken from the values buffer, which is also
    /// the only buffer reported; there is no child data.
    pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
        ArrayDataLayout {
            data_type: &self.data_type,
            len: self.values.len(),
            offset: self.values.offset(),
            nulls: self.nulls.as_ref(),
            buffers: Buffers::one(self.values().inner()),
            child_data: &[],
        }
    }
}
11 changes: 10 additions & 1 deletion arrow-data/src/data/buffers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ pub struct Buffers<'a>([Option<&'a Buffer>; 2]);

impl<'a> Buffers<'a> {
/// Temporary: will be removed once `ArrayData` no longer stores `Vec<Buffer>` directly (#3769)
#[inline]
pub(crate) fn from_slice(a: &'a [Buffer]) -> Self {
match a.len() {
0 => Self([None, None]),
Expand All @@ -34,6 +33,16 @@ impl<'a> Buffers<'a> {
}
}

/// Creates a [`Buffers`] containing the single buffer `b`
#[inline]
pub(crate) fn one(b: &'a Buffer) -> Self {
    // Second slot stays empty
    let slots = [Some(b), None];
    Self(slots)
}

/// Creates a [`Buffers`] containing `a` followed by `b`
#[inline]
pub(crate) fn two(a: &'a Buffer, b: &'a Buffer) -> Self {
    let slots = [Some(a), Some(b)];
    Self(slots)
}

/// Returns the number of [`Buffer`] in this collection
#[inline]
pub fn len(&self) -> usize {
Expand Down
Loading

0 comments on commit e741c29

Please sign in to comment.