Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added IndexRange to remove checks in hot loops #247

Merged
merged 2 commits into from
Aug 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ pub use fixed_size_list::FixedSizeListArray;
pub use list::{ListArray, MutableListArray};
pub use null::NullArray;
pub use primitive::*;
pub use specification::{Index, Offset};
pub use specification::Offset;
pub use struct_::StructArray;
pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter};

Expand Down
128 changes: 2 additions & 126 deletions src/array/specification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,13 @@ use std::convert::TryFrom;

use num::Num;

use crate::{
buffer::{Buffer, MutableBuffer},
types::{NativeType, NaturalDataType},
};

/// Trait describing any type that can be used to index a slot of an array.
pub trait Index: NativeType + NaturalDataType {
fn to_usize(&self) -> usize;
fn from_usize(index: usize) -> Option<Self>;
fn is_usize() -> bool {
false
}

fn buffer_from_range(start: usize, end: usize) -> Option<MutableBuffer<Self>>;
}
use crate::{buffer::Buffer, types::Index};

/// Trait describing types that can be used as offsets as per Arrow specification.
/// This trait is only implemented for `i32` and `i64`, the two sizes part of the specification.
/// # Safety
/// Do not implement.
pub unsafe trait Offset:
Index + Num + Ord + std::ops::AddAssign + std::ops::Sub + num::CheckedAdd
{
pub unsafe trait Offset: Index + Num + Ord + num::CheckedAdd {
fn is_large() -> bool;

fn to_isize(&self) -> isize;
Expand Down Expand Up @@ -66,114 +50,6 @@ unsafe impl Offset for i64 {
}
}

impl Index for i32 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}

fn buffer_from_range(start: usize, end: usize) -> Option<MutableBuffer<Self>> {
let start = Self::from_usize(start);
let end = Self::from_usize(end);
match (start, end) {
(Some(start), Some(end)) => unsafe {
Some(MutableBuffer::<Self>::from_trusted_len_iter_unchecked(
start..end,
))
},
_ => None,
}
}
}

impl Index for i64 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}

fn buffer_from_range(start: usize, end: usize) -> Option<MutableBuffer<Self>> {
let start = Self::from_usize(start);
let end = Self::from_usize(end);
match (start, end) {
(Some(start), Some(end)) => unsafe {
Some(MutableBuffer::<Self>::from_trusted_len_iter_unchecked(
start..end,
))
},
_ => None,
}
}
}

impl Index for u32 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}

fn is_usize() -> bool {
std::mem::size_of::<Self>() == std::mem::size_of::<usize>()
}

fn buffer_from_range(start: usize, end: usize) -> Option<MutableBuffer<Self>> {
let start = Self::from_usize(start);
let end = Self::from_usize(end);
match (start, end) {
(Some(start), Some(end)) => unsafe {
Some(MutableBuffer::<Self>::from_trusted_len_iter_unchecked(
start..end,
))
},
_ => None,
}
}
}

impl Index for u64 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}

fn is_usize() -> bool {
std::mem::size_of::<Self>() == std::mem::size_of::<usize>()
}

fn buffer_from_range(start: usize, end: usize) -> Option<MutableBuffer<Self>> {
let start = Self::from_usize(start);
let end = Self::from_usize(end);
match (start, end) {
(Some(start), Some(end)) => unsafe {
Some(MutableBuffer::<Self>::from_trusted_len_iter_unchecked(
start..end,
))
},
_ => None,
}
}
}

#[inline]
pub fn check_offsets<O: Offset>(offsets: &Buffer<O>, values_len: usize) -> usize {
assert!(
Expand Down
9 changes: 0 additions & 9 deletions src/buffer/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,6 @@ impl<T: NativeType> MutableBuffer<T> {
}
}

/// Allocates a new [MutableBuffer] with `len` and capacity to be at least `len` where
/// all bytes are not initialized
#[inline]
pub unsafe fn from_len(len: usize) -> Self {
let mut buffer = MutableBuffer::with_capacity(len);
buffer.set_len(len);
buffer
}

/// Ensures that this buffer has at least `self.len + additional` bytes. This re-allocates iff
/// `self.len + additional > capacity`.
/// # Example
Expand Down
3 changes: 2 additions & 1 deletion src/compute/sort/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{
array::{Array, BooleanArray, Index, PrimitiveArray},
array::{Array, BooleanArray, PrimitiveArray},
buffer::MutableBuffer,
types::Index,
};

use super::SortOptions;
Expand Down
17 changes: 7 additions & 10 deletions src/compute/sort/common.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
use crate::{
array::{Index, PrimitiveArray},
bitmap::Bitmap,
buffer::MutableBuffer,
};
use crate::{array::PrimitiveArray, bitmap::Bitmap, buffer::MutableBuffer, types::Index};

use super::SortOptions;

Expand Down Expand Up @@ -100,19 +96,19 @@ where
let limit = limit.min(length);

let indices = if let Some(validity) = validity {
let mut indices = unsafe { MutableBuffer::<I>::from_len(length) };
let mut indices = MutableBuffer::<I>::from_len_zeroed(length);
if options.nulls_first {
let mut nulls = 0;
let mut valids = 0;
validity
.iter()
.zip(0..length)
.zip(I::range(0, length).unwrap())
.for_each(|(is_valid, index)| {
if is_valid {
indices[validity.null_count() + valids] = I::from_usize(index).unwrap();
indices[validity.null_count() + valids] = index;
valids += 1;
} else {
indices[nulls] = I::from_usize(index).unwrap();
indices[nulls] = index;
nulls += 1;
}
});
Expand Down Expand Up @@ -154,7 +150,8 @@ where

indices
} else {
let mut indices = Index::buffer_from_range(0, length).unwrap();
let mut indices = MutableBuffer::from_trusted_len_iter(I::range(0, length).unwrap());

// Soundness:
// indices are by construction `< values.len()`
// limit is by construction `< values.len()`
Expand Down
3 changes: 2 additions & 1 deletion src/compute/sort/lex_sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ use std::cmp::Ordering;
use crate::compute::take;
use crate::error::{ArrowError, Result};
use crate::{
array::{ord, Array, Index, PrimitiveArray},
array::{ord, Array, PrimitiveArray},
buffer::MutableBuffer,
types::Index,
};

use super::{sort_to_indices, SortOptions};
Expand Down
2 changes: 1 addition & 1 deletion src/compute/sort/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::{
array::*,
types::{days_ms, NativeType},
types::{days_ms, Index, NativeType},
};

use crate::buffer::MutableBuffer;
Expand Down
4 changes: 2 additions & 2 deletions src/compute/sort/primitive/indices.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
array::{Array, Index, PrimitiveArray},
types::NativeType,
array::{Array, PrimitiveArray},
types::{Index, NativeType},
};

use super::super::common;
Expand Down
3 changes: 2 additions & 1 deletion src/compute/sort/utf8.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::array::{Array, Index, Offset, PrimitiveArray, Utf8Array};
use crate::array::{Array, Offset, PrimitiveArray, Utf8Array};
use crate::array::{DictionaryArray, DictionaryKey};
use crate::types::Index;

use super::common;
use super::SortOptions;
Expand Down
4 changes: 1 addition & 3 deletions src/compute/take/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,9 @@ use crate::{
array::{new_empty_array, Array, NullArray, PrimitiveArray},
datatypes::{DataType, IntervalUnit},
error::Result,
types::days_ms,
types::{days_ms, Index},
};

pub use crate::array::Index;

mod binary;
mod boolean;
mod dict;
Expand Down
113 changes: 113 additions & 0 deletions src/types/index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
use std::convert::TryFrom;

use crate::{
trusted_len::TrustedLen,
types::{NativeType, NaturalDataType},
};

/// iterator of [`Index`] equivalent to `(a..b)`.
// `Step` is unstable in Rust which does not allow (a..b) for generic `Index`.
pub struct IndexRange<I: Index> {
start: I,
end: I,
}

impl<I: Index> IndexRange<I> {
pub fn new(start: I, end: I) -> Self {
assert!(end >= start);
Self { start, end }
}
}

impl<I: Index> Iterator for IndexRange<I> {
type Item = I;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.start == self.end {
return None;
}
let old = self.start;
self.start += I::one();
Some(old)
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let len = (self.end - self.start).to_usize();
(len, Some(len))
}
}

/// Safety: a range is always of known length
unsafe impl<I: Index> TrustedLen for IndexRange<I> {}

/// Trait describing any type that can be used to index a slot of an array.
pub trait Index:
NativeType
+ NaturalDataType
+ std::ops::AddAssign
+ std::ops::Sub<Output = Self>
+ num::One
+ PartialOrd
{
fn to_usize(&self) -> usize;
fn from_usize(index: usize) -> Option<Self>;

fn range(start: usize, end: usize) -> Option<IndexRange<Self>> {
let start = Self::from_usize(start);
let end = Self::from_usize(end);
match (start, end) {
(Some(start), Some(end)) => Some(IndexRange::new(start, end)),
_ => None,
}
}
}

impl Index for i32 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}
}

impl Index for i64 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}
}

impl Index for u32 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}
}

impl Index for u64 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}
}
2 changes: 2 additions & 0 deletions src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ use std::{

mod bit_chunk;
pub use bit_chunk::{BitChunk, BitChunkIter};
mod index;
pub mod simd;
pub use index::*;

use crate::datatypes::{DataType, IntervalUnit, TimeUnit};

Expand Down