Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added IndexRange to remove checks in hot loops (#247)
Browse files Browse the repository at this point in the history
* Added IndexRange to improve performance in compute.

* Removed unsafe.
  • Loading branch information
jorgecarleitao authored Aug 4, 2021
1 parent 3d123b3 commit 60ffdba
Show file tree
Hide file tree
Showing 12 changed files with 135 additions and 155 deletions.
2 changes: 1 addition & 1 deletion src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ pub use fixed_size_list::FixedSizeListArray;
pub use list::{ListArray, MutableListArray};
pub use null::NullArray;
pub use primitive::*;
pub use specification::{Index, Offset};
pub use specification::Offset;
pub use struct_::StructArray;
pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter};

Expand Down
128 changes: 2 additions & 126 deletions src/array/specification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,13 @@ use std::convert::TryFrom;

use num::Num;

use crate::{
buffer::{Buffer, MutableBuffer},
types::{NativeType, NaturalDataType},
};

/// Trait describing any type that can be used to index a slot of an array.
pub trait Index: NativeType + NaturalDataType {
fn to_usize(&self) -> usize;
fn from_usize(index: usize) -> Option<Self>;
fn is_usize() -> bool {
false
}

fn buffer_from_range(start: usize, end: usize) -> Option<MutableBuffer<Self>>;
}
use crate::{buffer::Buffer, types::Index};

/// Trait describing types that can be used as offsets as per Arrow specification.
/// This trait is only implemented for `i32` and `i64`, the two sizes part of the specification.
/// # Safety
/// Do not implement.
pub unsafe trait Offset:
Index + Num + Ord + std::ops::AddAssign + std::ops::Sub + num::CheckedAdd
{
pub unsafe trait Offset: Index + Num + Ord + num::CheckedAdd {
fn is_large() -> bool;

fn to_isize(&self) -> isize;
Expand Down Expand Up @@ -66,114 +50,6 @@ unsafe impl Offset for i64 {
}
}

impl Index for i32 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}

fn buffer_from_range(start: usize, end: usize) -> Option<MutableBuffer<Self>> {
let start = Self::from_usize(start);
let end = Self::from_usize(end);
match (start, end) {
(Some(start), Some(end)) => unsafe {
Some(MutableBuffer::<Self>::from_trusted_len_iter_unchecked(
start..end,
))
},
_ => None,
}
}
}

impl Index for i64 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}

fn buffer_from_range(start: usize, end: usize) -> Option<MutableBuffer<Self>> {
let start = Self::from_usize(start);
let end = Self::from_usize(end);
match (start, end) {
(Some(start), Some(end)) => unsafe {
Some(MutableBuffer::<Self>::from_trusted_len_iter_unchecked(
start..end,
))
},
_ => None,
}
}
}

impl Index for u32 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}

fn is_usize() -> bool {
std::mem::size_of::<Self>() == std::mem::size_of::<usize>()
}

fn buffer_from_range(start: usize, end: usize) -> Option<MutableBuffer<Self>> {
let start = Self::from_usize(start);
let end = Self::from_usize(end);
match (start, end) {
(Some(start), Some(end)) => unsafe {
Some(MutableBuffer::<Self>::from_trusted_len_iter_unchecked(
start..end,
))
},
_ => None,
}
}
}

impl Index for u64 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}

fn is_usize() -> bool {
std::mem::size_of::<Self>() == std::mem::size_of::<usize>()
}

fn buffer_from_range(start: usize, end: usize) -> Option<MutableBuffer<Self>> {
let start = Self::from_usize(start);
let end = Self::from_usize(end);
match (start, end) {
(Some(start), Some(end)) => unsafe {
Some(MutableBuffer::<Self>::from_trusted_len_iter_unchecked(
start..end,
))
},
_ => None,
}
}
}

#[inline]
pub fn check_offsets<O: Offset>(offsets: &Buffer<O>, values_len: usize) -> usize {
assert!(
Expand Down
9 changes: 0 additions & 9 deletions src/buffer/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,6 @@ impl<T: NativeType> MutableBuffer<T> {
}
}

/// Allocates a new [MutableBuffer] with `len` and capacity to be at least `len` where
/// all bytes are not initialized
#[inline]
pub unsafe fn from_len(len: usize) -> Self {
let mut buffer = MutableBuffer::with_capacity(len);
buffer.set_len(len);
buffer
}

/// Ensures that this buffer has at least `self.len + additional` bytes. This re-allocates iff
/// `self.len + additional > capacity`.
/// # Example
Expand Down
3 changes: 2 additions & 1 deletion src/compute/sort/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{
array::{Array, BooleanArray, Index, PrimitiveArray},
array::{Array, BooleanArray, PrimitiveArray},
buffer::MutableBuffer,
types::Index,
};

use super::SortOptions;
Expand Down
17 changes: 7 additions & 10 deletions src/compute/sort/common.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
use crate::{
array::{Index, PrimitiveArray},
bitmap::Bitmap,
buffer::MutableBuffer,
};
use crate::{array::PrimitiveArray, bitmap::Bitmap, buffer::MutableBuffer, types::Index};

use super::SortOptions;

Expand Down Expand Up @@ -100,19 +96,19 @@ where
let limit = limit.min(length);

let indices = if let Some(validity) = validity {
let mut indices = unsafe { MutableBuffer::<I>::from_len(length) };
let mut indices = MutableBuffer::<I>::from_len_zeroed(length);
if options.nulls_first {
let mut nulls = 0;
let mut valids = 0;
validity
.iter()
.zip(0..length)
.zip(I::range(0, length).unwrap())
.for_each(|(is_valid, index)| {
if is_valid {
indices[validity.null_count() + valids] = I::from_usize(index).unwrap();
indices[validity.null_count() + valids] = index;
valids += 1;
} else {
indices[nulls] = I::from_usize(index).unwrap();
indices[nulls] = index;
nulls += 1;
}
});
Expand Down Expand Up @@ -154,7 +150,8 @@ where

indices
} else {
let mut indices = Index::buffer_from_range(0, length).unwrap();
let mut indices = MutableBuffer::from_trusted_len_iter(I::range(0, length).unwrap());

// Soundness:
// indices are by construction `< values.len()`
// limit is by construction `< values.len()`
Expand Down
3 changes: 2 additions & 1 deletion src/compute/sort/lex_sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ use std::cmp::Ordering;
use crate::compute::take;
use crate::error::{ArrowError, Result};
use crate::{
array::{ord, Array, Index, PrimitiveArray},
array::{ord, Array, PrimitiveArray},
buffer::MutableBuffer,
types::Index,
};

use super::{sort_to_indices, SortOptions};
Expand Down
2 changes: 1 addition & 1 deletion src/compute/sort/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::{
array::*,
types::{days_ms, NativeType},
types::{days_ms, Index, NativeType},
};

use crate::buffer::MutableBuffer;
Expand Down
4 changes: 2 additions & 2 deletions src/compute/sort/primitive/indices.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
array::{Array, Index, PrimitiveArray},
types::NativeType,
array::{Array, PrimitiveArray},
types::{Index, NativeType},
};

use super::super::common;
Expand Down
3 changes: 2 additions & 1 deletion src/compute/sort/utf8.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::array::{Array, Index, Offset, PrimitiveArray, Utf8Array};
use crate::array::{Array, Offset, PrimitiveArray, Utf8Array};
use crate::array::{DictionaryArray, DictionaryKey};
use crate::types::Index;

use super::common;
use super::SortOptions;
Expand Down
4 changes: 1 addition & 3 deletions src/compute/take/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,9 @@ use crate::{
array::{new_empty_array, Array, NullArray, PrimitiveArray},
datatypes::{DataType, IntervalUnit},
error::Result,
types::days_ms,
types::{days_ms, Index},
};

pub use crate::array::Index;

mod binary;
mod boolean;
mod dict;
Expand Down
113 changes: 113 additions & 0 deletions src/types/index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
use std::convert::TryFrom;

use crate::{
trusted_len::TrustedLen,
types::{NativeType, NaturalDataType},
};

/// iterator of [`Index`] equivalent to `(a..b)`.
// `Step` is unstable in Rust which does not allow (a..b) for generic `Index`.
pub struct IndexRange<I: Index> {
start: I,
end: I,
}

impl<I: Index> IndexRange<I> {
pub fn new(start: I, end: I) -> Self {
assert!(end >= start);
Self { start, end }
}
}

impl<I: Index> Iterator for IndexRange<I> {
type Item = I;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.start == self.end {
return None;
}
let old = self.start;
self.start += I::one();
Some(old)
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let len = (self.end - self.start).to_usize();
(len, Some(len))
}
}

/// Safety: a range is always of known length
unsafe impl<I: Index> TrustedLen for IndexRange<I> {}

/// Trait describing any type that can be used to index a slot of an array.
pub trait Index:
NativeType
+ NaturalDataType
+ std::ops::AddAssign
+ std::ops::Sub<Output = Self>
+ num::One
+ PartialOrd
{
fn to_usize(&self) -> usize;
fn from_usize(index: usize) -> Option<Self>;

fn range(start: usize, end: usize) -> Option<IndexRange<Self>> {
let start = Self::from_usize(start);
let end = Self::from_usize(end);
match (start, end) {
(Some(start), Some(end)) => Some(IndexRange::new(start, end)),
_ => None,
}
}
}

impl Index for i32 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}
}

impl Index for i64 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}
}

impl Index for u32 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}
}

impl Index for u64 {
#[inline]
fn to_usize(&self) -> usize {
*self as usize
}

#[inline]
fn from_usize(value: usize) -> Option<Self> {
Self::try_from(value).ok()
}
}
2 changes: 2 additions & 0 deletions src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ use std::{

mod bit_chunk;
pub use bit_chunk::{BitChunk, BitChunkIter};
mod index;
pub mod simd;
pub use index::*;

use crate::datatypes::{DataType, IntervalUnit, TimeUnit};

Expand Down

0 comments on commit 60ffdba

Please sign in to comment.