Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved performance in cast Primitive to Binary/String again (4x) (#651)
Browse files (browse the repository at this point in the history)
  • Loading branch information
sundy-li authored Dec 5, 2021
1 parent de87058 commit a18555c
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 34 deletions.
2 changes: 1 addition & 1 deletion src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ impl<O: Offset> BinaryArray<O> {
/// * The `data_type`'s physical type is not consistent with the offset `O`.
/// * The last element of `offsets` is different from `values.len()`.
/// * The validity is not `None` and its length is different from `offsets.len() - 1`.
pub fn from_data_unchecked(
pub unsafe fn from_data_unchecked(
data_type: DataType,
offsets: Buffer<O>,
values: Buffer<u8>,
Expand Down
90 changes: 57 additions & 33 deletions src/compute/cast/primitive_to.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::hash::Hash;

use crate::buffer::MutableBuffer;
use crate::error::Result;
use crate::{
array::*,
Expand All @@ -8,7 +9,6 @@ use crate::{
datatypes::{DataType, TimeUnit},
temporal_conversions::*,
types::NativeType,
util::lexical_to_bytes_mut,
};

use super::CastOptions;
Expand All @@ -17,21 +17,34 @@ use super::CastOptions;
// Casts `from` into a `BinaryArray<O>` whose elements are the bytes that `lexical_core`
// writes for each number.
// NOTE(review): this is a rendered diff whose +/- markers were lost. Going by the hunk
// header (-17,21 +17,34), the fold-based body directly below is the REMOVED code and the
// `MutableBuffer`-based body after it is the ADDED code; only one of them exists in the
// file at any given revision.
pub fn primitive_to_binary<T: NativeType + lexical_core::ToLexical, O: Offset>(
from: &PrimitiveArray<T>,
) -> BinaryArray<O> {
// --- removed implementation -------------------------------------------------------
// Reuses one scratch `buffer` across all elements and pushes each result (or a null)
// into a `MutableBinaryArray` builder.
let mut buffer = vec![];
let builder = from.iter().fold(
MutableBinaryArray::<O>::with_capacity(from.len()),
|mut builder, x| {
match x {
Some(x) => {
// presumably fills `buffer` with the formatted bytes of `*x` — helper not shown here
lexical_to_bytes_mut(*x, &mut buffer);
builder.push(Some(buffer.as_slice()));
}
None => builder.push_null(),
}
builder
},
);
builder.into()
// --- added implementation ---------------------------------------------------------
// Writes the formatted numbers straight into the final `values` buffer and records the
// running end position of each element in `offsets`.
let mut values: MutableBuffer<u8> = MutableBuffer::with_capacity(from.len());
let mut offsets: MutableBuffer<O> = MutableBuffer::with_capacity(from.len() + 1);
// The first offset is the default value of `O`, marking the start of element 0.
offsets.push(O::default());

// Number of bytes written into `values` so far; also the end offset of the last element.
let mut offset: usize = 0;

unsafe {
// Iterates over `values()` rather than the `Option`-yielding `iter()`, so every slot
// is formatted regardless of validity; the validity bitmap is attached unchanged below.
for x in from.values().iter() {
// `values`' length is only set after the loop (`set_len` below), so this requests
// room for everything written so far plus one more formatted value.
// NOTE(review): assumes `reserve` is relative to the current length, like
// `Vec::reserve` — confirm against `MutableBuffer`.
values.reserve(offset + T::FORMATTED_SIZE_DECIMAL);

// Mutable view over the allocated tail of `values`, starting at `offset` and
// running to the end of the current capacity.
let bytes = std::slice::from_raw_parts_mut(
values.as_mut_ptr().add(offset),
values.capacity() - offset,
);
// Length of what `write_unchecked` returns, used as the number of bytes written.
let len = lexical_core::write_unchecked(*x, bytes).len();

offset += len;
// Panics (via `unwrap`) if `from_isize` yields `None`
// — presumably when `offset` does not fit in `O`; verify.
offsets.push(O::from_isize(offset as isize).unwrap());
}
// Publish the bytes written through the raw pointer as the buffer's length.
values.set_len(offset);
values.shrink_to_fit();
// `from_data_unchecked` is an `unsafe fn` as of this commit (see the
// src/array/binary/mod.rs hunk), hence the call inside the `unsafe` block.
BinaryArray::<O>::from_data_unchecked(
BinaryArray::<O>::default_data_type(),
offsets.into(),
values.into(),
from.validity().cloned(),
)
}
}

pub(super) fn primitive_to_binary_dyn<T, O>(from: &dyn Array) -> Result<Box<dyn Array>>
Expand Down Expand Up @@ -70,23 +83,34 @@ where
// Casts `from` into a `Utf8Array<O>` whose elements are the text that `lexical_core`
// writes for each number.
// NOTE(review): this is a rendered diff whose +/- markers were lost. Going by the hunk
// header (-70,23 +83,34), the fold-based body directly below is the REMOVED code and the
// `MutableBuffer`-based body after it is the ADDED code; only one of them exists in the
// file at any given revision.
pub fn primitive_to_utf8<T: NativeType + lexical_core::ToLexical, O: Offset>(
from: &PrimitiveArray<T>,
) -> Utf8Array<O> {
// --- removed implementation -------------------------------------------------------
// Reuses one scratch `buffer` across all elements and pushes each result (or a null)
// into a `MutableUtf8Array` builder.
let mut buffer = vec![];
let builder = from.iter().fold(
MutableUtf8Array::<O>::with_capacity(from.len()),
|mut builder, x| {
match x {
Some(x) => {
// presumably fills `buffer` with the formatted bytes of `*x` — helper not shown here
lexical_to_bytes_mut(*x, &mut buffer);
// Skips UTF-8 validation of the scratch bytes.
// NOTE(review): relies on the formatter emitting valid UTF-8 — confirm.
builder.push(Some(unsafe {
std::str::from_utf8_unchecked(buffer.as_slice())
}));
}
None => builder.push_null(),
}
builder
},
);
builder.into()
// --- added implementation ---------------------------------------------------------
// Writes the formatted numbers straight into the final `values` buffer and records the
// running end position of each element in `offsets`.
let mut values: MutableBuffer<u8> = MutableBuffer::with_capacity(from.len());
let mut offsets: MutableBuffer<O> = MutableBuffer::with_capacity(from.len() + 1);
// The first offset is the default value of `O`, marking the start of element 0.
offsets.push(O::default());

// Number of bytes written into `values` so far; also the end offset of the last element.
let mut offset: usize = 0;

unsafe {
// Iterates over `values()` rather than the `Option`-yielding `iter()`, so every slot
// is formatted regardless of validity; the validity bitmap is attached unchanged below.
for x in from.values().iter() {
// `values`' length is only set after the loop (`set_len` below), so this requests
// room for everything written so far plus one more formatted value.
// NOTE(review): assumes `reserve` is relative to the current length, like
// `Vec::reserve` — confirm against `MutableBuffer`.
values.reserve(offset + T::FORMATTED_SIZE_DECIMAL);

// Mutable view over the allocated tail of `values`, starting at `offset` and
// running to the end of the current capacity.
let bytes = std::slice::from_raw_parts_mut(
values.as_mut_ptr().add(offset),
values.capacity() - offset,
);
// Length of what `write_unchecked` returns, used as the number of bytes written.
let len = lexical_core::write_unchecked(*x, bytes).len();

offset += len;
// Panics (via `unwrap`) if `from_isize` yields `None`
// — presumably when `offset` does not fit in `O`; verify.
offsets.push(O::from_isize(offset as isize).unwrap());
}
// Publish the bytes written through the raw pointer as the buffer's length.
values.set_len(offset);
values.shrink_to_fit();
// NOTE(review): the unchecked constructor presumably skips UTF-8 and offset
// validation, so this relies on the bytes written above being valid UTF-8 — confirm.
Utf8Array::<O>::from_data_unchecked(
Utf8Array::<O>::default_data_type(),
offsets.into(),
values.into(),
from.validity().cloned(),
)
}
}

pub(super) fn primitive_to_utf8_dyn<T, O>(from: &dyn Array) -> Result<Box<dyn Array>>
Expand Down

0 comments on commit a18555c

Please sign in to comment.