From a18555c055219bd1243eae785c635b9ab3feff30 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Sun, 5 Dec 2021 21:15:25 +0800 Subject: [PATCH] Improved performance in cast Primitive to Binary/String again (4x) (#651) --- src/array/binary/mod.rs | 2 +- src/compute/cast/primitive_to.rs | 90 ++++++++++++++++++++------------ 2 files changed, 58 insertions(+), 34 deletions(-) diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index 0369a305588..6a1e8ad3aff 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -90,7 +90,7 @@ impl BinaryArray { /// * The `data_type`'s physical type is not consistent with the offset `O`. /// * The last element of `offsets` is different from `values.len()`. /// * The validity is not `None` and its length is different from `offsets.len() - 1`. - pub fn from_data_unchecked( + pub unsafe fn from_data_unchecked( data_type: DataType, offsets: Buffer, values: Buffer, diff --git a/src/compute/cast/primitive_to.rs b/src/compute/cast/primitive_to.rs index 50ab5c97f2d..bbffdbdb2fe 100644 --- a/src/compute/cast/primitive_to.rs +++ b/src/compute/cast/primitive_to.rs @@ -1,5 +1,6 @@ use std::hash::Hash; +use crate::buffer::MutableBuffer; use crate::error::Result; use crate::{ array::*, @@ -8,7 +9,6 @@ use crate::{ datatypes::{DataType, TimeUnit}, temporal_conversions::*, types::NativeType, - util::lexical_to_bytes_mut, }; use super::CastOptions; @@ -17,21 +17,34 @@ use super::CastOptions; pub fn primitive_to_binary( from: &PrimitiveArray, ) -> BinaryArray { - let mut buffer = vec![]; - let builder = from.iter().fold( - MutableBinaryArray::::with_capacity(from.len()), - |mut builder, x| { - match x { - Some(x) => { - lexical_to_bytes_mut(*x, &mut buffer); - builder.push(Some(buffer.as_slice())); - } - None => builder.push_null(), - } - builder - }, - ); - builder.into() + let mut values: MutableBuffer = MutableBuffer::with_capacity(from.len()); + let mut offsets: MutableBuffer = MutableBuffer::with_capacity(from.len() + 1); + offsets.push(O::default()); + + let mut offset: usize = 0; + + unsafe { + for x in from.values().iter() { + values.reserve(offset + T::FORMATTED_SIZE_DECIMAL); + + let bytes = std::slice::from_raw_parts_mut( + values.as_mut_ptr().add(offset), + values.capacity() - offset, + ); + let len = lexical_core::write_unchecked(*x, bytes).len(); + + offset += len; + offsets.push(O::from_isize(offset as isize).unwrap()); + } + values.set_len(offset); + values.shrink_to_fit(); + BinaryArray::::from_data_unchecked( + BinaryArray::::default_data_type(), + offsets.into(), + values.into(), + from.validity().cloned(), + ) + } } pub(super) fn primitive_to_binary_dyn(from: &dyn Array) -> Result> @@ -70,23 +83,34 @@ where pub fn primitive_to_utf8( from: &PrimitiveArray, ) -> Utf8Array { - let mut buffer = vec![]; - let builder = from.iter().fold( - MutableUtf8Array::::with_capacity(from.len()), - |mut builder, x| { - match x { - Some(x) => { - lexical_to_bytes_mut(*x, &mut buffer); - builder.push(Some(unsafe { - std::str::from_utf8_unchecked(buffer.as_slice()) - })); - } - None => builder.push_null(), - } - builder - }, - ); - builder.into() + let mut values: MutableBuffer = MutableBuffer::with_capacity(from.len()); + let mut offsets: MutableBuffer = MutableBuffer::with_capacity(from.len() + 1); + offsets.push(O::default()); + + let mut offset: usize = 0; + + unsafe { + for x in from.values().iter() { + values.reserve(offset + T::FORMATTED_SIZE_DECIMAL); + + let bytes = std::slice::from_raw_parts_mut( + values.as_mut_ptr().add(offset), + values.capacity() - offset, + ); + let len = lexical_core::write_unchecked(*x, bytes).len(); + + offset += len; + offsets.push(O::from_isize(offset as isize).unwrap()); + } + values.set_len(offset); + values.shrink_to_fit(); + Utf8Array::::from_data_unchecked( + Utf8Array::::default_data_type(), + offsets.into(), + values.into(), + from.validity().cloned(), + ) + } } pub(super) fn primitive_to_utf8_dyn(from: &dyn Array) -> Result>