From 7de7da463325652e74289bb43886aff3cb091dc7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 21 Oct 2024 11:10:58 -0400 Subject: [PATCH] Minor: improve docs for ByteViewArray->ByteArray From impl --- arrow-array/src/array/byte_view_array.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index b1b5580577ab..d6cf89634401 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -599,8 +599,16 @@ impl From for GenericByteViewArray { } } -/// Convert a [`GenericByteArray`] to a [`GenericByteViewArray`] but in a smart way: -/// If the offsets are all less than u32::MAX, then we directly build the view array on top of existing buffer. +/// Efficiently convert a [`GenericByteArray`] to a [`GenericByteViewArray`] +/// +/// For example this method can convert a [`StringArray`] to a +/// [`StringViewArray`]. +/// +/// If the offsets are all less than u32::MAX, the new [`GenericByteViewArray`] +/// is built without copying the underlying string data (views are created +/// directly into the existing buffer) +/// +/// [`StringArray`]: crate::StringArray impl From<&GenericByteArray> for GenericByteViewArray where FROM: ByteArrayType, @@ -616,6 +624,7 @@ where }; if can_reuse_buffer { + // build views directly pointing to the existing buffer let len = byte_array.len(); let mut views_builder = GenericByteViewBuilder::::with_capacity(len); let str_values_buf = byte_array.values().clone(); @@ -638,7 +647,9 @@ where assert_eq!(views_builder.len(), len); views_builder.finish() } else { - // TODO: the first u32::MAX can still be reused + // otherwise, create a new buffer for large strings + // TODO: the original buffer could still be used + // until the offset reaches `u32::MAX`. GenericByteViewArray::::from_iter(byte_array.iter()) } }