From 7de7da463325652e74289bb43886aff3cb091dc7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 21 Oct 2024 11:10:58 -0400 Subject: [PATCH 1/3] Minor: improve docs for ByteViewArray->ByteArray From impl --- arrow-array/src/array/byte_view_array.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index b1b5580577ab..d6cf89634401 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -599,8 +599,16 @@ impl From for GenericByteViewArray { } } -/// Convert a [`GenericByteArray`] to a [`GenericByteViewArray`] but in a smart way: -/// If the offsets are all less than u32::MAX, then we directly build the view array on top of existing buffer. +/// Efficiently convert a [`GenericByteArray`] to a [`GenericByteViewArray`] +/// +/// For example this method can convert a [`StringArray`] to a +/// [`StringViewArray`]. +/// +/// If the offsets are all less than u32::MAX, the new [`GenericByteViewArray`] +/// is build without copying the underlying string data (views are created +/// directly into the existing buffer) +/// +/// [`StringArray`]: crate::StringArray impl From<&GenericByteArray> for GenericByteViewArray where FROM: ByteArrayType, @@ -616,6 +624,7 @@ where }; if can_reuse_buffer { + // build views directly pointing to the existing buffer let len = byte_array.len(); let mut views_builder = GenericByteViewBuilder::::with_capacity(len); let str_values_buf = byte_array.values().clone(); @@ -638,7 +647,9 @@ where assert_eq!(views_builder.len(), len); views_builder.finish() } else { - // TODO: the first u32::MAX can still be reused + // otherwise, create a new buffer for large strings + // TODO: the original buffer could still be used + // until the offset reaches `u32::max`. GenericByteViewArray::::from_iter(byte_array.iter()) } } From 45b7f463eba89656d86670b658d61960cbb44004 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 21 Oct 2024 11:15:34 -0400 Subject: [PATCH 2/3] Update arrow-array/src/array/byte_view_array.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-array/src/array/byte_view_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index d6cf89634401..d5918a22667e 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -605,7 +605,7 @@ impl From for GenericByteViewArray { /// [`StringViewArray`]. /// /// If the offsets are all less than u32::MAX, the new [`GenericByteViewArray`] -/// is build without copying the underlying string data (views are created +/// is built without copying the underlying string data (views are created /// directly into the existing buffer) /// /// [`StringArray`]: crate::StringArray From 29155adfea57c9721e5d25e40536e83398795d0a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 21 Oct 2024 11:16:04 -0400 Subject: [PATCH 3/3] Update arrow-array/src/array/byte_view_array.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-array/src/array/byte_view_array.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index d5918a22667e..5b313913a7ef 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -647,9 +647,9 @@ where assert_eq!(views_builder.len(), len); views_builder.finish() } else { - // otherwise, create a new buffer for large strings + // Otherwise, create a new buffer for large strings // TODO: the original buffer could still be used - // until the offset reaches `u32::max`. + // by making multiple slices of u32::MAX length GenericByteViewArray::::from_iter(byte_array.iter()) } }