Skip to content

Commit

Permalink
Faster ByteArray to StringArray conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Nov 23, 2022
1 parent 12a67b9 commit 5598db0
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion arrow-array/src/array/string_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,19 @@ impl<OffsetSize: OffsetSizeTrait> From<GenericBinaryArray<OffsetSize>>
for GenericStringArray<OffsetSize>
{
/// Converts a `GenericBinaryArray` into a `GenericStringArray` of the same offset size,
/// reusing the array's data via `into_data().into_builder()` with only the data type changed.
///
/// # Panics
///
/// Panics if the bytes in buffer 1 are not valid UTF-8, or if any value offset does not
/// fall on a character boundary of that buffer.
// NOTE(review): this text comes from a rendered diff without +/- markers. Per the page header
// ("12 additions and 1 deletion") the `builder.build().unwrap()` line below appears to be the
// deleted pre-commit line and the `build_unchecked` call its replacement; confirm against the
// actual file before treating both as live code.
fn from(v: GenericBinaryArray<OffsetSize>) -> Self {
let offsets = v.value_offsets();
// Buffer index 1 is read as the raw bytes to validate (presumably the values buffer of the
// binary layout — TODO confirm).
// NOTE(review): the whole buffer is validated, not just the byte range spanned by
// `offsets`; verify this is intended for sliced arrays.
let values = v.data().buffers()[1].as_ref();

// We only need to validate that all values are valid UTF-8
// This is done with a single `from_utf8` pass over the buffer rather than one call per value.
let validated = std::str::from_utf8(values).unwrap();
// Each offset must land on a char boundary; otherwise a value would start or end in the
// middle of a multi-byte character even though the buffer as a whole is valid UTF-8.
for offset in offsets.iter() {
assert!(validated.is_char_boundary(offset.as_usize()))
}

// Retag the existing array data with the string data type.
let builder = v.into_data().into_builder().data_type(Self::DATA_TYPE);
Self::from(builder.build().unwrap())
// SAFETY:
// Validated UTF-8 above
// (the buffer passed `from_utf8` and every offset was asserted to be a char boundary).
Self::from(unsafe { builder.build_unchecked() })
}
}

Expand Down

0 comments on commit 5598db0

Please sign in to comment.