diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs
index 8d92093f5ce8..5dcfd7d1d3ff 100644
--- a/arrow-array/src/array/string_array.rs
+++ b/arrow-array/src/array/string_array.rs
@@ -216,7 +216,32 @@ impl<OffsetSize: OffsetSizeTrait> From<GenericBinaryArray<OffsetSize>>
     for GenericStringArray<OffsetSize>
 {
+    /// Reinterprets a binary array as a string array.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the value buffer is not valid UTF-8, or if any value offset
+    /// does not fall on a UTF-8 character boundary.
     fn from(v: GenericBinaryArray<OffsetSize>) -> Self {
+        let offsets = v.value_offsets();
+        let values = v.data().buffers()[1].as_ref();
+
+        // Validate the value buffer once as a whole, then check that no offset
+        // splits a multi-byte character; together these guarantee that every
+        // individual value is valid UTF-8.
+        let validated = std::str::from_utf8(values).expect("Invalid UTF-8 sequence");
+        for offset in offsets.iter() {
+            let idx = offset.as_usize();
+            assert!(
+                validated.is_char_boundary(idx),
+                "Invalid offset {}: not a UTF-8 character boundary",
+                idx
+            );
+        }
+
         let builder = v.into_data().into_builder().data_type(Self::DATA_TYPE);
-        Self::from(builder.build().unwrap())
+        // SAFETY:
+        // The values were validated as UTF-8 above, and everything else is
+        // carried over unchanged from the source array's data.
+        Self::from(unsafe { builder.build_unchecked() })
     }
 }