diff --git a/crates/polars-core/src/chunked_array/ops/explode.rs b/crates/polars-core/src/chunked_array/ops/explode.rs index b44ee0863a98..9817a6732db9 100644 --- a/crates/polars-core/src/chunked_array/ops/explode.rs +++ b/crates/polars-core/src/chunked_array/ops/explode.rs @@ -1,16 +1,9 @@ use arrow::array::*; use arrow::bitmap::utils::set_bit_unchecked; use arrow::bitmap::{Bitmap, MutableBitmap}; -use arrow::legacy::array::list::AnonymousBuilder; -#[cfg(feature = "dtype-array")] -use arrow::legacy::is_valid::IsValid; use arrow::legacy::prelude::*; -use arrow::legacy::trusted_len::TrustedLenPush; use polars_utils::slice::GetSaferUnchecked; -#[cfg(feature = "dtype-array")] -use crate::chunked_array::builder::get_fixed_size_list_builder; -use crate::chunked_array::metadata::MetadataProperties; use crate::prelude::*; use crate::series::implementations::null::NullChunked; @@ -160,12 +153,18 @@ where impl ExplodeByOffsets for Float32Chunked { fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.apply_as_ints(|s| s.explode_by_offsets(offsets)) + self.apply_as_ints(|s| { + let ca = s.u32().unwrap(); + ca.explode_by_offsets(offsets) + }) } } impl ExplodeByOffsets for Float64Chunked { fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.apply_as_ints(|s| s.explode_by_offsets(offsets)) + self.apply_as_ints(|s| { + let ca = s.u64().unwrap(); + ca.explode_by_offsets(offsets) + }) } } @@ -225,166 +224,6 @@ impl ExplodeByOffsets for BooleanChunked { } } -impl ExplodeByOffsets for ListChunked { - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - debug_assert_eq!(self.chunks.len(), 1); - let arr = self.downcast_iter().next().unwrap(); - - let cap = get_capacity(offsets); - let inner_type = self.inner_dtype(); - - let mut builder = arrow::legacy::array::list::AnonymousBuilder::new(cap); - let mut owned = Vec::with_capacity(cap); - let mut start = offsets[0] as usize; - let mut last = start; - - let mut process_range = |start: usize, last: usize, builder: &mut AnonymousBuilder<'_>| { - let vals = arr.slice_typed(start, last - start); - for opt_arr in vals.into_iter() { - match opt_arr { - None => builder.push_null(), - Some(arr) => { - unsafe { - // we create a pointer to evade the bck - let ptr = arr.as_ref() as *const dyn Array; - // SAFETY: we preallocated - owned.push_unchecked(arr); - // SAFETY: the pointer is still valid as `owned` will not reallocate - builder.push(&*ptr as &dyn Array); - } - }, - } - } - }; - - for &o in &offsets[1..] { - let o = o as usize; - if o == last { - if start != last { - process_range(start, last, &mut builder); - } - builder.push_null(); - start = o; - } - last = o; - } - process_range(start, last, &mut builder); - let arr = builder - .finish(Some(&inner_type.to_arrow(CompatLevel::newest()))) - .unwrap(); - let mut ca = unsafe { self.copy_with_chunks(vec![Box::new(arr)]) }; - - use MetadataProperties as P; - ca.copy_metadata(self, P::SORTED | P::FAST_EXPLODE_LIST); - - ca.into_series() - } -} - -#[cfg(feature = "dtype-array")] -impl ExplodeByOffsets for ArrayChunked { - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - debug_assert_eq!(self.chunks.len(), 1); - let arr = self.downcast_iter().next().unwrap(); - - let cap = get_capacity(offsets); - let inner_type = self.inner_dtype(); - let mut builder = - get_fixed_size_list_builder(inner_type, cap, self.width(), self.name()).unwrap(); - - let mut start = offsets[0] as usize; - let mut last = start; - for &o in &offsets[1..] { - let o = o as usize; - if o == last { - if start != last { - let array = arr.slice_typed(start, last - start); - let values = array.values().as_ref(); - - for i in 0..array.len() { - unsafe { - if array.is_valid_unchecked(i) { - builder.push_unchecked(values, i) - } else { - builder.push_null() - } - } - } - } - unsafe { - builder.push_null(); - } - start = o; - } - last = o; - } - let array = arr.slice_typed(start, last - start); - let values = array.values().as_ref(); - for i in 0..array.len() { - unsafe { - if array.is_valid_unchecked(i) { - builder.push_unchecked(values, i) - } else { - builder.push_null() - } - } - } - - builder.finish().into() - } -} - -impl ExplodeByOffsets for StringChunked { - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - unsafe { - self.as_binary() - .explode_by_offsets(offsets) - .cast_unchecked(&DataType::String) - .unwrap() - } - } -} - -impl ExplodeByOffsets for BinaryChunked { - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - debug_assert_eq!(self.chunks.len(), 1); - let arr = self.downcast_iter().next().unwrap(); - - let cap = get_capacity(offsets); - let mut builder = BinaryChunkedBuilder::new(self.name(), cap); - - let mut start = offsets[0] as usize; - let mut last = start; - for &o in &offsets[1..] { - let o = o as usize; - if o == last { - if start != last { - let vals = arr.slice_typed(start, last - start); - if vals.null_count() == 0 { - builder - .chunk_builder - .extend_trusted_len_values(vals.values_iter()) - } else { - builder.chunk_builder.extend_trusted_len(vals.into_iter()); - } - } - builder.append_null(); - start = o; - } - last = o; - } - let vals = arr.slice_typed(start, last - start); - if vals.null_count() == 0 { - builder - .chunk_builder - .extend_trusted_len_values(vals.values_iter()) - } else { - builder.chunk_builder.extend_trusted_len(vals.into_iter()); - } - builder.finish().into() - } -} - /// Convert Arrow array offsets to indexes of the original list pub(crate) fn offsets_to_indexes(offsets: &[i64], capacity: usize) -> Vec { if offsets.is_empty() { @@ -454,32 +293,6 @@ mod test { Ok(()) } - #[test] - fn test_explode_list_nulls() -> PolarsResult<()> { - let ca = Int32Chunked::from_slice_options("", &[None, Some(1), Some(2)]); - let offsets = &[0, 3, 3]; - let out = ca.explode_by_offsets(offsets); - assert_eq!( - Vec::from(out.i32().unwrap()), - &[None, Some(1), Some(2), None] - ); - - let ca = BooleanChunked::from_slice_options("", &[None, Some(true), Some(false)]); - let out = ca.explode_by_offsets(offsets); - assert_eq!( - Vec::from(out.bool().unwrap()), - &[None, Some(true), Some(false), None] - ); - - let ca = StringChunked::from_slice_options("", &[None, Some("b"), Some("c")]); - let out = ca.explode_by_offsets(offsets); - assert_eq!( - Vec::from(out.str().unwrap()), - &[None, Some("b"), Some("c"), None] - ); - Ok(()) - } - #[test] fn test_explode_empty_list_slot() -> PolarsResult<()> { // primitive diff --git a/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs b/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs index f335e3074665..3c3e81ff5a00 100644 --- a/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs +++ b/crates/polars-core/src/chunked_array/ops/explode_and_offsets.rs @@ -3,6 +3,51 @@ use arrow::offset::OffsetsBuffer; use super::*; +impl ListChunked { + fn specialized( + &self, + values: ArrayRef, + offsets: &[i64], + offsets_buf: OffsetsBuffer, + ) -> (Series, OffsetsBuffer) { + // SAFETY: inner_dtype should be correct + let values = unsafe { + Series::from_chunks_and_dtype_unchecked( + self.name(), + vec![values], + &self.inner_dtype().to_physical(), + ) + }; + + use crate::chunked_array::ops::explode::ExplodeByOffsets; + + let mut values = match values.dtype() { + DataType::Boolean => { + let t = values.bool().unwrap(); + ExplodeByOffsets::explode_by_offsets(t, offsets).into_series() + }, + DataType::Null => { + let t = values.null().unwrap(); + ExplodeByOffsets::explode_by_offsets(t, offsets).into_series() + }, + dtype => { + with_match_physical_numeric_polars_type!(dtype, |$T| { + let t: &ChunkedArray<$T> = values.as_ref().as_ref(); + ExplodeByOffsets::explode_by_offsets(t, offsets).into_series() + }) + }, + }; + + // let mut values = values.explode_by_offsets(offsets); + // restore logical type + unsafe { + values = values.cast_unchecked(self.inner_dtype()).unwrap(); + } + + (values, offsets_buf) + } +} + impl ChunkExplode for ListChunked { fn offsets(&self) -> PolarsResult> { let ca = self.rechunk(); @@ -64,16 +109,36 @@ impl ChunkExplode for ListChunked { panic!("could have fast exploded") } } - if listarr.null_count() == 0 { - // SAFETY: inner_dtype should be correct - let values = unsafe { - Series::from_chunks_and_dtype_unchecked( - self.name(), - vec![values], - &self.inner_dtype().to_physical(), - ) - }; - (values.explode_by_offsets(offsets), offsets_buf) + let (indices, new_offsets) = if listarr.null_count() == 0 { + // SPECIALIZED path. + let inner_phys = self.inner_dtype().to_physical(); + if inner_phys.is_numeric() || inner_phys.is_null() || inner_phys.is_bool() { + return Ok(self.specialized(values, offsets, offsets_buf)); + } + // Use gather + let mut indices = + MutablePrimitiveArray::::with_capacity(*offsets_buf.last() as usize); + let mut new_offsets = Vec::with_capacity(listarr.len() + 1); + let mut current_offset = 0i64; + let mut iter = offsets.iter(); + if let Some(mut previous) = iter.next().copied() { + new_offsets.push(current_offset); + iter.for_each(|&offset| { + let len = offset - previous; + let start = previous as IdxSize; + let end = offset as IdxSize; + + if len == 0 { + indices.push_null(); + } else { + indices.extend_trusted_len_values(start..end); + } + current_offset += len; + previous = offset; + new_offsets.push(current_offset); + }) + } + (indices, new_offsets) } else { // we have already ensure that validity is not none. let validity = listarr.validity().unwrap(); @@ -105,20 +170,22 @@ impl ChunkExplode for ListChunked { new_offsets.push(current_offset); }) } - // SAFETY: the indices we generate are in bounds - let chunk = unsafe { take_unchecked(values.as_ref(), &indices.into()) }; - // SAFETY: inner_dtype should be correct - let s = unsafe { - Series::from_chunks_and_dtype_unchecked( - self.name(), - vec![chunk], - &self.inner_dtype().to_physical(), - ) - }; - // SAFETY: monotonically increasing - let new_offsets = unsafe { OffsetsBuffer::new_unchecked(new_offsets.into()) }; - (s, new_offsets) - } + (indices, new_offsets) + }; + + // SAFETY: the indices we generate are in bounds + let chunk = unsafe { take_unchecked(values.as_ref(), &indices.into()) }; + // SAFETY: inner_dtype should be correct + let s = unsafe { + Series::from_chunks_and_dtype_unchecked( + self.name(), + vec![chunk], + &self.inner_dtype().to_physical(), + ) + }; + // SAFETY: monotonically increasing + let new_offsets = unsafe { OffsetsBuffer::new_unchecked(new_offsets.into()) }; + (s, new_offsets) }; debug_assert_eq!(s.name(), self.name()); // restore logical type diff --git a/crates/polars-core/src/series/implementations/array.rs b/crates/polars-core/src/series/implementations/array.rs index bc3ed6d23243..8afecf5b80dd 100644 --- a/crates/polars-core/src/series/implementations/array.rs +++ b/crates/polars-core/src/series/implementations/array.rs @@ -4,7 +4,6 @@ use std::borrow::Cow; use super::{private, MetadataFlags}; use crate::chunked_array::cast::CastOptions; use crate::chunked_array::comparison::*; -use crate::chunked_array::ops::explode::ExplodeByOffsets; use crate::chunked_array::AsSinglePtr; #[cfg(feature = "algorithm_group_by")] use crate::frame::group_by::*; @@ -30,10 +29,6 @@ impl private::PrivateSeries for SeriesWrap { self.0.set_flags(flags) } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0.explode_by_offsets(offsets) - } - unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool { self.0.equal_element(idx_self, idx_other, other) } diff --git a/crates/polars-core/src/series/implementations/binary.rs b/crates/polars-core/src/series/implementations/binary.rs index fd832de9222a..d290f2f79a3f 100644 --- a/crates/polars-core/src/series/implementations/binary.rs +++ b/crates/polars-core/src/series/implementations/binary.rs @@ -21,9 +21,6 @@ impl private::PrivateSeries for SeriesWrap { fn _set_flags(&mut self, flags: MetadataFlags) { self.0.set_flags(flags) } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0.explode_by_offsets(offsets) - } unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool { self.0.equal_element(idx_self, idx_other, other) diff --git a/crates/polars-core/src/series/implementations/boolean.rs b/crates/polars-core/src/series/implementations/boolean.rs index 492dac39b8c1..9fadb6a952c6 100644 --- a/crates/polars-core/src/series/implementations/boolean.rs +++ b/crates/polars-core/src/series/implementations/boolean.rs @@ -20,9 +20,6 @@ impl private::PrivateSeries for SeriesWrap { fn _set_flags(&mut self, flags: MetadataFlags) { self.0.set_flags(flags) } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0.explode_by_offsets(offsets) - } unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool { self.0.equal_element(idx_self, idx_other, other) diff --git a/crates/polars-core/src/series/implementations/categorical.rs b/crates/polars-core/src/series/implementations/categorical.rs index 16092a96a5f5..2a541680129f 100644 --- a/crates/polars-core/src/series/implementations/categorical.rs +++ b/crates/polars-core/src/series/implementations/categorical.rs @@ -62,14 +62,6 @@ impl private::PrivateSeries for SeriesWrap { self.0.set_flags(flags) } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - // TODO! explode by offset should return concrete type - self.with_state(true, |cats| { - cats.explode_by_offsets(offsets).u32().unwrap().clone() - }) - .into_series() - } - unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool { self.0.physical().equal_element(idx_self, idx_other, other) } diff --git a/crates/polars-core/src/series/implementations/date.rs b/crates/polars-core/src/series/implementations/date.rs index 3882d4976ee0..fe2e4a3c8025 100644 --- a/crates/polars-core/src/series/implementations/date.rs +++ b/crates/polars-core/src/series/implementations/date.rs @@ -39,10 +39,6 @@ impl private::PrivateSeries for SeriesWrap { self.0.set_flags(flags) } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0.explode_by_offsets(offsets).into_date().into_series() - } - #[cfg(feature = "zip_with")] fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult { let other = other.to_physical_repr().into_owned(); diff --git a/crates/polars-core/src/series/implementations/datetime.rs b/crates/polars-core/src/series/implementations/datetime.rs index bc35975c5eb3..25c636324b58 100644 --- a/crates/polars-core/src/series/implementations/datetime.rs +++ b/crates/polars-core/src/series/implementations/datetime.rs @@ -32,13 +32,6 @@ impl private::PrivateSeries for SeriesWrap { self.0.set_flags(flags) } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0 - .explode_by_offsets(offsets) - .into_datetime(self.0.time_unit(), self.0.time_zone().clone()) - .into_series() - } - #[cfg(feature = "zip_with")] fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult { let other = other.to_physical_repr().into_owned(); diff --git a/crates/polars-core/src/series/implementations/decimal.rs b/crates/polars-core/src/series/implementations/decimal.rs index 324ec02dff64..f8d27c7a3ec8 100644 --- a/crates/polars-core/src/series/implementations/decimal.rs +++ b/crates/polars-core/src/series/implementations/decimal.rs @@ -180,17 +180,6 @@ impl private::PrivateSeries for SeriesWrap { fn group_tuples(&self, multithreaded: bool, sorted: bool) -> PolarsResult { self.0.group_tuples(multithreaded, sorted) } - - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0 - .explode_by_offsets(offsets) - .decimal() - .unwrap() - .as_ref() - .clone() - .into_decimal_unchecked(self.0.precision(), self.0.scale()) - .into_series() - } } impl SeriesTrait for SeriesWrap { diff --git a/crates/polars-core/src/series/implementations/duration.rs b/crates/polars-core/src/series/implementations/duration.rs index 4e1f59c8a6f1..38775b29d7f0 100644 --- a/crates/polars-core/src/series/implementations/duration.rs +++ b/crates/polars-core/src/series/implementations/duration.rs @@ -29,13 +29,6 @@ impl private::PrivateSeries for SeriesWrap { self.0.dtype() } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0 - .explode_by_offsets(offsets) - .into_duration(self.0.time_unit()) - .into_series() - } - fn _set_flags(&mut self, flags: MetadataFlags) { self.0.deref_mut().set_flags(flags) } diff --git a/crates/polars-core/src/series/implementations/floats.rs b/crates/polars-core/src/series/implementations/floats.rs index e5419462abd4..43d9f9aad66c 100644 --- a/crates/polars-core/src/series/implementations/floats.rs +++ b/crates/polars-core/src/series/implementations/floats.rs @@ -23,10 +23,6 @@ macro_rules! impl_dyn_series { fn _get_flags(&self) -> MetadataFlags { self.0.get_flags() } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0.explode_by_offsets(offsets) - } - unsafe fn equal_element( &self, idx_self: usize, diff --git a/crates/polars-core/src/series/implementations/list.rs b/crates/polars-core/src/series/implementations/list.rs index a67dc8e8f487..ae3008d1a93c 100644 --- a/crates/polars-core/src/series/implementations/list.rs +++ b/crates/polars-core/src/series/implementations/list.rs @@ -21,10 +21,6 @@ impl private::PrivateSeries for SeriesWrap { self.0.set_flags(flags) } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0.explode_by_offsets(offsets) - } - unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool { self.0.equal_element(idx_self, idx_other, other) } diff --git a/crates/polars-core/src/series/implementations/mod.rs b/crates/polars-core/src/series/implementations/mod.rs index 50dd3e1c0042..afba1b0d9a50 100644 --- a/crates/polars-core/src/series/implementations/mod.rs +++ b/crates/polars-core/src/series/implementations/mod.rs @@ -20,7 +20,7 @@ pub(crate) mod null; mod object; mod string; #[cfg(feature = "dtype-struct")] -mod struct__; +mod struct_; #[cfg(feature = "dtype-time")] mod time; @@ -35,7 +35,6 @@ use crate::chunked_array::metadata::MetadataTrait; use crate::chunked_array::ops::compare_inner::{ IntoTotalEqInner, IntoTotalOrdInner, TotalEqInner, TotalOrdInner, }; -use crate::chunked_array::ops::explode::ExplodeByOffsets; use crate::chunked_array::AsSinglePtr; // Utility wrapper struct @@ -90,10 +89,6 @@ macro_rules! impl_dyn_series { self.0.set_flags(flags) } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0.explode_by_offsets(offsets) - } - unsafe fn equal_element( &self, idx_self: usize, diff --git a/crates/polars-core/src/series/implementations/null.rs b/crates/polars-core/src/series/implementations/null.rs index 0837532df2d5..c6a12a5bcddc 100644 --- a/crates/polars-core/src/series/implementations/null.rs +++ b/crates/polars-core/src/series/implementations/null.rs @@ -3,7 +3,6 @@ use std::any::Any; use polars_error::constants::LENGTH_LIMIT_MSG; use crate::prelude::compare_inner::{IntoTotalEqInner, TotalEqInner}; -use crate::prelude::explode::ExplodeByOffsets; use crate::prelude::*; use crate::series::private::{PrivateSeries, PrivateSeriesNumeric}; use crate::series::*; @@ -80,10 +79,6 @@ impl PrivateSeries for NullChunked { Ok(Self::new(self.name().into(), len).into_series()) } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - ExplodeByOffsets::explode_by_offsets(self, offsets) - } - fn subtract(&self, _rhs: &Series) -> PolarsResult { null_arithmetic(self, _rhs, "subtract") } diff --git a/crates/polars-core/src/series/implementations/string.rs b/crates/polars-core/src/series/implementations/string.rs index 3a795e23092d..7bcc0ca9c105 100644 --- a/crates/polars-core/src/series/implementations/string.rs +++ b/crates/polars-core/src/series/implementations/string.rs @@ -21,10 +21,6 @@ impl private::PrivateSeries for SeriesWrap { fn _get_flags(&self) -> MetadataFlags { self.0.get_flags() } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0.explode_by_offsets(offsets) - } - unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool { self.0.equal_element(idx_self, idx_other, other) } diff --git a/crates/polars-core/src/series/implementations/struct__.rs b/crates/polars-core/src/series/implementations/struct_.rs similarity index 97% rename from crates/polars-core/src/series/implementations/struct__.rs rename to crates/polars-core/src/series/implementations/struct_.rs index 07b35502dd6b..f3663acdda3f 100644 --- a/crates/polars-core/src/series/implementations/struct__.rs +++ b/crates/polars-core/src/series/implementations/struct_.rs @@ -32,12 +32,6 @@ impl PrivateSeries for SeriesWrap { fn _set_flags(&mut self, _flags: MetadataFlags) {} - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self._apply_fields(|s| s.explode_by_offsets(offsets)) - .unwrap() - .into_series() - } - // TODO! remove this. Very slow. Asof join should use row-encoding. unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool { let other = other.struct_().unwrap(); diff --git a/crates/polars-core/src/series/implementations/time.rs b/crates/polars-core/src/series/implementations/time.rs index c197de232ef1..78e13365714c 100644 --- a/crates/polars-core/src/series/implementations/time.rs +++ b/crates/polars-core/src/series/implementations/time.rs @@ -39,10 +39,6 @@ impl private::PrivateSeries for SeriesWrap { self.0.set_flags(flags) } - fn explode_by_offsets(&self, offsets: &[i64]) -> Series { - self.0.explode_by_offsets(offsets).into_time().into_series() - } - #[cfg(feature = "zip_with")] fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult { let other = other.to_physical_repr().into_owned(); diff --git a/crates/polars-core/src/series/series_trait.rs b/crates/polars-core/src/series/series_trait.rs index 1aa606205dc3..b4dba12e0327 100644 --- a/crates/polars-core/src/series/series_trait.rs +++ b/crates/polars-core/src/series/series_trait.rs @@ -79,10 +79,6 @@ pub(crate) mod private { fn _set_flags(&mut self, flags: MetadataFlags); - fn explode_by_offsets(&self, _offsets: &[i64]) -> Series { - invalid_operation_panic!(explode_by_offsets, self) - } - unsafe fn equal_element( &self, _idx_self: usize, diff --git a/py-polars/tests/unit/operations/test_explode.py b/py-polars/tests/unit/operations/test_explode.py index 24e65ac1dc6d..14aefa93c3c1 100644 --- a/py-polars/tests/unit/operations/test_explode.py +++ b/py-polars/tests/unit/operations/test_explode.py @@ -167,7 +167,7 @@ def test_list_struct_explode_6905() -> None: }, schema={"group": pl.List(pl.Struct([pl.Field("params", pl.List(pl.Int32))]))}, )["group"].list.explode().to_list() == [ - {"params": None}, + None, {"params": [1]}, {"params": []}, ] @@ -447,3 +447,8 @@ def test_explode_17648() -> None: .with_columns(pl.int_ranges(pl.col("a").list.len()).alias("count")) .explode("a", "count") ).to_dict(as_series=False) == {"a": [2, 6, 7, 3, 9, 2], "count": [0, 1, 2, 0, 1, 2]} + + +def test_explode_struct_nulls() -> None: + df = pl.DataFrame({"A": [[{"B": 1}], [None], []]}) + assert df.explode("A").to_dict(as_series=False) == {"A": [{"B": 1}, None, None]}