rapidsai · sarahyurick · Jul 27, 2021 · Jul 27, 2021 · Jul 27, 2021 · beckernick
@@ -6034,6 +6034,18 @@ def explode(self, ignore_index=False):
         3       5
         dtype: int64
         """
+        if is_struct_dtype(self._column.dtype):
+            cols = [key for key in self.dtype.fields]
+            results = []
+            for row in self.to_arrow():
+                row_results = [str(row[col]) for col in cols]
+                results.append(row_results)
+
+            out = cudf.DataFrame(results, columns=cols)
+            for col in cols:
+                out[col] = out[col].astype(self.dtype.fields[col])
+            return out
+
         if not is_list_dtype(self._column.dtype):
             data = self._data.copy(deep=True)
             idx = None if ignore_index else self._index.copy(deep=True)

@@ -9,6 +9,8 @@
 import pandas as pd
 import pytest
 
+import dask_cudf
+
 import cudf
 from cudf.testing._utils import (
     DATETIME_TYPES,
@@ -1230,3 +1232,25 @@ def test_explode(data, ignore_index, p_index):
 def test_nested_series_from_sequence_data(data, expected):
     actual = cudf.Series(data)
     assert_eq(actual, expected)
+
+
+@pytest.mark.parametrize(
+    "data, npartitions",
+    [
+        (
+            [
+                {"a": 1, "b": "x"},
+                {"a": 2, "b": "y"},
+                {"a": 3, "b": "z"},
+                {"a": 4, "b": "a"},
+            ],
+            2,
+        )
+    ],
+)
+def test_dask_explode(data, npartitions):
+    s = cudf.Series(data)
+    assert_eq(s.struct.explode(), s.explode())
+
+    sd = dask_cudf.from_cudf(s, npartitions=npartitions)
+    assert_eq(s.explode(), sd.compute().explode())