Skip to content

Commit

Permalink
Implement __setitem__ for StructColumn (#8737)
Browse files Browse the repository at this point in the history
  • Loading branch information
shaneding authored Jul 15, 2021
1 parent 65a38af commit 73f2c44
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 8 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/scalar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ cdef _set_struct_from_pydict(unique_ptr[scalar]& s,
else:
pyarrow_table = pa.Table.from_arrays(
[
pa.array([], from_pandas=True, type=f.type)
pa.array([cudf.NA], from_pandas=True, type=f.type)
for f in arrow_schema
],
names=columns
Expand Down
9 changes: 9 additions & 0 deletions python/cudf/cudf/core/column/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,15 @@ def __getitem__(self, args):
}
return result

def __setitem__(self, key, value):
    """Assign ``value`` to the element(s) of this column selected by ``key``.

    When ``value`` is a ``dict`` it is treated as a single struct row:
    every field of ``self.dtype`` that is absent from the dict is filled
    with ``cudf.NA``, and the result is wrapped in a ``cudf.Scalar`` of
    this column's dtype. Any other value is forwarded unchanged. The
    assignment itself is delegated to the parent class.

    The dict passed by the caller is never modified.
    """
    if isinstance(value, dict):
        # Fill missing fields on a shallow copy so the caller's dict is
        # left untouched; keys already present keep their values.
        filled = dict(value)
        for field in self.dtype.fields:
            filled.setdefault(field, cudf.NA)

        value = cudf.Scalar(filled, self.dtype)
    super().__setitem__(key, value)

def copy(self, deep=True):
result = super().copy(deep=deep)
if deep:
Expand Down
7 changes: 5 additions & 2 deletions python/cudf/cudf/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,13 @@ def __setitem__(self, key, value):
if is_scalar(value):
value = to_cudf_compatible_scalar(value)
elif not (
isinstance(value, list)
and isinstance(self._sr._column.dtype, cudf.ListDtype)
isinstance(value, (list, dict))
and isinstance(
self._sr._column.dtype, (cudf.ListDtype, cudf.StructDtype)
)
):
value = column.as_column(value)

if (
not isinstance(
self._sr._column.dtype,
Expand Down
8 changes: 3 additions & 5 deletions python/cudf/cudf/core/scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,15 +133,13 @@ def _preprocess_host_value(self, value, dtype):
return NA, dtype

if isinstance(value, dict):
if dtype is not None:
raise TypeError("dict may not be cast to a different dtype")
else:
if dtype is None:
dtype = StructDtype.from_arrow(
pa.infer_type([value], from_pandas=True)
)
return value, dtype
return value, dtype
elif isinstance(dtype, StructDtype):
if value is not None:
if value not in {None, NA}:
raise ValueError(f"Can not coerce {value} to StructDType")
else:
return NA, dtype
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1238,6 +1238,7 @@ def __repr__(self):
preprocess._column, cudf.core.column.CategoricalColumn
)
and not is_list_dtype(preprocess.dtype)
and not is_struct_dtype(preprocess.dtype)
and not is_decimal_dtype(preprocess.dtype)
) or isinstance(
preprocess._column, cudf.core.column.timedelta.TimeDeltaColumn
Expand Down
45 changes: 45 additions & 0 deletions python/cudf/cudf/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,51 @@ def test_struct_getitem(series, expected):
assert sr[0] == expected


def _sample_struct_rows():
    """Return a fresh list of three struct rows for the setitem test.

    A new list (with new dicts) is built on every call so that each
    parametrized case owns its data and may modify it freely.
    """
    return [
        {"a": "Hello world", "b": []},
        {"a": "CUDF", "b": [1, 2, 3], "c": cudf.NA},
        {"a": "abcde", "b": [4, 5, 6], "c": 9},
    ]


@pytest.mark.parametrize(
    "data, item",
    [
        (_sample_struct_rows(), replacement)
        for replacement in (
            {"a": "Hello world", "b": [], "c": cudf.NA},
            {},
            cudf.NA,
            {"a": "Second element", "b": [1, 2], "c": 1000},
        )
    ],
)
def test_struct_setitem(data, item):
    """Setting row 1 of a struct Series matches building it from host data.

    The same replacement is applied to the device Series and to the host
    list; a Series built from the edited list is the expected result.
    """
    actual = cudf.Series(data)
    actual[1] = item

    # Mirror the assignment on the host-side rows to derive the expectation.
    data[1] = item
    expected = cudf.Series(data)

    assert actual.to_arrow() == expected.to_arrow()


@pytest.mark.parametrize(
"data",
[
Expand Down

0 comments on commit 73f2c44

Please sign in to comment.