Skip to content

Commit

Permalink
Fix maxima of categorical column (#15701)
Browse files: browse the repository at this point in the history
Closes #15641

Applies the patch suggested by @wence-.

Authors:
  - Richard (Rick) Zamora (https://github.com/rjzamora)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15701
  • Loading branch information
rjzamora authored May 10, 2024
1 parent e1c6dc2 commit e93782f
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
23 changes: 23 additions & 0 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,10 @@ class CategoricalColumn(column.ColumnBase):
dtype: cudf.core.dtypes.CategoricalDtype
_codes: Optional[NumericalColumn]
_children: Tuple[NumericalColumn]
_VALID_REDUCTIONS = {
"max",
"min",
}
_VALID_BINARY_OPERATIONS = {
"__eq__",
"__ne__",
Expand Down Expand Up @@ -699,6 +703,25 @@ def slice(
),
)

def _reduce(
    self,
    op: str,
    skipna: Optional[bool] = None,
    min_count: int = 0,
    *args,
    **kwargs,
) -> ScalarLike:
    """Reduce this categorical column by reducing its codes.

    The reduction named by ``op`` is applied to ``self.codes`` and the
    resulting code is passed through ``self._decode`` before being
    returned.

    Parameters
    ----------
    op : str
        Name of the reduction; forwarded unchanged to the codes column.
        The accompanying note in the original code says only ``min`` and
        ``max`` are valid here; this method itself does not check ``op``.
    skipna : bool, optional
        Forwarded unchanged to the codes reduction.
    min_count : int, default 0
        Forwarded unchanged to the codes reduction.
    *args, **kwargs
        Forwarded unchanged to the codes reduction.

    Returns
    -------
    ScalarLike
        Whatever ``self._decode`` returns for the reduced code.

    Raises
    ------
    TypeError
        If the column is not ordered.
    """
    if self.ordered:
        # Reduce over the codes first, then decode the reduced code.
        reduced_code = self.codes._reduce(
            op, skipna, min_count, *args, **kwargs
        )
        return self._decode(reduced_code)

    # Unordered categoricals are rejected before any reduction is attempted.
    message = (
        f"Categorical is not ordered for operation {op} "
        "you can use .as_ordered() to change the Categorical "
        "to an ordered one."
    )
    raise TypeError(message)

def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
other = self._wrap_binop_normalization(other)
# TODO: This is currently just here to make mypy happy, but eventually
Expand Down
19 changes: 19 additions & 0 deletions python/cudf/cudf/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,3 +875,22 @@ def test_cat_groupby_fillna():
lfunc_args_and_kwargs=(("d",), {}),
rfunc_args_and_kwargs=(("d",), {}),
)


@pytest.mark.parametrize("op", ["min", "max"])
def test_categorical_maxima(op):
    """Check min/max on a categorical Series, unordered and then ordered.

    The unordered Series must raise ``TypeError``; after ``as_ordered()``
    the cudf result is compared against the pandas result.
    """
    values = ["a", "d", "c", "z", "g"]
    unordered_dtype = cudf.CategoricalDtype(
        ["z", "c", "g", "d", "a"], ordered=False
    )
    ser = cudf.Series(values, dtype=unordered_dtype)
    assert not ser.cat.ordered

    # An unordered categorical has no extrema, so the reduction must fail.
    reduction = getattr(ser, op)
    with pytest.raises(TypeError, match="Categorical is not ordered"):
        reduction()

    # Once both sides are converted to ordered, cudf and pandas must agree.
    ser_pd = ser.to_pandas()
    ordered_gpu = ser.cat.as_ordered()
    result = getattr(ordered_gpu, op)()
    ordered_pd = ser_pd.cat.as_ordered()
    result_pd = getattr(ordered_pd, op)()
    assert_eq(result, result_pd)

0 comments on commit e93782f

Please sign in to comment.