Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate Groupby.collect #15808

Merged
9 commits merged on
May 22, 2024
8 changes: 7 additions & 1 deletion python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2180,7 +2180,13 @@ def func(x):
@_cudf_nvtx_annotate
def collect(self):
"""Get a list of all the values for each column in each group."""
return self.agg("collect")
warnings.warn(
"Groupby.collect is deprecated and "
"will be removed in a future version. "
"Use `.agg(list)` instead.",
FutureWarning,
)
return self.agg(list)

@_cudf_nvtx_annotate
def unique(self):
Expand Down
4 changes: 2 additions & 2 deletions python/dask_cudf/dask_cudf/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def max(self, split_every=None, split_out=1):
def collect(self, split_every=None, split_out=1):
rjzamora marked this conversation as resolved.
Show resolved Hide resolved
return _make_groupby_agg_call(
self,
self._make_groupby_method_aggs("collect"),
self._make_groupby_method_aggs(list),
split_every,
split_out,
)
Expand Down Expand Up @@ -310,7 +310,7 @@ def max(self, split_every=None, split_out=1):
def collect(self, split_every=None, split_out=1):
return _make_groupby_agg_call(
self,
{self._slice: "collect"},
{self._slice: list},
split_every,
split_out,
)[self._slice]
Expand Down
4 changes: 3 additions & 1 deletion python/dask_cudf/dask_cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from dask.utils_test import hlg_layer

import cudf
from cudf.testing._utils import expect_warning_if

import dask_cudf
from dask_cudf.groupby import OPTIMIZED_AGGS, _aggs_optimized
galipremsagar marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -62,7 +63,8 @@ def test_groupby_basic(series, aggregation, pdf):

check_dtype = aggregation != "count"

expect = getattr(gdf_grouped, aggregation)()
with expect_warning_if(aggregation == "collect"):
expect = getattr(gdf_grouped, aggregation)()
galipremsagar marked this conversation as resolved.
Show resolved Hide resolved
actual = getattr(ddf_grouped, aggregation)()

if not QUERY_PLANNING_ON:
Expand Down
Loading