From d7a5b838d8d6234f6bec5a30bfa33b24bd4afbd9 Mon Sep 17 00:00:00 2001 From: Janus Date: Wed, 14 Oct 2020 20:37:51 +0200 Subject: [PATCH] Fix GH-29442 DataFrame.groupby doesn't preserve _metadata (#35688) --- doc/source/whatsnew/v1.1.4.rst | 2 +- pandas/core/groupby/groupby.py | 8 +++++--- pandas/tests/generic/test_finalize.py | 16 +++++++++++++++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index aa2c77da4ee6f..6892fb62028c9 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -27,7 +27,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug causing ``groupby(...).sum()`` and similar to not preserve metadata (:issue:`29442`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4ab40e25db84e..3938adce6736a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1004,8 +1004,9 @@ def _agg_general( ): with group_selection_context(self): # try a cython aggregation if we can + result = None try: - return self._cython_agg_general( + result = self._cython_agg_general( how=alias, alt=npfunc, numeric_only=numeric_only, @@ -1024,8 +1025,9 @@ def _agg_general( raise # apply a non-cython aggregation - result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) - return result + if result is None: + result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) + return result.__finalize__(self.obj, method="groupby") def _cython_agg_general( self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 25c926b1de4c6..42bd2c9a64901 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -762,13 +762,27 @@ def test_categorical_accessor(method): [ operator.methodcaller("sum"), lambda x: x.agg("sum"), + ], +) +def test_groupby_finalize(obj, method): + obj.attrs = {"a": 1} + result = method(obj.groupby([0, 0])) + assert result.attrs == {"a": 1} + + +@pytest.mark.parametrize( + "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})] +) +@pytest.mark.parametrize( + "method", + [ lambda x: x.agg(["sum", "count"]), lambda x: x.transform(lambda y: y), lambda x: x.apply(lambda y: y), ], ) @not_implemented_mark -def test_groupby(obj, method): +def test_groupby_finalize_not_implemented(obj, method): obj.attrs = {"a": 1} result = method(obj.groupby([0, 0])) assert result.attrs == {"a": 1}