From 80b685027108245086b78dbd9a176b096c92570a Mon Sep 17 00:00:00 2001 From: matiaslindgren Date: Sat, 7 Sep 2024 13:53:28 +0200 Subject: [PATCH] BUG: Fix inconsistent pivot table subaggregation when index is None (#59629) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/reshape/pivot.py | 11 +++++++---- pandas/tests/reshape/test_pivot.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index cd353b60d1a6e..9a29ff4d49966 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -668,6 +668,7 @@ Reshaping - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`) - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`) - Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`) +- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`) - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`) Sparse diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0886aad310034..cfc6f91557781 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -557,7 +557,12 @@ def _all_key(key): table_pieces.append(piece) margin_keys.append(all_key) else: - from pandas import DataFrame + margin = ( + data[cols[:1] + values] + .groupby(cols[:1], observed=observed) + .agg(aggfunc, **kwargs) + .T + ) cat_axis = 0 for key, piece in table.groupby(level=0, observed=observed): @@ -566,9 +571,7 @@ def _all_key(key): else: all_key = margins_name table_pieces.append(piece) - # GH31016 this is to calculate margin for each group, and assign - # corresponded key as index - transformed_piece = DataFrame(piece.apply(aggfunc, **kwargs)).T + transformed_piece = margin[key].to_frame().T if isinstance(piece.index, MultiIndex): # We are adding an empty level transformed_piece.index = MultiIndex.from_tuples( diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 44b96afaa4ef5..8cfe565ebdd65 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2785,3 +2785,31 @@ def test_pivot_empty_with_datetime(self): index="category", columns="value", values="timestamp" ) assert df_pivoted.empty + + def test_pivot_margins_with_none_index(self): + # GH#58722 + df = DataFrame( + { + "x": [1, 1, 2], + "y": [3, 3, 4], + "z": [5, 5, 6], + "w": [7, 8, 9], + } + ) + result = df.pivot_table( + index=None, + columns=["y", "z"], + values="w", + margins=True, + aggfunc="count", + ) + expected = DataFrame( + [[2, 2, 1, 1]], + index=["w"], + columns=MultiIndex( + levels=[[3, 4], [5, 6, "All"]], + codes=[[0, 0, 1, 1], [0, 2, 1, 2]], + names=["y", "z"], + ), + ) + tm.assert_frame_equal(result, expected)