Skip to content

Commit

Permalink
Fix pandas-devGH-29442 DataFrame.groupby doesn't preserve _metadata
Browse files Browse the repository at this point in the history
This bug is a regression in v1.1.0 and was introduced by the fix for pandas-devGH-34214 in commit [6f065b].

Underlying cause is that the `*Splitter` classes do not use the `._constructor` property and do not call `__finalize__`.

Please note that the method name used for `__finalize__` calls was my best guess since documentation for the value has been hard to find.

[6f065b]: pandas-dev@6f065b6
  • Loading branch information
Japanuspus committed Aug 12, 2020
1 parent 8380708 commit 7cc4d53
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 2 deletions.
5 changes: 3 additions & 2 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,7 +955,8 @@ class SeriesSplitter(DataSplitter):
def _chop(self, sdata: Series, slice_obj: slice) -> Series:
# fastpath equivalent to `sdata.iloc[slice_obj]`
mgr = sdata._mgr.get_slice(slice_obj)
return type(sdata)(mgr, name=sdata.name, fastpath=True)
return sdata._constructor(mgr, name=sdata.name, fastpath=True)\
.__finalize__(sdata, method='groupby')


class FrameSplitter(DataSplitter):
Expand All @@ -971,7 +972,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
# else:
# return sdata.iloc[:, slice_obj]
mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis)
return type(sdata)(mgr)
return sdata._constructor(mgr).__finalize__(sdata, method='groupby')


def get_splitter(
Expand Down
83 changes: 83 additions & 0 deletions pandas/tests/groupby/test_custom_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
Test metadata propagation in groupby
The PandasTable class below is implemented according to the [guidelines], and as such would
expect `__finalize__` to always be called so that the `_pandastable_metadata` is always populated.
[guidelines]: https://pandas.pydata.org/pandas-docs/stable/development/extending.html#override-constructor-properties
"""

import pytest
import pandas as pd
from warnings import warn
from typing import List


_TABLE_METADATA_FIELD_NAME = '_pandastable_metadata'


def _combine_metadata(data: List[str]) -> str:
"""
A mock implementation for testing
"""
return '+'.join(data)


class PandasTable(pd.DataFrame):
"""
A pandas dataframe subclass with associated table metadata.
"""

_metadata = [_TABLE_METADATA_FIELD_NAME] # Register metadata fieldnames here

@property
def _constructor(self):
return PandasTable

def __finalize__(self, other, method=None, **kwargs):
"""
This method is responsible for populating metadata when creating new Table-object.
The method argument is subject to change, and a robust handling of this is implemented
"""
src = [other] #more logic here in actual implementation
metadata = _combine_metadata([d.get_metadata() for d in src if isinstance(d, PandasTable)])

if not metadata:
warn('__finalize__ unable to combine metadata for method "{method}", falling back to DataFrame')
return pd.DataFrame(self)
object.__setattr__(self, _TABLE_METADATA_FIELD_NAME, metadata)
return self

def get_metadata(self):
#return object.__getattribute__(self, _TABLE_METADATA_FIELD_NAME)
metadata = getattr(self, _TABLE_METADATA_FIELD_NAME, None)
if metadata is None:
warn('PandasTable object not correctly initialized: no metadata')
return metadata

@staticmethod
def from_table_data(df: pd.DataFrame, metadata) -> 'PandasTable':
df = PandasTable(df)
object.__setattr__(df, _TABLE_METADATA_FIELD_NAME, metadata)
return df


@pytest.fixture
def dft():
df = pd.DataFrame([[11, 12, 0], [21, 22, 0], [31, 32, 1]], columns={'a','b','g'})
return PandasTable.from_table_data(df, 'My metadata')


def test_initial_metadata(dft):
assert dft.get_metadata() == 'My metadata'


def test_basic_propagation(dft):
gg = dft.loc[dft.g==0, :]
assert gg.get_metadata() == 'My metadata'


def test_groupby(dft):
gg = [ab for g, ab in dft.groupby('g')]
assert gg[0].get_metadata() is not None

0 comments on commit 7cc4d53

Please sign in to comment.