Skip to content

Commit

Permalink
more tests & change observed=None
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed May 1, 2018
1 parent bdb7ad3 commit bdf7525
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 6 deletions.
8 changes: 5 additions & 3 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,14 @@ def ip():
return InteractiveShell()


@pytest.fixture(params=[True, False])
@pytest.fixture(params=[True, False, None])
def observed(request):
""" pass in the observed keyword to groupby for [True, False]
This indicates whether categoricals should return values for
values which are not in the grouper [False], or only values which
appear in the grouper [True] """
values which are not in the grouper [False / None], or only values which
appear in the grouper [True]. [None] is supported for future compatibility
if we decide to change the default (and would need to warn if this
parameter is not passed)"""
return request.param


Expand Down
12 changes: 9 additions & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ class _GroupBy(PandasObject, SelectionMixin):
def __init__(self, obj, keys=None, axis=0, level=None,
grouper=None, exclusions=None, selection=None, as_index=True,
sort=True, group_keys=True, squeeze=False,
observed=False, **kwargs):
observed=None, **kwargs):

self._selection = selection

Expand Down Expand Up @@ -2907,7 +2907,7 @@ class Grouping(object):
"""

def __init__(self, index, grouper=None, obj=None, name=None, level=None,
sort=True, observed=False, in_axis=False):
sort=True, observed=None, in_axis=False):

self.name = name
self.level = level
Expand Down Expand Up @@ -2964,6 +2964,12 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
# a passed Categorical
elif is_categorical_dtype(self.grouper):

# observed can be True/False/None
# we treat None as False. If in the future
# we need to warn if observed is not passed
# then we have this option
# gh-20583

self.all_grouper = self.grouper
self.grouper = self.grouper._codes_for_groupby(
self.sort, observed)
Expand Down Expand Up @@ -3082,7 +3088,7 @@ def groups(self):


def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
observed=False, mutated=False, validate=True):
observed=None, mutated=False, validate=True):
"""
create and return a BaseGrouper, which is an internal
mapping of how to create the grouper indexers.
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,26 @@ def test_observed_perf():
assert result.index.levels[2].nunique() == df.other_id.nunique()


def test_observed_groups(observed):
    """Check ``.groups`` of a categorical groupby for each ``observed`` value.

    gh-20583: with a truthy ``observed`` only the categories that occur in
    the data ('a' and 'c') are expected as groups; with a falsy value the
    unused category 'b' is expected as well, mapped to an empty index.
    """
    labels = pd.Categorical(['a', 'c', 'a'], categories=['a', 'b', 'c'])
    frame = pd.DataFrame({'cat': labels, 'vals': [1, 2, 3]})
    grouped = frame.groupby('cat', observed=observed)

    # row positions expected per category, listed in category order
    positions = {'a': [0, 2], 'b': [], 'c': [1]}
    if observed:
        # 'b' never occurs in the data, so it is not expected as a group
        positions.pop('b')

    expected = {key: Index(rows, dtype='int64')
                for key, rows in positions.items()}

    tm.assert_dict_equal(grouped.groups, expected)


def test_datetime():
# GH9049: ensure backward compatibility
levels = pd.date_range('2014-01-01', periods=4)
Expand Down

0 comments on commit bdf7525

Please sign in to comment.