make observed=False the default, remove deprecation warning

pandas-dev · May 1, 2018 · bdb7ad3 · bdb7ad3
1 parent 7ae10ba
commit bdb7ad3
Show file tree

Hide file tree

Showing 4 changed files with 62 additions and 99 deletions.
diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
@@ -996,19 +996,19 @@ Handling of (un)observed Categorical values
 
 When using a ``Categorical`` grouper (as a single grouper or as part of multiple groupers), the ``observed`` keyword
 controls whether to return a cartesian product of all possible groupers values (``observed=False``) or only those
-that are observed groupers (``observed=True``). The ``observed`` keyword will default to ``True`` in the future.
+that are observed groupers (``observed=True``).
 
-Show only the observed values:
+Show all values:
 
 .. ipython:: python
 
-   pd.Series([1, 1, 1]).groupby(pd.Categorical(['a', 'a', 'a'], categories=['a', 'b']), observed=True).count()
+   pd.Series([1, 1, 1]).groupby(pd.Categorical(['a', 'a', 'a'], categories=['a', 'b']), observed=False).count()
 
-Show all values:
+Show only the observed values:
 
 .. ipython:: python
 
-   pd.Series([1, 1, 1]).groupby(pd.Categorical(['a', 'a', 'a'], categories=['a', 'b']), observed=False).count()
+   pd.Series([1, 1, 1]).groupby(pd.Categorical(['a', 'a', 'a'], categories=['a', 'b']), observed=True).count()
 
 The returned dtype of the grouped will *always* include *all* of the categories that were grouped.
 

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -396,6 +396,58 @@ documentation. If you build an extension array, publicize it on our
 
 .. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest/
 
+.. _whatsnew_0230.enhancements.categorical_grouping:
+
+Categorical Groupers have gained an observed keyword
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In previous versions, grouping by 1 or more categorical columns would result in an index that was the cartesian product of all of the categories for
+each grouper, not just the observed values. ``.groupby()`` has gained the ``observed`` keyword to toggle this behavior. The default remains backward
+compatible (generate a cartesian product). (:issue:`14942`, :issue:`8138`, :issue:`15217`, :issue:`17594`, :issue:`8669`, :issue:`20583`)
+
+
+.. ipython:: python
+
+   cat1 = pd.Categorical(["a", "a", "b", "b"],
+                         categories=["a", "b", "z"], ordered=True)
+   cat2 = pd.Categorical(["c", "d", "c", "d"],
+                         categories=["c", "d", "y"], ordered=True)
+   df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
+   df['C'] = ['foo', 'bar'] * 2
+   df
+
+To show all values, the previous behavior:
+
+.. ipython:: python
+
+   df.groupby(['A', 'B', 'C'], observed=False).count()
+
+
+To show only observed values:
+
+.. ipython:: python
+
+   df.groupby(['A', 'B', 'C'], observed=True).count()
+
+For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword:
+
+.. ipython:: python
+
+   cat1 = pd.Categorical(["a", "a", "b", "b"],
+                         categories=["a", "b", "z"], ordered=True)
+   cat2 = pd.Categorical(["c", "d", "c", "d"],
+                         categories=["c", "d", "y"], ordered=True)
+   df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
+   df
+
+.. ipython:: python
+
+   pd.pivot_table(df, values='values', index=['A', 'B'],
+                  dropna=True)
+   pd.pivot_table(df, values='values', index=['A', 'B'],
+                  dropna=False)
+
+
 .. _whatsnew_0230.enhancements.other:
 
 Other Enhancements
@@ -527,68 +579,6 @@ If you wish to retain the old behavior while using Python >= 3.6, you can use
                'Taxes': -200,
                'Net result': 300}).sort_index()
 
-.. _whatsnew_0230.api_breaking.categorical_grouping:
-
-Categorical Groupers will now require passing the observed keyword
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-In previous versions, grouping by 1 or more categorical columns would result in an index that was the cartesian product of all of the categories for
-each grouper, not just the observed values.``.groupby()`` has gained the ``observed`` keyword to toggle this behavior. The default remains backward
-compatible (generate a cartesian product). Pandas will show a ``FutureWarning`` if the ``observed`` keyword is not passed; the default will
-change to ``observed=True`` in the future. (:issue:`14942`, :issue:`8138`, :issue:`15217`, :issue:`17594`, :issue:`8669`, :issue:`20583`)
-
-
-.. ipython:: python
-
-   cat1 = pd.Categorical(["a", "a", "b", "b"],
-                         categories=["a", "b", "z"], ordered=True)
-   cat2 = pd.Categorical(["c", "d", "c", "d"],
-                         categories=["c", "d", "y"], ordered=True)
-   df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
-   df['C'] = ['foo', 'bar'] * 2
-   df
-
-``observed`` must now be passed when grouping by categoricals, or a
-``FutureWarning`` will show:
-
-.. ipython:: python
-   :okwarning:
-
-   df.groupby(['A', 'B', 'C']).count()
-
-
-To suppress the warning, with previous Behavior (show all values):
-
-.. ipython:: python
-
-   df.groupby(['A', 'B', 'C'], observed=False).count()
-
-
-Future Behavior (show only observed values):
-
-.. ipython:: python
-
-   df.groupby(['A', 'B', 'C'], observed=True).count()
-
-For pivotting operations, this behavior is *already* controlled by the ``dropna`` keyword:
-
-.. ipython:: python
-
-   cat1 = pd.Categorical(["a", "a", "b", "b"],
-                         categories=["a", "b", "z"], ordered=True)
-   cat2 = pd.Categorical(["c", "d", "c", "d"],
-                         categories=["c", "d", "y"], ordered=True)
-   df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
-   df
-
-.. ipython:: python
-
-   pd.pivot_table(df, values='values', index=['A', 'B'],
-                  dropna=True)
-   pd.pivot_table(df, values='values', index=['A', 'B'],
-                  dropna=False)
-
-
 .. _whatsnew_0230.api_breaking.deprecate_panel:
 
 Deprecate Panel

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -557,7 +557,7 @@ class _GroupBy(PandasObject, SelectionMixin):
     def __init__(self, obj, keys=None, axis=0, level=None,
                  grouper=None, exclusions=None, selection=None, as_index=True,
                  sort=True, group_keys=True, squeeze=False,
-                 observed=None, **kwargs):
+                 observed=False, **kwargs):
 
         self._selection = selection
 
@@ -2890,7 +2890,8 @@ class Grouping(object):
     obj :
     name :
     level :
-    observed : If we are a Categorical, use the observed values
+    observed : boolean, default False
+        If we are a Categorical, use the observed values
     in_axis : if the Grouping is a column in self.obj and hence among
         Groupby.exclusions list
 
@@ -2906,7 +2907,7 @@ class Grouping(object):
     """
 
     def __init__(self, index, grouper=None, obj=None, name=None, level=None,
-                 sort=True, observed=None, in_axis=False):
+                 sort=True, observed=False, in_axis=False):
 
         self.name = name
         self.level = level
@@ -2963,17 +2964,6 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
             # a passed Categorical
             elif is_categorical_dtype(self.grouper):
 
-                # Use the observed values of the grouper if inidcated
-                observed = self.observed
-                if observed is None:
-                    msg = ("pass observed=True to ensure that a "
-                           "categorical grouper only returns the "
-                           "observed categories, or\n"
-                           "observed=False to also include"
-                           "unobserved categories.\n")
-                    warnings.warn(msg, FutureWarning, stacklevel=5)
-                    observed = False
-
                 self.all_grouper = self.grouper
                 self.grouper = self.grouper._codes_for_groupby(
                     self.sort, observed)
@@ -3092,7 +3082,7 @@ def groups(self):
 
 
 def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
-                 observed=None, mutated=False, validate=True):
+                 observed=False, mutated=False, validate=True):
     """
     create and return a BaseGrouper, which is an internal
     mapping of how to create the grouper indexers.

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -246,23 +246,6 @@ def test_apply(ordered):
     assert_series_equal(result, expected)
 
 
-def test_observed_warning():
-    # 20583 - future warning on observe
-
-    cat1 = Categorical(["a", "a", "b", "b"],
-                       categories=["a", "b", "z"], ordered=True)
-    cat2 = Categorical(["c", "d", "c", "d"],
-                       categories=["c", "d", "y"], ordered=True)
-    df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
-    df['C'] = ['foo', 'bar'] * 2
-
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        df.groupby(['A', 'B', 'C'])
-
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        df.groupby('A')
-
-
 def test_observed(observed):
     # multiple groupers, don't re-expand the output space
     # of the grouper