diff --git a/pandas/conftest.py b/pandas/conftest.py
index cf83904e4fa13..c4aab1b632b00 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -66,12 +66,14 @@ def ip():
     return InteractiveShell()
 
 
-@pytest.fixture(params=[True, False])
+@pytest.fixture(params=[True, False, None])
 def observed(request):
     """ pass in the observed keyword to groupby for [True, False]
     This indicates whether categoricals should return values for
-    values which are not in the grouper [False], or only values which
-    appear in the grouper [True] """
+    values which are not in the grouper [False / None], or only values which
+    appear in the grouper [True]. [None] is supported for future compatiblity
+    if we decide to change the default (and would need to warn if this
+    parameter is not passed)"""
     return request.param
 
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 63f0b742eb8b3..8613ab4d8c59d 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -557,7 +557,7 @@ class _GroupBy(PandasObject, SelectionMixin):
     def __init__(self, obj, keys=None, axis=0, level=None,
                  grouper=None, exclusions=None, selection=None, as_index=True,
                  sort=True, group_keys=True, squeeze=False,
-                 observed=False, **kwargs):
+                 observed=None, **kwargs):
 
         self._selection = selection
 
@@ -2907,7 +2907,7 @@ class Grouping(object):
     """
 
     def __init__(self, index, grouper=None, obj=None, name=None, level=None,
-                 sort=True, observed=False, in_axis=False):
+                 sort=True, observed=None, in_axis=False):
 
         self.name = name
         self.level = level
@@ -2964,6 +2964,12 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
             # a passed Categorical
             elif is_categorical_dtype(self.grouper):
 
+                # observed can be True/False/None
+                # we treat None as False. If in the future
+                # we need to warn if observed is not passed
+                # then we have this option
+                # gh-20583
+
                 self.all_grouper = self.grouper
                 self.grouper = self.grouper._codes_for_groupby(
                     self.sort, observed)
@@ -3082,7 +3088,7 @@ def groups(self):
 
 
 def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
-                 observed=False, mutated=False, validate=True):
+                 observed=None, mutated=False, validate=True):
     """
     create and return a BaseGrouper, which is an internal
     mapping of how to create the grouper indexers.
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 923a79b6a7720..e0793b8e1bd64 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -395,6 +395,26 @@ def test_observed_perf():
     assert result.index.levels[2].nunique() == df.other_id.nunique()
 
 
+def test_observed_groups(observed):
+    # gh-20583
+    # test that we have the appropriate groups
+
+    cat = pd.Categorical(['a', 'c', 'a'], categories=['a', 'b', 'c'])
+    df = pd.DataFrame({'cat': cat, 'vals': [1, 2, 3]})
+    g = df.groupby('cat', observed=observed)
+
+    result = g.groups
+    if observed:
+        expected = {'a': Index([0, 2], dtype='int64'),
+                    'c': Index([1], dtype='int64')}
+    else:
+        expected = {'a': Index([0, 2], dtype='int64'),
+                    'b': Index([], dtype='int64'),
+                    'c': Index([1], dtype='int64')}
+
+    tm.assert_dict_equal(result, expected)
+
+
 def test_datetime():
     # GH9049: ensure backward compatibility
     levels = pd.date_range('2014-01-01', periods=4)