From 8def64931af8a01f4af50d79a8d628fe3e63f00c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 21 Apr 2018 18:26:27 -0400 Subject: [PATCH] TST: split test_groupby.py (#20781) closes #20696 --- .../tests/groupby/aggregate/test_aggregate.py | 71 +- pandas/tests/groupby/common.py | 62 - pandas/tests/groupby/conftest.py | 77 + pandas/tests/groupby/test_apply.py | 517 ++ pandas/tests/groupby/test_categorical.py | 1415 ++--- pandas/tests/groupby/test_filters.py | 1180 ++--- pandas/tests/groupby/test_function.py | 1120 ++++ pandas/tests/groupby/test_functional.py | 372 -- pandas/tests/groupby/test_groupby.py | 4606 ++++++----------- pandas/tests/groupby/test_grouping.py | 115 +- pandas/tests/groupby/test_nth.py | 618 +-- pandas/tests/groupby/test_rank.py | 254 + pandas/tests/groupby/test_transform.py | 1464 +++--- 13 files changed, 5983 insertions(+), 5888 deletions(-) delete mode 100644 pandas/tests/groupby/common.py create mode 100644 pandas/tests/groupby/conftest.py create mode 100644 pandas/tests/groupby/test_apply.py create mode 100644 pandas/tests/groupby/test_function.py delete mode 100644 pandas/tests/groupby/test_functional.py create mode 100644 pandas/tests/groupby/test_rank.py diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index d85719d328ff2..b2f18e11de8ee 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -15,51 +15,6 @@ import pandas.util.testing as tm -@pytest.fixture -def ts(): - return tm.makeTimeSeries() - - -@pytest.fixture -def tsframe(): - return DataFrame(tm.getTimeSeriesData()) - - -@pytest.fixture -def df(): - return DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - -@pytest.fixture -def mframe(): - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - return DataFrame(np.random.randn(10, 3), - index=index, - columns=['A', 'B', 'C']) - - -@pytest.fixture -def three_group(): - return DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', - 'bar', 'bar', 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', - 'one', 'two', 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', - 'shiny', 'dull', 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) - - def test_agg_regression1(tsframe): grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.agg(np.mean) @@ -87,6 +42,32 @@ def test_agg_ser_multi_key(df): tm.assert_series_equal(results, expected) +def test_groupby_aggregation_mixed_dtype(): + + # GH 6212 + expected = DataFrame({ + 'v1': [5, 5, 7, np.nan, 3, 3, 4, 1], + 'v2': [55, 55, 77, np.nan, 33, 33, 44, 11]}, + index=MultiIndex.from_tuples([(1, 95), (1, 99), (2, 95), (2, 99), + ('big', 'damp'), + ('blue', 'dry'), + ('red', 'red'), ('red', 'wet')], + names=['by1', 'by2'])) + + df = DataFrame({ + 'v1': [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + 'v2': [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + 'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, + 12], + 'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, + np.nan, np.nan] + }) + + g = df.groupby(['by1', 'by2']) + result = 
g[['v1', 'v2']].mean() + tm.assert_frame_equal(result, expected) + + def test_agg_apply_corner(ts, tsframe): # nothing to group, all NA grouped = ts.groupby(ts * np.nan) diff --git a/pandas/tests/groupby/common.py b/pandas/tests/groupby/common.py deleted file mode 100644 index 3e99e8211b4f8..0000000000000 --- a/pandas/tests/groupby/common.py +++ /dev/null @@ -1,62 +0,0 @@ -""" Base setup """ - -import pytest -import numpy as np -from pandas.util import testing as tm -from pandas import DataFrame, MultiIndex - - -@pytest.fixture -def mframe(): - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - return DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) - - -@pytest.fixture -def df(): - return DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - -class MixIn(object): - - def setup_method(self, method): - self.ts = tm.makeTimeSeries() - - self.seriesd = tm.getSeriesData() - self.tsd = tm.getTimeSeriesData() - self.frame = DataFrame(self.seriesd) - self.tsframe = DataFrame(self.tsd) - - self.df = df() - self.df_mixed_floats = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.array( - np.random.randn(8), dtype='float32')}) - - self.mframe = mframe() - - self.three_group = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) - - -def assert_fp_equal(a, b): - assert (np.abs(a - b) < 1e-12).all() diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py new file mode 100644 index 0000000000000..877aa835ac6f5 --- /dev/null +++ b/pandas/tests/groupby/conftest.py @@ -0,0 +1,77 @@ +import pytest +import numpy as np +from pandas import MultiIndex, DataFrame +from pandas.util import testing as tm + + +@pytest.fixture +def mframe(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + return DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + + +@pytest.fixture +def df(): + return DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + +@pytest.fixture +def ts(): + return tm.makeTimeSeries() + + +@pytest.fixture +def seriesd(): + return tm.getSeriesData() + + +@pytest.fixture +def tsd(): + return tm.getTimeSeriesData() + + +@pytest.fixture +def frame(seriesd): + return DataFrame(seriesd) + + +@pytest.fixture +def tsframe(tsd): + return DataFrame(tsd) + + +@pytest.fixture +def df_mixed_floats(): + return DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.array( + np.random.randn(8), 
dtype='float32')}) + + +@pytest.fixture +def three_group(): + return DataFrame({'A': ['foo', 'foo', 'foo', + 'foo', 'bar', 'bar', + 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', + 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', + 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py new file mode 100644 index 0000000000000..5ca10fe1af9d1 --- /dev/null +++ b/pandas/tests/groupby/test_apply.py @@ -0,0 +1,517 @@ +import pytest +import numpy as np +import pandas as pd +from datetime import datetime +from pandas.util import testing as tm +from pandas import DataFrame, MultiIndex, compat, Series, bdate_range, Index + + +def test_apply_issues(): + # GH 5788 + + s = """2011.05.16,00:00,1.40893 +2011.05.16,01:00,1.40760 +2011.05.16,02:00,1.40750 +2011.05.16,03:00,1.40649 +2011.05.17,02:00,1.40893 +2011.05.17,03:00,1.40760 +2011.05.17,04:00,1.40750 +2011.05.17,05:00,1.40649 +2011.05.18,02:00,1.40893 +2011.05.18,03:00,1.40760 +2011.05.18,04:00,1.40750 +2011.05.18,05:00,1.40649""" + + df = pd.read_csv( + compat.StringIO(s), header=None, names=['date', 'time', 'value'], + parse_dates=[['date', 'time']]) + df = df.set_index('date_time') + + expected = df.groupby(df.index.date).idxmax() + result = df.groupby(df.index.date).apply(lambda x: x.idxmax()) + tm.assert_frame_equal(result, expected) + + # GH 5789 + # don't auto coerce dates + df = pd.read_csv( + compat.StringIO(s), header=None, names=['date', 'time', 'value']) + exp_idx = pd.Index( + ['2011.05.16', '2011.05.17', '2011.05.18' + ], dtype=object, name='date') + expected = Series(['00:00', '02:00', '02:00'], index=exp_idx) + result = df.groupby('date').apply( + lambda x: x['time'][x['value'].idxmax()]) + tm.assert_series_equal(result, expected) + + +def test_apply_trivial(): + # GH 20066 + # trivial apply: ignore input and return a constant dataframe. + df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'], + 'data': [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=['key', 'data']) + expected = pd.concat([df.iloc[1:], df.iloc[1:]], + axis=1, keys=['float64', 'object']) + result = df.groupby([str(x) for x in df.dtypes], + axis=1).apply(lambda x: df.iloc[1:]) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail(reason=("GH 20066; function passed into apply " + "returns a DataFrame with the same index " + "as the one to create GroupBy object.")) +def test_apply_trivial_fail(): + # GH 20066 + # trivial apply fails if the constant dataframe has the same index + # with the one used to create GroupBy object. 
+    df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'],
+                       'data': [1.0, 2.0, 3.0, 4.0, 5.0]},
+                      columns=['key', 'data'])
+    expected = pd.concat([df, df],
+                         axis=1, keys=['float64', 'object'])
+    result = df.groupby([str(x) for x in df.dtypes],
+                        axis=1).apply(lambda x: df)
+
+    tm.assert_frame_equal(result, expected)
+
+
+def test_fast_apply():
+    # make sure that fast apply is correctly called
+    # rather than raising any kind of error;
+    # otherwise the python path will be called,
+    # which slows things down
+    N = 1000
+    labels = np.random.randint(0, 2000, size=N)
+    labels2 = np.random.randint(0, 3, size=N)
+    df = DataFrame({'key': labels,
+                    'key2': labels2,
+                    'value1': np.random.randn(N),
+                    'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)})
+
+    def f(g):
+        return 1
+
+    g = df.groupby(['key', 'key2'])
+
+    grouper = g.grouper
+
+    splitter = grouper._get_splitter(g._selected_obj, axis=g.axis)
+    group_keys = grouper._get_group_keys()
+
+    values, mutated = splitter.fast_apply(f, group_keys)
+    assert not mutated
+
+
+def test_apply_with_mixed_dtype():
+    # GH3480, apply with mixed dtype on axis=1 breaks in 0.11
+    df = DataFrame({'foo1': np.random.randn(6),
+                    'foo2': ['one', 'two', 'two', 'three', 'one', 'two']})
+    result = df.apply(lambda x: x, axis=1)
+    tm.assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts())
+
+    # GH 3610 incorrect dtype conversion with as_index=False
+    df = DataFrame({"c1": [1, 2, 6, 6, 8]})
+    df["c2"] = df.c1 / 2.0
+    result1 = df.groupby("c2").mean().reset_index().c2
+    result2 = df.groupby("c2", as_index=False).mean().c2
+    tm.assert_series_equal(result1, result2)
+
+
+def test_groupby_as_index_apply(df):
+    # GH #4648 and #3417
+    df = DataFrame({'item_id': ['b', 'b', 'a', 'c', 'a', 'b'],
+                    'user_id': [1, 2, 1, 1, 3, 1],
+                    'time': range(6)})
+
+    g_as = df.groupby('user_id', as_index=True)
+    g_not_as = df.groupby('user_id', as_index=False)
+
+    res_as = g_as.head(2).index
+    res_not_as = g_not_as.head(2).index
+    exp = Index([0, 1, 2, 4])
+    tm.assert_index_equal(res_as, exp)
+    tm.assert_index_equal(res_not_as, exp)
+
+    res_as_apply = g_as.apply(lambda x: x.head(2)).index
+    res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
+
+    # apply doesn't maintain the original ordering
+    # changed in GH5610 as the as_index=False returns a MI here
+    exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1),
+                                               (2, 4)])
+    tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
+    exp_as_apply = MultiIndex.from_tuples(tp, names=['user_id', None])
+
+    tm.assert_index_equal(res_as_apply, exp_as_apply)
+    tm.assert_index_equal(res_not_as_apply, exp_not_as_apply)
+
+    ind = Index(list('abcde'))
+    df = DataFrame([[1, 2], [2, 3], [1, 4], [1, 5], [2, 6]], index=ind)
+    res = df.groupby(0, as_index=False).apply(lambda x: x).index
+    tm.assert_index_equal(res, ind)
+
+
+def test_apply_concat_preserve_names(three_group):
+    grouped = three_group.groupby(['A', 'B'])
+
+    def desc(group):
+        result = group.describe()
+        result.index.name = 'stat'
+        return result
+
+    def desc2(group):
+        result = group.describe()
+        result.index.name = 'stat'
+        result = result[:len(group)]
+        # weirdo
+        return result
+
+    def desc3(group):
+        result = group.describe()
+
+        # names are different
+        result.index.name = 'stat_%d' % len(group)
+
+        result = result[:len(group)]
+        # weirdo
+        return result
+
+    result = grouped.apply(desc)
+    assert result.index.names == ('A', 'B', 'stat')
+
+    result2 = grouped.apply(desc2)
+    assert result2.index.names == ('A', 'B', 'stat')
+
+    result3 = grouped.apply(desc3)
+    assert result3.index.names 
== ('A', 'B', None) + + +def test_apply_series_to_frame(): + def f(piece): + with np.errstate(invalid='ignore'): + logged = np.log(piece) + return DataFrame({'value': piece, + 'demeaned': piece - piece.mean(), + 'logged': logged}) + + dr = bdate_range('1/1/2000', periods=100) + ts = Series(np.random.randn(100), index=dr) + + grouped = ts.groupby(lambda x: x.month) + result = grouped.apply(f) + + assert isinstance(result, DataFrame) + tm.assert_index_equal(result.index, ts.index) + + +def test_apply_series_yield_constant(df): + result = df.groupby(['A', 'B'])['C'].apply(len) + assert result.index.names[:2] == ('A', 'B') + + +def test_apply_frame_yield_constant(df): + # GH13568 + result = df.groupby(['A', 'B']).apply(len) + assert isinstance(result, Series) + assert result.name is None + + result = df.groupby(['A', 'B'])[['C', 'D']].apply(len) + assert isinstance(result, Series) + assert result.name is None + + +def test_apply_frame_to_series(df): + grouped = df.groupby(['A', 'B']) + result = grouped.apply(len) + expected = grouped.count()['C'] + tm.assert_index_equal(result.index, expected.index) + tm.assert_numpy_array_equal(result.values, expected.values) + + +def test_apply_frame_concat_series(): + def trans(group): + return group.groupby('B')['C'].sum().sort_values()[:2] + + def trans2(group): + grouped = group.groupby(df.reindex(group.index)['B']) + return grouped.sum().sort_values()[:2] + + df = DataFrame({'A': np.random.randint(0, 5, 1000), + 'B': np.random.randint(0, 5, 1000), + 'C': np.random.randn(1000)}) + + result = df.groupby('A').apply(trans) + exp = df.groupby('A')['C'].apply(trans2) + tm.assert_series_equal(result, exp, check_names=False) + assert result.name == 'C' + + +def test_apply_transform(ts): + grouped = ts.groupby(lambda x: x.month) + result = grouped.apply(lambda x: x * 2) + expected = grouped.transform(lambda x: x * 2) + tm.assert_series_equal(result, expected) + + +def test_apply_multikey_corner(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + + def f(group): + return group.sort_values('A')[-5:] + + result = grouped.apply(f) + for key, group in grouped: + tm.assert_frame_equal(result.loc[key], f(group)) + + +def test_apply_chunk_view(): + # Low level tinkering could be unsafe, make sure not + df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], + 'value': compat.lrange(9)}) + + # return view + f = lambda x: x[:2] + + result = df.groupby('key', group_keys=False).apply(f) + expected = df.take([0, 1, 3, 4, 6, 7]) + tm.assert_frame_equal(result, expected) + + +def test_apply_no_name_column_conflict(): + df = DataFrame({'name': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2], + 'name2': [0, 0, 0, 1, 1, 1, 0, 0, 1, 1], + 'value': compat.lrange(10)[::-1]}) + + # it works! 
#2605 + grouped = df.groupby(['name', 'name2']) + grouped.apply(lambda x: x.sort_values('value', inplace=True)) + + +def test_apply_typecast_fail(): + df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], + 'c': np.tile( + ['a', 'b', 'c'], 2), + 'v': np.arange(1., 7.)}) + + def f(group): + v = group['v'] + group['v2'] = (v - v.min()) / (v.max() - v.min()) + return group + + result = df.groupby('d').apply(f) + + expected = df.copy() + expected['v2'] = np.tile([0., 0.5, 1], 2) + + tm.assert_frame_equal(result, expected) + + +def test_apply_multiindex_fail(): + index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] + ]) + df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], + 'c': np.tile(['a', 'b', 'c'], 2), + 'v': np.arange(1., 7.)}, index=index) + + def f(group): + v = group['v'] + group['v2'] = (v - v.min()) / (v.max() - v.min()) + return group + + result = df.groupby('d').apply(f) + + expected = df.copy() + expected['v2'] = np.tile([0., 0.5, 1], 2) + + tm.assert_frame_equal(result, expected) + + +def test_apply_corner(tsframe): + result = tsframe.groupby(lambda x: x.year).apply(lambda x: x * 2) + expected = tsframe * 2 + tm.assert_frame_equal(result, expected) + + +def test_apply_without_copy(): + # GH 5545 + # returning a non-copy in an applied function fails + + data = DataFrame({'id_field': [100, 100, 200, 300], + 'category': ['a', 'b', 'c', 'c'], + 'value': [1, 2, 3, 4]}) + + def filt1(x): + if x.shape[0] == 1: + return x.copy() + else: + return x[x.category == 'c'] + + def filt2(x): + if x.shape[0] == 1: + return x + else: + return x[x.category == 'c'] + + expected = data.groupby('id_field').apply(filt1) + result = data.groupby('id_field').apply(filt2) + tm.assert_frame_equal(result, expected) + + +def test_apply_corner_cases(): + # #535, can't use sliding iterator + + N = 1000 + labels = np.random.randint(0, 100, size=N) + df = DataFrame({'key': labels, + 'value1': np.random.randn(N), + 'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)}) + + grouped = df.groupby('key') + + def f(g): + g['value3'] = g['value1'] * 2 + return g + + result = grouped.apply(f) + assert 'value3' in result + + +def test_apply_numeric_coercion_when_datetime(): + # In the past, group-by/apply operations have been over-eager + # in converting dtypes to numeric, in the presence of datetime + # columns. Various GH issues were filed, the reproductions + # for which are here. 
+
+    # GH 15670
+    df = pd.DataFrame({'Number': [1, 2],
+                       'Date': ["2017-03-02"] * 2,
+                       'Str': ["foo", "inf"]})
+    expected = df.groupby(['Number']).apply(lambda x: x.iloc[0])
+    df.Date = pd.to_datetime(df.Date)
+    result = df.groupby(['Number']).apply(lambda x: x.iloc[0])
+    tm.assert_series_equal(result['Str'], expected['Str'])
+
+    # GH 15421
+    df = pd.DataFrame({'A': [10, 20, 30],
+                       'B': ['foo', '3', '4'],
+                       'T': [pd.Timestamp("12:31:22")] * 3})
+
+    def get_B(g):
+        return g.iloc[0][['B']]
+    result = df.groupby('A').apply(get_B)['B']
+    expected = df.B
+    expected.index = df.A
+    tm.assert_series_equal(result, expected)
+
+    # GH 14423
+    def predictions(tool):
+        out = pd.Series(index=['p1', 'p2', 'useTime'], dtype=object)
+        if 'step1' in list(tool.State):
+            out['p1'] = str(tool[tool.State == 'step1'].Machine.values[0])
+        if 'step2' in list(tool.State):
+            out['p2'] = str(tool[tool.State == 'step2'].Machine.values[0])
+            out['useTime'] = str(
+                tool[tool.State == 'step2'].oTime.values[0])
+        return out
+    df1 = pd.DataFrame({'Key': ['B', 'B', 'A', 'A'],
+                        'State': ['step1', 'step2', 'step1', 'step2'],
+                        'oTime': ['', '2016-09-19 05:24:33',
+                                  '', '2016-09-19 23:59:04'],
+                        'Machine': ['23', '36L', '36R', '36R']})
+    df2 = df1.copy()
+    df2.oTime = pd.to_datetime(df2.oTime)
+    expected = df1.groupby('Key').apply(predictions).p1
+    result = df2.groupby('Key').apply(predictions).p1
+    tm.assert_series_equal(expected, result)
+
+
+def test_time_field_bug():
+    # Test a fix for the following error related to GH issue 11324: when
+    # non-key fields in a group-by dataframe contained time-based fields
+    # that were not returned by the apply function, an exception would be
+    # raised.
+
+    df = pd.DataFrame({'a': 1, 'b': [datetime.now() for nn in range(10)]})
+
+    def func_with_no_date(batch):
+        return pd.Series({'c': 2})
+
+    def func_with_date(batch):
+        return pd.Series({'b': datetime(2015, 1, 1), 'c': 2})
+
+    dfg_no_conversion = df.groupby(by=['a']).apply(func_with_no_date)
+    dfg_no_conversion_expected = pd.DataFrame({'c': 2}, index=[1])
+    dfg_no_conversion_expected.index.name = 'a'
+
+    dfg_conversion = df.groupby(by=['a']).apply(func_with_date)
+    dfg_conversion_expected = pd.DataFrame(
+        {'b': datetime(2015, 1, 1),
+         'c': 2}, index=[1])
+    dfg_conversion_expected.index.name = 'a'
+
+    tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected)
+    tm.assert_frame_equal(dfg_conversion, dfg_conversion_expected)
+
+
+def test_gb_apply_list_of_unequal_len_arrays():
+
+    # GH1738
+    df = DataFrame({'group1': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a',
+                               'b', 'b', 'b'],
+                    'group2': ['c', 'c', 'd', 'd', 'd', 'e', 'c', 'c', 'd',
+                               'd', 'd', 'e'],
+                    'weight': [1.1, 2, 3, 4, 5, 6, 2, 4, 6, 8, 1, 2],
+                    'value': [7.1, 8, 9, 10, 11, 12, 8, 7, 6, 5, 4, 3]})
+    df = df.set_index(['group1', 'group2'])
+    df_grouped = df.groupby(level=['group1', 'group2'], sort=True)
+
+    def noddy(value, weight):
+        out = np.array(value * weight).repeat(3)
+        return out
+
+    # the kernel function returns arrays of unequal length;
+    # pandas sniffs the first one, sees it's an array and not
+    # a list, assumes the rest are of equal length,
+    # and so tries a vstack
+
+    # don't die
+    df_grouped.apply(lambda x: noddy(x.value, x.weight))
+
+
+def test_groupby_apply_all_none():
+    # Tests to make sure no errors occur if the apply function returns all
+    # None values. Issue 9684. 
+ test_df = DataFrame({'groups': [0, 0, 1, 1], + 'random_vars': [8, 7, 4, 5]}) + + def test_func(x): + pass + + result = test_df.groupby('groups').apply(test_func) + expected = DataFrame() + tm.assert_frame_equal(result, expected) + + +def test_groupby_apply_none_first(): + # GH 12824. Tests if apply returns None first. + test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]}) + test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]}) + + def test_func(x): + if x.shape[0] < 2: + return None + return x.iloc[[0, -1]] + + result1 = test_df1.groupby('groups').apply(test_func) + result2 = test_df2.groupby('groups').apply(test_func) + index1 = MultiIndex.from_arrays([[1, 1], [0, 2]], + names=['groups', None]) + index2 = MultiIndex.from_arrays([[2, 2], [1, 3]], + names=['groups', None]) + expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]}, + index=index1) + expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]}, + index=index2) + tm.assert_frame_equal(result1, expected1) + tm.assert_frame_equal(result2, expected2) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index bcd0da28b5a34..160b60e69f39d 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -9,710 +9,725 @@ import pandas as pd from pandas import (Index, MultiIndex, CategoricalIndex, - DataFrame, Categorical, Series, Interval) + DataFrame, Categorical, Series, Interval, qcut) from pandas.util.testing import assert_frame_equal, assert_series_equal import pandas.util.testing as tm -from .common import MixIn - - -class TestGroupByCategorical(MixIn): - - def test_groupby(self): - - cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"], - categories=["a", "b", "c", "d"], ordered=True) - data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) - - exp_index = CategoricalIndex(list('abcd'), name='b', ordered=True) - expected = DataFrame({'a': [1, 2, 4, np.nan]}, index=exp_index) - result = data.groupby("b").mean() - tm.assert_frame_equal(result, expected) - - raw_cat1 = Categorical(["a", "a", "b", "b"], - categories=["a", "b", "z"], ordered=True) - raw_cat2 = Categorical(["c", "d", "c", "d"], - categories=["c", "d", "y"], ordered=True) - df = DataFrame({"A": raw_cat1, "B": raw_cat2, "values": [1, 2, 3, 4]}) - - # single grouper - gb = df.groupby("A") - exp_idx = CategoricalIndex(['a', 'b', 'z'], name='A', ordered=True) - expected = DataFrame({'values': Series([3, 7, 0], index=exp_idx)}) - result = gb.sum() - tm.assert_frame_equal(result, expected) - - # multiple groupers - gb = df.groupby(['A', 'B']) - exp_index = pd.MultiIndex.from_product( - [Categorical(["a", "b", "z"], ordered=True), - Categorical(["c", "d", "y"], ordered=True)], - names=['A', 'B']) - expected = DataFrame({'values': [1, 2, np.nan, 3, 4, np.nan, - np.nan, np.nan, np.nan]}, - index=exp_index) - result = gb.sum() - tm.assert_frame_equal(result, expected) - - # multiple groupers with a non-cat - df = df.copy() - df['C'] = ['foo', 'bar'] * 2 - gb = df.groupby(['A', 'B', 'C']) - exp_index = pd.MultiIndex.from_product( - [Categorical(["a", "b", "z"], ordered=True), - Categorical(["c", "d", "y"], ordered=True), - ['foo', 'bar']], - names=['A', 'B', 'C']) - expected = DataFrame({'values': Series( - np.nan, index=exp_index)}).sort_index() - expected.iloc[[1, 2, 7, 8], 0] = [1, 2, 3, 4] - result = gb.sum() - tm.assert_frame_equal(result, expected) - - # GH 8623 - x = DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], - [1, 'John P. 
Doe']], - columns=['person_id', 'person_name']) - x['person_name'] = Categorical(x.person_name) - - g = x.groupby(['person_id']) - result = g.transform(lambda x: x) - tm.assert_frame_equal(result, x[['person_name']]) - - result = x.drop_duplicates('person_name') - expected = x.iloc[[0, 1]] - tm.assert_frame_equal(result, expected) - - def f(x): - return x.drop_duplicates('person_name').iloc[0] - - result = g.apply(f) - expected = x.iloc[[0, 1]].copy() - expected.index = Index([1, 2], name='person_id') - expected['person_name'] = expected['person_name'].astype('object') - tm.assert_frame_equal(result, expected) - - # GH 9921 - # Monotonic - df = DataFrame({"a": [5, 15, 25]}) - c = pd.cut(df.a, bins=[0, 10, 20, 30, 40]) - - result = df.a.groupby(c).transform(sum) - tm.assert_series_equal(result, df['a']) - - tm.assert_series_equal( - df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) - tm.assert_frame_equal(df.groupby(c).transform(sum), df[['a']]) - tm.assert_frame_equal( - df.groupby(c).transform(lambda xs: np.max(xs)), df[['a']]) - - # Filter - tm.assert_series_equal(df.a.groupby(c).filter(np.all), df['a']) - tm.assert_frame_equal(df.groupby(c).filter(np.all), df) - - # Non-monotonic - df = DataFrame({"a": [5, 15, 25, -5]}) - c = pd.cut(df.a, bins=[-10, 0, 10, 20, 30, 40]) - - result = df.a.groupby(c).transform(sum) - tm.assert_series_equal(result, df['a']) - - tm.assert_series_equal( - df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) - tm.assert_frame_equal(df.groupby(c).transform(sum), df[['a']]) - tm.assert_frame_equal( - df.groupby(c).transform(lambda xs: np.sum(xs)), df[['a']]) - - # GH 9603 - df = DataFrame({'a': [1, 0, 0, 0]}) - c = pd.cut(df.a, [0, 1, 2, 3, 4], labels=Categorical(list('abcd'))) - result = df.groupby(c).apply(len) - - exp_index = CategoricalIndex( - c.values.categories, ordered=c.values.ordered) - expected = Series([1, 0, 0, 0], index=exp_index) - expected.index.name = 'a' - tm.assert_series_equal(result, expected) - - def test_groupby_sort(self): - - # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby - # This should result in a properly sorted Series so that the plot - # has a sorted x axis - # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') - - df = DataFrame({'value': np.random.randint(0, 10000, 100)}) - labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] - cat_labels = Categorical(labels, labels) - - df = df.sort_values(by=['value'], ascending=True) - df['value_group'] = pd.cut(df.value, range(0, 10500, 500), - right=False, labels=cat_labels) - - res = df.groupby(['value_group'])['value_group'].count() - exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))] - exp.index = CategoricalIndex(exp.index, name=exp.index.name) - tm.assert_series_equal(res, exp) - - def test_level_groupby_get_group(self): - # GH15155 - df = DataFrame(data=np.arange(2, 22, 2), - index=MultiIndex( - levels=[pd.CategoricalIndex(["a", "b"]), range(10)], - labels=[[0] * 5 + [1] * 5, range(10)], - names=["Index1", "Index2"])) - g = df.groupby(level=["Index1"]) - - # expected should equal test.loc[["a"]] - # GH15166 - expected = DataFrame(data=np.arange(2, 12, 2), - index=pd.MultiIndex(levels=[pd.CategoricalIndex( - ["a", "b"]), range(5)], - labels=[[0] * 5, range(5)], - names=["Index1", "Index2"])) - result = g.get_group('a') - assert_frame_equal(result, expected) - - def test_apply_use_categorical_name(self): - from pandas import qcut - cats = qcut(self.df.C, 4) - - def 
get_stats(group): - return {'min': group.min(), - 'max': group.max(), - 'count': group.count(), - 'mean': group.mean()} - - result = self.df.groupby(cats).D.apply(get_stats) - assert result.index.names[0] == 'C' - - def test_apply_categorical_data(self): - # GH 10138 - for ordered in [True, False]: - dense = Categorical(list('abc'), ordered=ordered) - # 'b' is in the categories but not in the list - missing = Categorical( - list('aaa'), categories=['a', 'b'], ordered=ordered) - values = np.arange(len(dense)) - df = DataFrame({'missing': missing, - 'dense': dense, - 'values': values}) - grouped = df.groupby(['missing', 'dense']) - - # missing category 'b' should still exist in the output index - idx = MultiIndex.from_product( - [Categorical(['a', 'b'], ordered=ordered), - Categorical(['a', 'b', 'c'], ordered=ordered)], - names=['missing', 'dense']) - expected = DataFrame([0, 1, 2, np.nan, np.nan, np.nan], - index=idx, - columns=['values']) - - assert_frame_equal(grouped.apply(lambda x: np.mean(x)), expected) - assert_frame_equal(grouped.mean(), expected) - assert_frame_equal(grouped.agg(np.mean), expected) - - # but for transform we should still get back the original index - idx = MultiIndex.from_product([['a'], ['a', 'b', 'c']], - names=['missing', 'dense']) - expected = Series(1, index=idx) - assert_series_equal(grouped.apply(lambda x: 1), expected) - - def test_groupby_categorical(self): - levels = ['foo', 'bar', 'baz', 'qux'] - codes = np.random.randint(0, 4, size=100) - - cats = Categorical.from_codes(codes, levels, ordered=True) - - data = DataFrame(np.random.randn(100, 4)) - - result = data.groupby(cats).mean() - - expected = data.groupby(np.asarray(cats)).mean() - exp_idx = CategoricalIndex(levels, categories=cats.categories, - ordered=True) - expected = expected.reindex(exp_idx) - - assert_frame_equal(result, expected) - - grouped = data.groupby(cats) - desc_result = grouped.describe() - - idx = cats.codes.argsort() - ord_labels = np.asarray(cats).take(idx) - ord_data = data.take(idx) - - exp_cats = Categorical(ord_labels, ordered=True, - categories=['foo', 'bar', 'baz', 'qux']) - expected = ord_data.groupby(exp_cats, sort=False).describe() - assert_frame_equal(desc_result, expected) - - # GH 10460 - expc = Categorical.from_codes(np.arange(4).repeat(8), - levels, ordered=True) - exp = CategoricalIndex(expc) - tm.assert_index_equal((desc_result.stack().index - .get_level_values(0)), exp) - exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', - '75%', 'max'] * 4) - tm.assert_index_equal((desc_result.stack().index - .get_level_values(1)), exp) - - def test_groupby_datetime_categorical(self): - # GH9049: ensure backward compatibility - levels = pd.date_range('2014-01-01', periods=4) - codes = np.random.randint(0, 4, size=100) - - cats = Categorical.from_codes(codes, levels, ordered=True) - - data = DataFrame(np.random.randn(100, 4)) - result = data.groupby(cats).mean() - - expected = data.groupby(np.asarray(cats)).mean() - expected = expected.reindex(levels) - expected.index = CategoricalIndex(expected.index, - categories=expected.index, - ordered=True) - - assert_frame_equal(result, expected) - - grouped = data.groupby(cats) - desc_result = grouped.describe() - - idx = cats.codes.argsort() - ord_labels = cats.take_nd(idx) - ord_data = data.take(idx) - expected = ord_data.groupby(ord_labels).describe() - assert_frame_equal(desc_result, expected) - tm.assert_index_equal(desc_result.index, expected.index) - tm.assert_index_equal( - desc_result.index.get_level_values(0), - 
expected.index.get_level_values(0)) - - # GH 10460 - expc = Categorical.from_codes( - np.arange(4).repeat(8), levels, ordered=True) - exp = CategoricalIndex(expc) - tm.assert_index_equal((desc_result.stack().index - .get_level_values(0)), exp) - exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', - '75%', 'max'] * 4) - tm.assert_index_equal((desc_result.stack().index - .get_level_values(1)), exp) - - def test_groupby_categorical_index(self): - - s = np.random.RandomState(12345) - levels = ['foo', 'bar', 'baz', 'qux'] - codes = s.randint(0, 4, size=20) - cats = Categorical.from_codes(codes, levels, ordered=True) - df = DataFrame( - np.repeat( - np.arange(20), 4).reshape(-1, 4), columns=list('abcd')) - df['cats'] = cats - - # with a cat index - result = df.set_index('cats').groupby(level=0).sum() - expected = df[list('abcd')].groupby(cats.codes).sum() - expected.index = CategoricalIndex( - Categorical.from_codes( - [0, 1, 2, 3], levels, ordered=True), name='cats') - assert_frame_equal(result, expected) - # with a cat column, should produce a cat index - result = df.groupby('cats').sum() - expected = df[list('abcd')].groupby(cats.codes).sum() - expected.index = CategoricalIndex( - Categorical.from_codes( - [0, 1, 2, 3], levels, ordered=True), name='cats') - assert_frame_equal(result, expected) - - def test_groupby_describe_categorical_columns(self): - # GH 11558 - cats = pd.CategoricalIndex(['qux', 'foo', 'baz', 'bar'], - categories=['foo', 'bar', 'baz', 'qux'], - ordered=True) - df = DataFrame(np.random.randn(20, 4), columns=cats) - result = df.groupby([1, 2, 3, 4] * 5).describe() - - tm.assert_index_equal(result.stack().columns, cats) - tm.assert_categorical_equal(result.stack().columns.values, cats.values) - - def test_groupby_unstack_categorical(self): - # GH11558 (example is taken from the original issue) - df = pd.DataFrame({'a': range(10), - 'medium': ['A', 'B'] * 5, - 'artist': list('XYXXY') * 2}) - df['medium'] = df['medium'].astype('category') - - gcat = df.groupby(['artist', 'medium'])['a'].count().unstack() - result = gcat.describe() - - exp_columns = pd.CategoricalIndex(['A', 'B'], ordered=False, - name='medium') - tm.assert_index_equal(result.columns, exp_columns) - tm.assert_categorical_equal(result.columns.values, exp_columns.values) - - result = gcat['A'] + gcat['B'] - expected = pd.Series([6, 4], index=pd.Index(['X', 'Y'], name='artist')) - tm.assert_series_equal(result, expected) - - def test_groupby_bins_unequal_len(self): - # GH3011 - series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4]) - bins = pd.cut(series.dropna().values, 4) - - # len(bins) != len(series) here - def f(): - series.groupby(bins).mean() - pytest.raises(ValueError, f) - - def test_groupby_multi_categorical_as_index(self): - # GH13204 - df = DataFrame({'cat': Categorical([1, 2, 2], [1, 2, 3]), - 'A': [10, 11, 11], - 'B': [101, 102, 103]}) - result = df.groupby(['cat', 'A'], as_index=False).sum() - expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]), - 'A': [10, 11, 10, 11, 10, 11], - 'B': [101.0, nan, nan, 205.0, nan, nan]}, - columns=['cat', 'A', 'B']) - tm.assert_frame_equal(result, expected) - - # function grouper - f = lambda r: df.loc[r, 'A'] - result = df.groupby(['cat', f], as_index=False).sum() - expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]), - 'A': [10.0, nan, nan, 22.0, nan, nan], - 'B': [101.0, nan, nan, 205.0, nan, nan]}, - columns=['cat', 'A', 'B']) - tm.assert_frame_equal(result, expected) - - # another not in-axis grouper - s = Series(['a', 'b', 'b'], 
name='cat2') - result = df.groupby(['cat', s], as_index=False).sum() - expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]), - 'A': [10.0, nan, nan, 22.0, nan, nan], - 'B': [101.0, nan, nan, 205.0, nan, nan]}, - columns=['cat', 'A', 'B']) - tm.assert_frame_equal(result, expected) - - # GH18872: conflicting names in desired index - pytest.raises(ValueError, lambda: df.groupby(['cat', - s.rename('cat')]).sum()) - - # is original index dropped? - expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]), - 'A': [10, 11, 10, 11, 10, 11], - 'B': [101.0, nan, nan, 205.0, nan, nan]}, - columns=['cat', 'A', 'B']) - - group_columns = ['cat', 'A'] - - for name in [None, 'X', 'B', 'cat']: - df.index = Index(list("abc"), name=name) - - if name in group_columns and name in df.index.names: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = df.groupby(group_columns, as_index=False).sum() - - else: +def test_groupby(): + + cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"], + categories=["a", "b", "c", "d"], ordered=True) + data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) + + exp_index = CategoricalIndex(list('abcd'), name='b', ordered=True) + expected = DataFrame({'a': [1, 2, 4, np.nan]}, index=exp_index) + result = data.groupby("b").mean() + tm.assert_frame_equal(result, expected) + + raw_cat1 = Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + raw_cat2 = Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = DataFrame({"A": raw_cat1, "B": raw_cat2, "values": [1, 2, 3, 4]}) + + # single grouper + gb = df.groupby("A") + exp_idx = CategoricalIndex(['a', 'b', 'z'], name='A', ordered=True) + expected = DataFrame({'values': Series([3, 7, 0], index=exp_idx)}) + result = gb.sum() + tm.assert_frame_equal(result, expected) + + # multiple groupers + gb = df.groupby(['A', 'B']) + exp_index = pd.MultiIndex.from_product( + [Categorical(["a", "b", "z"], ordered=True), + Categorical(["c", "d", "y"], ordered=True)], + names=['A', 'B']) + expected = DataFrame({'values': [1, 2, np.nan, 3, 4, np.nan, + np.nan, np.nan, np.nan]}, + index=exp_index) + result = gb.sum() + tm.assert_frame_equal(result, expected) + + # multiple groupers with a non-cat + df = df.copy() + df['C'] = ['foo', 'bar'] * 2 + gb = df.groupby(['A', 'B', 'C']) + exp_index = pd.MultiIndex.from_product( + [Categorical(["a", "b", "z"], ordered=True), + Categorical(["c", "d", "y"], ordered=True), + ['foo', 'bar']], + names=['A', 'B', 'C']) + expected = DataFrame({'values': Series( + np.nan, index=exp_index)}).sort_index() + expected.iloc[[1, 2, 7, 8], 0] = [1, 2, 3, 4] + result = gb.sum() + tm.assert_frame_equal(result, expected) + + # GH 8623 + x = DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], + [1, 'John P. 
Doe']], + columns=['person_id', 'person_name']) + x['person_name'] = Categorical(x.person_name) + + g = x.groupby(['person_id']) + result = g.transform(lambda x: x) + tm.assert_frame_equal(result, x[['person_name']]) + + result = x.drop_duplicates('person_name') + expected = x.iloc[[0, 1]] + tm.assert_frame_equal(result, expected) + + def f(x): + return x.drop_duplicates('person_name').iloc[0] + + result = g.apply(f) + expected = x.iloc[[0, 1]].copy() + expected.index = Index([1, 2], name='person_id') + expected['person_name'] = expected['person_name'].astype('object') + tm.assert_frame_equal(result, expected) + + # GH 9921 + # Monotonic + df = DataFrame({"a": [5, 15, 25]}) + c = pd.cut(df.a, bins=[0, 10, 20, 30, 40]) + + result = df.a.groupby(c).transform(sum) + tm.assert_series_equal(result, df['a']) + + tm.assert_series_equal( + df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) + tm.assert_frame_equal(df.groupby(c).transform(sum), df[['a']]) + tm.assert_frame_equal( + df.groupby(c).transform(lambda xs: np.max(xs)), df[['a']]) + + # Filter + tm.assert_series_equal(df.a.groupby(c).filter(np.all), df['a']) + tm.assert_frame_equal(df.groupby(c).filter(np.all), df) + + # Non-monotonic + df = DataFrame({"a": [5, 15, 25, -5]}) + c = pd.cut(df.a, bins=[-10, 0, 10, 20, 30, 40]) + + result = df.a.groupby(c).transform(sum) + tm.assert_series_equal(result, df['a']) + + tm.assert_series_equal( + df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) + tm.assert_frame_equal(df.groupby(c).transform(sum), df[['a']]) + tm.assert_frame_equal( + df.groupby(c).transform(lambda xs: np.sum(xs)), df[['a']]) + + # GH 9603 + df = DataFrame({'a': [1, 0, 0, 0]}) + c = pd.cut(df.a, [0, 1, 2, 3, 4], labels=Categorical(list('abcd'))) + result = df.groupby(c).apply(len) + + exp_index = CategoricalIndex( + c.values.categories, ordered=c.values.ordered) + expected = Series([1, 0, 0, 0], index=exp_index) + expected.index.name = 'a' + tm.assert_series_equal(result, expected) + + +def test_groupby_sort(): + + # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby + # This should result in a properly sorted Series so that the plot + # has a sorted x axis + # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') + + df = DataFrame({'value': np.random.randint(0, 10000, 100)}) + labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=['value'], ascending=True) + df['value_group'] = pd.cut(df.value, range(0, 10500, 500), + right=False, labels=cat_labels) + + res = df.groupby(['value_group'])['value_group'].count() + exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))] + exp.index = CategoricalIndex(exp.index, name=exp.index.name) + tm.assert_series_equal(res, exp) + + +def test_level_groupby_get_group(): + # GH15155 + df = DataFrame(data=np.arange(2, 22, 2), + index=MultiIndex( + levels=[pd.CategoricalIndex(["a", "b"]), range(10)], + labels=[[0] * 5 + [1] * 5, range(10)], + names=["Index1", "Index2"])) + g = df.groupby(level=["Index1"]) + + # expected should equal test.loc[["a"]] + # GH15166 + expected = DataFrame(data=np.arange(2, 12, 2), + index=pd.MultiIndex(levels=[pd.CategoricalIndex( + ["a", "b"]), range(5)], + labels=[[0] * 5, range(5)], + names=["Index1", "Index2"])) + result = g.get_group('a') + + assert_frame_equal(result, expected) + + +def test_apply_use_categorical_name(df): + cats = qcut(df.C, 4) + + def get_stats(group): + return {'min': 
group.min(), + 'max': group.max(), + 'count': group.count(), + 'mean': group.mean()} + + result = df.groupby(cats).D.apply(get_stats) + assert result.index.names[0] == 'C' + + +def test_apply_categorical_data(): + # GH 10138 + for ordered in [True, False]: + dense = Categorical(list('abc'), ordered=ordered) + # 'b' is in the categories but not in the list + missing = Categorical( + list('aaa'), categories=['a', 'b'], ordered=ordered) + values = np.arange(len(dense)) + df = DataFrame({'missing': missing, + 'dense': dense, + 'values': values}) + grouped = df.groupby(['missing', 'dense']) + + # missing category 'b' should still exist in the output index + idx = MultiIndex.from_product( + [Categorical(['a', 'b'], ordered=ordered), + Categorical(['a', 'b', 'c'], ordered=ordered)], + names=['missing', 'dense']) + expected = DataFrame([0, 1, 2, np.nan, np.nan, np.nan], + index=idx, + columns=['values']) + + assert_frame_equal(grouped.apply(lambda x: np.mean(x)), expected) + assert_frame_equal(grouped.mean(), expected) + assert_frame_equal(grouped.agg(np.mean), expected) + + # but for transform we should still get back the original index + idx = MultiIndex.from_product([['a'], ['a', 'b', 'c']], + names=['missing', 'dense']) + expected = Series(1, index=idx) + assert_series_equal(grouped.apply(lambda x: 1), expected) + + +def test_groupby_categorical(): + levels = ['foo', 'bar', 'baz', 'qux'] + codes = np.random.randint(0, 4, size=100) + + cats = Categorical.from_codes(codes, levels, ordered=True) + + data = DataFrame(np.random.randn(100, 4)) + + result = data.groupby(cats).mean() + + expected = data.groupby(np.asarray(cats)).mean() + exp_idx = CategoricalIndex(levels, categories=cats.categories, + ordered=True) + expected = expected.reindex(exp_idx) + + assert_frame_equal(result, expected) + + grouped = data.groupby(cats) + desc_result = grouped.describe() + + idx = cats.codes.argsort() + ord_labels = np.asarray(cats).take(idx) + ord_data = data.take(idx) + + exp_cats = Categorical(ord_labels, ordered=True, + categories=['foo', 'bar', 'baz', 'qux']) + expected = ord_data.groupby(exp_cats, sort=False).describe() + assert_frame_equal(desc_result, expected) + + # GH 10460 + expc = Categorical.from_codes(np.arange(4).repeat(8), + levels, ordered=True) + exp = CategoricalIndex(expc) + tm.assert_index_equal((desc_result.stack().index + .get_level_values(0)), exp) + exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', + '75%', 'max'] * 4) + tm.assert_index_equal((desc_result.stack().index + .get_level_values(1)), exp) + + +def test_groupby_datetime_categorical(): + # GH9049: ensure backward compatibility + levels = pd.date_range('2014-01-01', periods=4) + codes = np.random.randint(0, 4, size=100) + + cats = Categorical.from_codes(codes, levels, ordered=True) + + data = DataFrame(np.random.randn(100, 4)) + result = data.groupby(cats).mean() + + expected = data.groupby(np.asarray(cats)).mean() + expected = expected.reindex(levels) + expected.index = CategoricalIndex(expected.index, + categories=expected.index, + ordered=True) + + assert_frame_equal(result, expected) + + grouped = data.groupby(cats) + desc_result = grouped.describe() + + idx = cats.codes.argsort() + ord_labels = cats.take_nd(idx) + ord_data = data.take(idx) + expected = ord_data.groupby(ord_labels).describe() + assert_frame_equal(desc_result, expected) + tm.assert_index_equal(desc_result.index, expected.index) + tm.assert_index_equal( + desc_result.index.get_level_values(0), + expected.index.get_level_values(0)) + + # GH 10460 + expc = 
Categorical.from_codes( + np.arange(4).repeat(8), levels, ordered=True) + exp = CategoricalIndex(expc) + tm.assert_index_equal((desc_result.stack().index + .get_level_values(0)), exp) + exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', + '75%', 'max'] * 4) + tm.assert_index_equal((desc_result.stack().index + .get_level_values(1)), exp) + + +def test_groupby_categorical_index(): + + s = np.random.RandomState(12345) + levels = ['foo', 'bar', 'baz', 'qux'] + codes = s.randint(0, 4, size=20) + cats = Categorical.from_codes(codes, levels, ordered=True) + df = DataFrame( + np.repeat( + np.arange(20), 4).reshape(-1, 4), columns=list('abcd')) + df['cats'] = cats + + # with a cat index + result = df.set_index('cats').groupby(level=0).sum() + expected = df[list('abcd')].groupby(cats.codes).sum() + expected.index = CategoricalIndex( + Categorical.from_codes( + [0, 1, 2, 3], levels, ordered=True), name='cats') + assert_frame_equal(result, expected) + + # with a cat column, should produce a cat index + result = df.groupby('cats').sum() + expected = df[list('abcd')].groupby(cats.codes).sum() + expected.index = CategoricalIndex( + Categorical.from_codes( + [0, 1, 2, 3], levels, ordered=True), name='cats') + assert_frame_equal(result, expected) + + +def test_groupby_describe_categorical_columns(): + # GH 11558 + cats = pd.CategoricalIndex(['qux', 'foo', 'baz', 'bar'], + categories=['foo', 'bar', 'baz', 'qux'], + ordered=True) + df = DataFrame(np.random.randn(20, 4), columns=cats) + result = df.groupby([1, 2, 3, 4] * 5).describe() + + tm.assert_index_equal(result.stack().columns, cats) + tm.assert_categorical_equal(result.stack().columns.values, cats.values) + + +def test_groupby_unstack_categorical(): + # GH11558 (example is taken from the original issue) + df = pd.DataFrame({'a': range(10), + 'medium': ['A', 'B'] * 5, + 'artist': list('XYXXY') * 2}) + df['medium'] = df['medium'].astype('category') + + gcat = df.groupby(['artist', 'medium'])['a'].count().unstack() + result = gcat.describe() + + exp_columns = pd.CategoricalIndex(['A', 'B'], ordered=False, + name='medium') + tm.assert_index_equal(result.columns, exp_columns) + tm.assert_categorical_equal(result.columns.values, exp_columns.values) + + result = gcat['A'] + gcat['B'] + expected = pd.Series([6, 4], index=pd.Index(['X', 'Y'], name='artist')) + tm.assert_series_equal(result, expected) + + +def test_groupby_bins_unequal_len(): + # GH3011 + series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4]) + bins = pd.cut(series.dropna().values, 4) + + # len(bins) != len(series) here + def f(): + series.groupby(bins).mean() + pytest.raises(ValueError, f) + + +def test_groupby_multi_categorical_as_index(): + # GH13204 + df = DataFrame({'cat': Categorical([1, 2, 2], [1, 2, 3]), + 'A': [10, 11, 11], + 'B': [101, 102, 103]}) + result = df.groupby(['cat', 'A'], as_index=False).sum() + expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]), + 'A': [10, 11, 10, 11, 10, 11], + 'B': [101.0, nan, nan, 205.0, nan, nan]}, + columns=['cat', 'A', 'B']) + tm.assert_frame_equal(result, expected) + + # function grouper + f = lambda r: df.loc[r, 'A'] + result = df.groupby(['cat', f], as_index=False).sum() + expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]), + 'A': [10.0, nan, nan, 22.0, nan, nan], + 'B': [101.0, nan, nan, 205.0, nan, nan]}, + columns=['cat', 'A', 'B']) + tm.assert_frame_equal(result, expected) + + # another not in-axis grouper + s = Series(['a', 'b', 'b'], name='cat2') + result = df.groupby(['cat', s], as_index=False).sum() + 
expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]), + 'A': [10.0, nan, nan, 22.0, nan, nan], + 'B': [101.0, nan, nan, 205.0, nan, nan]}, + columns=['cat', 'A', 'B']) + tm.assert_frame_equal(result, expected) + + # GH18872: conflicting names in desired index + pytest.raises(ValueError, lambda: df.groupby(['cat', + s.rename('cat')]).sum()) + + # is original index dropped? + expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]), + 'A': [10, 11, 10, 11, 10, 11], + 'B': [101.0, nan, nan, 205.0, nan, nan]}, + columns=['cat', 'A', 'B']) + + group_columns = ['cat', 'A'] + + for name in [None, 'X', 'B', 'cat']: + df.index = Index(list("abc"), name=name) + + if name in group_columns and name in df.index.names: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): result = df.groupby(group_columns, as_index=False).sum() - tm.assert_frame_equal(result, expected, check_index_type=True) - - def test_groupby_preserve_categories(self): - # GH-13179 - categories = list('abc') - - # ordered=True - df = DataFrame({'A': pd.Categorical(list('ba'), - categories=categories, + else: + result = df.groupby(group_columns, as_index=False).sum() + + tm.assert_frame_equal(result, expected, check_index_type=True) + + +def test_groupby_preserve_categories(): + # GH-13179 + categories = list('abc') + + # ordered=True + df = DataFrame({'A': pd.Categorical(list('ba'), + categories=categories, + ordered=True)}) + index = pd.CategoricalIndex(categories, categories, ordered=True) + tm.assert_index_equal(df.groupby('A', sort=True).first().index, index) + tm.assert_index_equal(df.groupby('A', sort=False).first().index, index) + + # ordered=False + df = DataFrame({'A': pd.Categorical(list('ba'), + categories=categories, + ordered=False)}) + sort_index = pd.CategoricalIndex(categories, categories, ordered=False) + nosort_index = pd.CategoricalIndex(list('bac'), list('bac'), + ordered=False) + tm.assert_index_equal(df.groupby('A', sort=True).first().index, + sort_index) + tm.assert_index_equal(df.groupby('A', sort=False).first().index, + nosort_index) + + +def test_groupby_preserve_categorical_dtype(): + # GH13743, GH13854 + df = DataFrame({'A': [1, 2, 1, 1, 2], + 'B': [10, 16, 22, 28, 34], + 'C1': Categorical(list("abaab"), + categories=list("bac"), + ordered=False), + 'C2': Categorical(list("abaab"), + categories=list("bac"), + ordered=True)}) + # single grouper + exp_full = DataFrame({'A': [2.0, 1.0, np.nan], + 'B': [25.0, 20.0, np.nan], + 'C1': Categorical(list("bac"), + categories=list("bac"), + ordered=False), + 'C2': Categorical(list("bac"), + categories=list("bac"), ordered=True)}) - index = pd.CategoricalIndex(categories, categories, ordered=True) - tm.assert_index_equal(df.groupby('A', sort=True).first().index, index) - tm.assert_index_equal(df.groupby('A', sort=False).first().index, index) - - # ordered=False - df = DataFrame({'A': pd.Categorical(list('ba'), - categories=categories, - ordered=False)}) - sort_index = pd.CategoricalIndex(categories, categories, ordered=False) - nosort_index = pd.CategoricalIndex(list('bac'), list('bac'), - ordered=False) - tm.assert_index_equal(df.groupby('A', sort=True).first().index, - sort_index) - tm.assert_index_equal(df.groupby('A', sort=False).first().index, - nosort_index) - - def test_groupby_preserve_categorical_dtype(self): - # GH13743, GH13854 - df = DataFrame({'A': [1, 2, 1, 1, 2], - 'B': [10, 16, 22, 28, 34], - 'C1': Categorical(list("abaab"), - categories=list("bac"), - ordered=False), - 'C2': Categorical(list("abaab"), - 
categories=list("bac"), - ordered=True)}) - # single grouper - exp_full = DataFrame({'A': [2.0, 1.0, np.nan], - 'B': [25.0, 20.0, np.nan], - 'C1': Categorical(list("bac"), - categories=list("bac"), - ordered=False), - 'C2': Categorical(list("bac"), - categories=list("bac"), - ordered=True)}) - for col in ['C1', 'C2']: - result1 = df.groupby(by=col, as_index=False).mean() - result2 = df.groupby(by=col, as_index=True).mean().reset_index() - expected = exp_full.reindex(columns=result1.columns) - tm.assert_frame_equal(result1, expected) - tm.assert_frame_equal(result2, expected) - - # multiple grouper - exp_full = DataFrame({'A': [1, 1, 1, 2, 2, 2], - 'B': [np.nan, 20.0, np.nan, 25.0, np.nan, - np.nan], - 'C1': Categorical(list("bacbac"), - categories=list("bac"), - ordered=False), - 'C2': Categorical(list("bacbac"), - categories=list("bac"), - ordered=True)}) - for cols in [['A', 'C1'], ['A', 'C2']]: - result1 = df.groupby(by=cols, as_index=False).mean() - result2 = df.groupby(by=cols, as_index=True).mean().reset_index() - expected = exp_full.reindex(columns=result1.columns) - tm.assert_frame_equal(result1, expected) - tm.assert_frame_equal(result2, expected) - - def test_groupby_categorical_no_compress(self): - data = Series(np.random.randn(9)) - - codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]) - cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True) - - result = data.groupby(cats).mean() - exp = data.groupby(codes).mean() - - exp.index = CategoricalIndex(exp.index, categories=cats.categories, - ordered=cats.ordered) - assert_series_equal(result, exp) - - codes = np.array([0, 0, 0, 1, 1, 1, 3, 3, 3]) - cats = Categorical.from_codes(codes, [0, 1, 2, 3], ordered=True) - - result = data.groupby(cats).mean() - exp = data.groupby(codes).mean().reindex(cats.categories) - exp.index = CategoricalIndex(exp.index, categories=cats.categories, - ordered=cats.ordered) - assert_series_equal(result, exp) - - cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"], - categories=["a", "b", "c", "d"], ordered=True) - data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) - - result = data.groupby("b").mean() - result = result["a"].values - exp = np.array([1, 2, 4, np.nan]) - tm.assert_numpy_array_equal(result, exp) - - def test_groupby_sort_categorical(self): - # dataframe groupby sort was being ignored # GH 8868 - df = DataFrame([['(7.5, 10]', 10, 10], - ['(7.5, 10]', 8, 20], - ['(2.5, 5]', 5, 30], - ['(5, 7.5]', 6, 40], - ['(2.5, 5]', 4, 50], - ['(0, 2.5]', 1, 60], - ['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar']) - df['range'] = Categorical(df['range'], ordered=True) - index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', - '(7.5, 10]'], name='range', ordered=True) - result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], - columns=['foo', 'bar'], index=index) - - col = 'range' - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - # when categories is ordered, group is ordered by category's order - assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) - - df['range'] = Categorical(df['range'], ordered=False) - index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', - '(7.5, 10]'], name='range') - result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], - columns=['foo', 'bar'], index=index) - - index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]', - '(0, 2.5]'], - categories=['(7.5, 10]', '(2.5, 5]', - '(5, 7.5]', '(0, 2.5]'], - name='range') - result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], 
- index=index, columns=['foo', 'bar']) - - col = 'range' - # this is an unordered categorical, but we allow this #### - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) - - def test_groupby_sort_categorical_datetimelike(self): - # GH10505 - - # use same data as test_groupby_sort_categorical, which category is - # corresponding to datetime.month - df = DataFrame({'dt': [datetime(2011, 7, 1), datetime(2011, 7, 1), - datetime(2011, 2, 1), datetime(2011, 5, 1), - datetime(2011, 2, 1), datetime(2011, 1, 1), - datetime(2011, 5, 1)], - 'foo': [10, 8, 5, 6, 4, 1, 7], - 'bar': [10, 20, 30, 40, 50, 60, 70]}, - columns=['dt', 'foo', 'bar']) - - # ordered=True - df['dt'] = Categorical(df['dt'], ordered=True) - index = [datetime(2011, 1, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 7, 1)] - result_sort = DataFrame( - [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) - result_sort.index = CategoricalIndex(index, name='dt', ordered=True) - - index = [datetime(2011, 7, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 1, 1)] - result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], - columns=['foo', 'bar']) - result_nosort.index = CategoricalIndex(index, categories=index, - name='dt', ordered=True) - - col = 'dt' - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - # when categories is ordered, group is ordered by category's order - assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) - - # ordered = False - df['dt'] = Categorical(df['dt'], ordered=False) - index = [datetime(2011, 1, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 7, 1)] - result_sort = DataFrame( - [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) - result_sort.index = CategoricalIndex(index, name='dt') - - index = [datetime(2011, 7, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 1, 1)] - result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], - columns=['foo', 'bar']) - result_nosort.index = CategoricalIndex(index, categories=index, - name='dt') - - col = 'dt' - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) - - def test_groupby_categorical_two_columns(self): - - # https://github.com/pandas-dev/pandas/issues/8138 - d = {'cat': - pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"], - ordered=True), - 'ints': [1, 1, 2, 2], - 'val': [10, 20, 30, 40]} - test = pd.DataFrame(d) - - # Grouping on a single column - groups_single_key = test.groupby("cat") - res = groups_single_key.agg('mean') - - exp_index = pd.CategoricalIndex(["a", "b", "c"], name="cat", - ordered=True) - exp = DataFrame({"ints": [1.5, 1.5, np.nan], "val": [20, 30, np.nan]}, - index=exp_index) - tm.assert_frame_equal(res, exp) - - # Grouping on two columns - groups_double_key = test.groupby(["cat", "ints"]) - res = groups_double_key.agg('mean') - exp = DataFrame({"val": [10, 30, 20, 40, np.nan, np.nan], - "cat": pd.Categorical(["a", "a", "b", "b", "c", "c"], - ordered=True), - "ints": [1, 2, 1, 2, 1, 2]}).set_index(["cat", "ints" - ]) - tm.assert_frame_equal(res, exp) - - # GH 10132 - for key in [('a', 1), ('b', 2), ('b', 1), ('a', 2)]: - c, i = key - result = groups_double_key.get_group(key) - expected = test[(test.cat == c) & (test.ints == i)] - assert_frame_equal(result, expected) - - d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 
'C3': [10, 100, 200, 34]} - test = pd.DataFrame(d) - values = pd.cut(test['C1'], [1, 2, 3, 6]) - values.name = "cat" - groups_double_key = test.groupby([values, 'C2']) - - res = groups_double_key.agg('mean') - nan = np.nan - idx = MultiIndex.from_product( - [Categorical([Interval(1, 2), Interval(2, 3), - Interval(3, 6)], ordered=True), - [1, 2, 3, 4]], - names=["cat", "C2"]) - exp = DataFrame({"C1": [nan, nan, nan, nan, 3, 3, - nan, nan, nan, nan, 4, 5], - "C3": [nan, nan, nan, nan, 10, 100, - nan, nan, nan, nan, 200, 34]}, index=idx) - tm.assert_frame_equal(res, exp) - - def test_empty_sum(self): - # https://github.com/pandas-dev/pandas/issues/18678 - df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'], - categories=['a', 'b', 'c']), - 'B': [1, 2, 1]}) - expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A') - - # 0 by default - result = df.groupby("A").B.sum() - expected = pd.Series([3, 1, 0], expected_idx, name='B') - tm.assert_series_equal(result, expected) - - # min_count=0 - result = df.groupby("A").B.sum(min_count=0) - expected = pd.Series([3, 1, 0], expected_idx, name='B') - tm.assert_series_equal(result, expected) - - # min_count=1 - result = df.groupby("A").B.sum(min_count=1) - expected = pd.Series([3, 1, np.nan], expected_idx, name='B') - tm.assert_series_equal(result, expected) - - # min_count>1 - result = df.groupby("A").B.sum(min_count=2) - expected = pd.Series([3, np.nan, np.nan], expected_idx, name='B') - tm.assert_series_equal(result, expected) - - def test_empty_prod(self): - # https://github.com/pandas-dev/pandas/issues/18678 - df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'], - categories=['a', 'b', 'c']), - 'B': [1, 2, 1]}) - - expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A') - - # 1 by default - result = df.groupby("A").B.prod() - expected = pd.Series([2, 1, 1], expected_idx, name='B') - tm.assert_series_equal(result, expected) - - # min_count=0 - result = df.groupby("A").B.prod(min_count=0) - expected = pd.Series([2, 1, 1], expected_idx, name='B') - tm.assert_series_equal(result, expected) - - # min_count=1 - result = df.groupby("A").B.prod(min_count=1) - expected = pd.Series([2, 1, np.nan], expected_idx, name='B') - tm.assert_series_equal(result, expected) + for col in ['C1', 'C2']: + result1 = df.groupby(by=col, as_index=False).mean() + result2 = df.groupby(by=col, as_index=True).mean().reset_index() + expected = exp_full.reindex(columns=result1.columns) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + + # multiple grouper + exp_full = DataFrame({'A': [1, 1, 1, 2, 2, 2], + 'B': [np.nan, 20.0, np.nan, 25.0, np.nan, + np.nan], + 'C1': Categorical(list("bacbac"), + categories=list("bac"), + ordered=False), + 'C2': Categorical(list("bacbac"), + categories=list("bac"), + ordered=True)}) + for cols in [['A', 'C1'], ['A', 'C2']]: + result1 = df.groupby(by=cols, as_index=False).mean() + result2 = df.groupby(by=cols, as_index=True).mean().reset_index() + expected = exp_full.reindex(columns=result1.columns) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + + +def test_groupby_categorical_no_compress(): + data = Series(np.random.randn(9)) + + codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]) + cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True) + + result = data.groupby(cats).mean() + exp = data.groupby(codes).mean() + + exp.index = CategoricalIndex(exp.index, categories=cats.categories, + ordered=cats.ordered) + assert_series_equal(result, exp) + + codes = 
np.array([0, 0, 0, 1, 1, 1, 3, 3, 3])
+    cats = Categorical.from_codes(codes, [0, 1, 2, 3], ordered=True)
+
+    result = data.groupby(cats).mean()
+    exp = data.groupby(codes).mean().reindex(cats.categories)
+    exp.index = CategoricalIndex(exp.index, categories=cats.categories,
+                                 ordered=cats.ordered)
+    assert_series_equal(result, exp)
+
+    cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"],
+                       categories=["a", "b", "c", "d"], ordered=True)
+    data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats})
+
+    result = data.groupby("b").mean()
+    result = result["a"].values
+    exp = np.array([1, 2, 4, np.nan])
+    tm.assert_numpy_array_equal(result, exp)
+
+
+def test_groupby_sort_categorical():
+    # dataframe groupby sort was being ignored # GH 8868
+    df = DataFrame([['(7.5, 10]', 10, 10],
+                    ['(7.5, 10]', 8, 20],
+                    ['(2.5, 5]', 5, 30],
+                    ['(5, 7.5]', 6, 40],
+                    ['(2.5, 5]', 4, 50],
+                    ['(0, 2.5]', 1, 60],
+                    ['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar'])
+    df['range'] = Categorical(df['range'], ordered=True)
+    index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]',
+                              '(7.5, 10]'], name='range', ordered=True)
+    result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
+                            columns=['foo', 'bar'], index=index)
+
+    col = 'range'
+    assert_frame_equal(result_sort, df.groupby(col, sort=True).first())
+    # when categories is ordered, group is ordered by category's order
+    assert_frame_equal(result_sort, df.groupby(col, sort=False).first())
+
+    df['range'] = Categorical(df['range'], ordered=False)
+    index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]',
+                              '(7.5, 10]'], name='range')
+    result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
+                            columns=['foo', 'bar'], index=index)
+
+    index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]',
+                              '(0, 2.5]'],
+                             categories=['(7.5, 10]', '(2.5, 5]',
+                                         '(5, 7.5]', '(0, 2.5]'],
+                             name='range')
+    result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]],
+                              index=index, columns=['foo', 'bar'])
+
+    col = 'range'
+    # this is an unordered categorical, but we allow this
+    assert_frame_equal(result_sort, df.groupby(col, sort=True).first())
+    assert_frame_equal(result_nosort, df.groupby(col, sort=False).first())
+
+
+def test_groupby_sort_categorical_datetimelike():
+    # GH10505
+
+    # use the same data as test_groupby_sort_categorical; the categories
+    # correspond to datetime.month
+    df = DataFrame({'dt': [datetime(2011, 7, 1), datetime(2011, 7, 1),
+                           datetime(2011, 2, 1), datetime(2011, 5, 1),
+                           datetime(2011, 2, 1), datetime(2011, 1, 1),
+                           datetime(2011, 5, 1)],
+                    'foo': [10, 8, 5, 6, 4, 1, 7],
+                    'bar': [10, 20, 30, 40, 50, 60, 70]},
+                   columns=['dt', 'foo', 'bar'])
+
+    # ordered=True
+    df['dt'] = Categorical(df['dt'], ordered=True)
+    index = [datetime(2011, 1, 1), datetime(2011, 2, 1),
+             datetime(2011, 5, 1), datetime(2011, 7, 1)]
+    result_sort = DataFrame(
+        [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar'])
+    result_sort.index = CategoricalIndex(index, name='dt', ordered=True)
+
+    index = [datetime(2011, 7, 1), datetime(2011, 2, 1),
+             datetime(2011, 5, 1), datetime(2011, 1, 1)]
+    result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]],
+                              columns=['foo', 'bar'])
+    result_nosort.index = CategoricalIndex(index, categories=index,
+                                           name='dt', ordered=True)
+
+    col = 'dt'
+    assert_frame_equal(result_sort, df.groupby(col, sort=True).first())
+    # when categories is ordered, group is ordered by category's order
+    assert_frame_equal(result_sort, df.groupby(col, sort=False).first())
+
+    # ordered = False
+    
df['dt'] = Categorical(df['dt'], ordered=False) + index = [datetime(2011, 1, 1), datetime(2011, 2, 1), + datetime(2011, 5, 1), datetime(2011, 7, 1)] + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) + result_sort.index = CategoricalIndex(index, name='dt') + + index = [datetime(2011, 7, 1), datetime(2011, 2, 1), + datetime(2011, 5, 1), datetime(2011, 1, 1)] + result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], + columns=['foo', 'bar']) + result_nosort.index = CategoricalIndex(index, categories=index, + name='dt') + + col = 'dt' + assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) + assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) + + +def test_groupby_categorical_two_columns(): + + # https://github.com/pandas-dev/pandas/issues/8138 + d = {'cat': + pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"], + ordered=True), + 'ints': [1, 1, 2, 2], + 'val': [10, 20, 30, 40]} + test = pd.DataFrame(d) + + # Grouping on a single column + groups_single_key = test.groupby("cat") + res = groups_single_key.agg('mean') + + exp_index = pd.CategoricalIndex(["a", "b", "c"], name="cat", + ordered=True) + exp = DataFrame({"ints": [1.5, 1.5, np.nan], "val": [20, 30, np.nan]}, + index=exp_index) + tm.assert_frame_equal(res, exp) + + # Grouping on two columns + groups_double_key = test.groupby(["cat", "ints"]) + res = groups_double_key.agg('mean') + exp = DataFrame({"val": [10, 30, 20, 40, np.nan, np.nan], + "cat": pd.Categorical(["a", "a", "b", "b", "c", "c"], + ordered=True), + "ints": [1, 2, 1, 2, 1, 2]}).set_index(["cat", "ints" + ]) + tm.assert_frame_equal(res, exp) + + # GH 10132 + for key in [('a', 1), ('b', 2), ('b', 1), ('a', 2)]: + c, i = key + result = groups_double_key.get_group(key) + expected = test[(test.cat == c) & (test.ints == i)] + assert_frame_equal(result, expected) + + d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]} + test = pd.DataFrame(d) + values = pd.cut(test['C1'], [1, 2, 3, 6]) + values.name = "cat" + groups_double_key = test.groupby([values, 'C2']) + + res = groups_double_key.agg('mean') + nan = np.nan + idx = MultiIndex.from_product( + [Categorical([Interval(1, 2), Interval(2, 3), + Interval(3, 6)], ordered=True), + [1, 2, 3, 4]], + names=["cat", "C2"]) + exp = DataFrame({"C1": [nan, nan, nan, nan, 3, 3, + nan, nan, nan, nan, 4, 5], + "C3": [nan, nan, nan, nan, 10, 100, + nan, nan, nan, nan, 200, 34]}, index=idx) + tm.assert_frame_equal(res, exp) + + +def test_empty_sum(): + # https://github.com/pandas-dev/pandas/issues/18678 + df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'], + categories=['a', 'b', 'c']), + 'B': [1, 2, 1]}) + expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A') + + # 0 by default + result = df.groupby("A").B.sum() + expected = pd.Series([3, 1, 0], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df.groupby("A").B.sum(min_count=0) + expected = pd.Series([3, 1, 0], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df.groupby("A").B.sum(min_count=1) + expected = pd.Series([3, 1, np.nan], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + # min_count>1 + result = df.groupby("A").B.sum(min_count=2) + expected = pd.Series([3, np.nan, np.nan], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + +def test_empty_prod(): + # https://github.com/pandas-dev/pandas/issues/18678 + df = pd.DataFrame({"A": 
pd.Categorical(['a', 'a', 'b'], + categories=['a', 'b', 'c']), + 'B': [1, 2, 1]}) + + expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A') + + # 1 by default + result = df.groupby("A").B.prod() + expected = pd.Series([2, 1, 1], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df.groupby("A").B.prod(min_count=0) + expected = pd.Series([2, 1, 1], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df.groupby("A").B.prod(min_count=1) + expected = pd.Series([2, 1, np.nan], expected_idx, name='B') + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index cac6b46af8f87..873d9f6076b69 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -1,622 +1,576 @@ # -*- coding: utf-8 -*- from __future__ import print_function -from numpy import nan - import pytest -from pandas import Timestamp -from pandas.core.index import MultiIndex -from pandas.core.api import DataFrame - -from pandas.core.series import Series - -from pandas.util.testing import (assert_frame_equal, assert_series_equal - ) -from pandas.compat import (lmap) - -from pandas import compat - -import pandas.core.common as com import numpy as np - import pandas.util.testing as tm +from pandas import Timestamp, DataFrame, Series import pandas as pd -class TestGroupByFilter(object): - - def setup_method(self, method): - self.ts = tm.makeTimeSeries() - - self.seriesd = tm.getSeriesData() - self.tsd = tm.getTimeSeriesData() - self.frame = DataFrame(self.seriesd) - self.tsframe = DataFrame(self.tsd) - - self.df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - self.df_mixed_floats = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.array( - np.random.randn(8), dtype='float32')}) - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - self.mframe = DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) - - self.three_group = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) - - def test_filter_series(self): - s = pd.Series([1, 3, 20, 5, 22, 24, 7]) - expected_odd = pd.Series([1, 3, 5, 7], index=[0, 1, 3, 6]) - expected_even = pd.Series([20, 22, 24], index=[2, 4, 5]) - grouper = s.apply(lambda x: x % 2) - grouped = s.groupby(grouper) - assert_series_equal( - grouped.filter(lambda x: x.mean() < 10), expected_odd) - assert_series_equal( - grouped.filter(lambda x: x.mean() > 10), expected_even) - # Test dropna=False. 
- assert_series_equal( - grouped.filter(lambda x: x.mean() < 10, dropna=False), - expected_odd.reindex(s.index)) - assert_series_equal( - grouped.filter(lambda x: x.mean() > 10, dropna=False), - expected_even.reindex(s.index)) - - def test_filter_single_column_df(self): - df = pd.DataFrame([1, 3, 20, 5, 22, 24, 7]) - expected_odd = pd.DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6]) - expected_even = pd.DataFrame([20, 22, 24], index=[2, 4, 5]) - grouper = df[0].apply(lambda x: x % 2) - grouped = df.groupby(grouper) - assert_frame_equal( - grouped.filter(lambda x: x.mean() < 10), expected_odd) - assert_frame_equal( - grouped.filter(lambda x: x.mean() > 10), expected_even) - # Test dropna=False. - assert_frame_equal( - grouped.filter(lambda x: x.mean() < 10, dropna=False), - expected_odd.reindex(df.index)) - assert_frame_equal( - grouped.filter(lambda x: x.mean() > 10, dropna=False), - expected_even.reindex(df.index)) - - def test_filter_multi_column_df(self): - df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': [1, 1, 1, 1]}) - grouper = df['A'].apply(lambda x: x % 2) - grouped = df.groupby(grouper) - expected = pd.DataFrame({'A': [12, 12], 'B': [1, 1]}, index=[1, 2]) - assert_frame_equal( - grouped.filter(lambda x: x['A'].sum() - x['B'].sum() > 10), - expected) - - def test_filter_mixed_df(self): - df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()}) - grouper = df['A'].apply(lambda x: x % 2) - grouped = df.groupby(grouper) - expected = pd.DataFrame({'A': [12, 12], 'B': ['b', 'c']}, index=[1, 2]) - assert_frame_equal( - grouped.filter(lambda x: x['A'].sum() > 10), expected) - - def test_filter_out_all_groups(self): - s = pd.Series([1, 3, 20, 5, 22, 24, 7]) - grouper = s.apply(lambda x: x % 2) - grouped = s.groupby(grouper) - assert_series_equal(grouped.filter(lambda x: x.mean() > 1000), s[[]]) - df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()}) - grouper = df['A'].apply(lambda x: x % 2) - grouped = df.groupby(grouper) - assert_frame_equal( - grouped.filter(lambda x: x['A'].sum() > 1000), df.loc[[]]) - - def test_filter_out_no_groups(self): - s = pd.Series([1, 3, 20, 5, 22, 24, 7]) - grouper = s.apply(lambda x: x % 2) - grouped = s.groupby(grouper) - filtered = grouped.filter(lambda x: x.mean() > 0) - assert_series_equal(filtered, s) - df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()}) - grouper = df['A'].apply(lambda x: x % 2) - grouped = df.groupby(grouper) - filtered = grouped.filter(lambda x: x['A'].mean() > 0) - assert_frame_equal(filtered, df) - - def test_filter_out_all_groups_in_df(self): - # GH12768 - df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]}) - res = df.groupby('a') - res = res.filter(lambda x: x['b'].sum() > 5, dropna=False) - expected = pd.DataFrame({'a': [nan] * 3, 'b': [nan] * 3}) - assert_frame_equal(expected, res) - - df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]}) - res = df.groupby('a') - res = res.filter(lambda x: x['b'].sum() > 5, dropna=True) - expected = pd.DataFrame({'a': [], 'b': []}, dtype="int64") - assert_frame_equal(expected, res) - - def test_filter_condition_raises(self): - def raise_if_sum_is_zero(x): - if x.sum() == 0: - raise ValueError - else: - return x.sum() > 0 - - s = pd.Series([-1, 0, 1, 2]) - grouper = s.apply(lambda x: x % 2) - grouped = s.groupby(grouper) - pytest.raises(TypeError, - lambda: grouped.filter(raise_if_sum_is_zero)) - - def test_filter_with_axis_in_groupby(self): - # issue 11041 - index = pd.MultiIndex.from_product([range(10), [0, 1]]) - data = pd.DataFrame( - np.arange(100).reshape(-1, 20), 
columns=index, dtype='int64') - result = data.groupby(level=0, - axis=1).filter(lambda x: x.iloc[0, 0] > 10) - expected = data.iloc[:, 12:20] - assert_frame_equal(result, expected) - - def test_filter_bad_shapes(self): - df = DataFrame({'A': np.arange(8), - 'B': list('aabbbbcc'), - 'C': np.arange(8)}) - s = df['B'] - g_df = df.groupby('B') - g_s = s.groupby(s) - - f = lambda x: x - pytest.raises(TypeError, lambda: g_df.filter(f)) - pytest.raises(TypeError, lambda: g_s.filter(f)) - - f = lambda x: x == 1 - pytest.raises(TypeError, lambda: g_df.filter(f)) - pytest.raises(TypeError, lambda: g_s.filter(f)) - - f = lambda x: np.outer(x, x) - pytest.raises(TypeError, lambda: g_df.filter(f)) - pytest.raises(TypeError, lambda: g_s.filter(f)) - - def test_filter_nan_is_false(self): - df = DataFrame({'A': np.arange(8), - 'B': list('aabbbbcc'), - 'C': np.arange(8)}) - s = df['B'] - g_df = df.groupby(df['B']) - g_s = s.groupby(s) - - f = lambda x: np.nan - assert_frame_equal(g_df.filter(f), df.loc[[]]) - assert_series_equal(g_s.filter(f), s[[]]) - - def test_filter_against_workaround(self): - np.random.seed(0) - # Series of ints - s = Series(np.random.randint(0, 100, 1000)) - grouper = s.apply(lambda x: np.round(x, -1)) - grouped = s.groupby(grouper) - f = lambda x: x.mean() > 10 - - old_way = s[grouped.transform(f).astype('bool')] - new_way = grouped.filter(f) - assert_series_equal(new_way.sort_values(), old_way.sort_values()) - - # Series of floats - s = 100 * Series(np.random.random(1000)) - grouper = s.apply(lambda x: np.round(x, -1)) - grouped = s.groupby(grouper) - f = lambda x: x.mean() > 10 - old_way = s[grouped.transform(f).astype('bool')] - new_way = grouped.filter(f) - assert_series_equal(new_way.sort_values(), old_way.sort_values()) - - # Set up DataFrame of ints, floats, strings. - from string import ascii_lowercase - letters = np.array(list(ascii_lowercase)) - N = 1000 - random_letters = letters.take(np.random.randint(0, 26, N)) - df = DataFrame({'ints': Series(np.random.randint(0, 100, N)), - 'floats': N / 10 * Series(np.random.random(N)), - 'letters': Series(random_letters)}) - - # Group by ints; filter on floats. - grouped = df.groupby('ints') - old_way = df[grouped.floats. - transform(lambda x: x.mean() > N / 20).astype('bool')] - new_way = grouped.filter(lambda x: x['floats'].mean() > N / 20) - assert_frame_equal(new_way, old_way) - - # Group by floats (rounded); filter on strings. - grouper = df.floats.apply(lambda x: np.round(x, -1)) - grouped = df.groupby(grouper) - old_way = df[grouped.letters. - transform(lambda x: len(x) < N / 10).astype('bool')] - new_way = grouped.filter(lambda x: len(x.letters) < N / 10) - assert_frame_equal(new_way, old_way) - - # Group by strings; filter on ints. - grouped = df.groupby('letters') - old_way = df[grouped.ints. - transform(lambda x: x.mean() > N / 20).astype('bool')] - new_way = grouped.filter(lambda x: x['ints'].mean() > N / 20) - assert_frame_equal(new_way, old_way) - - def test_filter_using_len(self): - # BUG GH4447 - df = DataFrame({'A': np.arange(8), - 'B': list('aabbbbcc'), - 'C': np.arange(8)}) - grouped = df.groupby('B') - actual = grouped.filter(lambda x: len(x) > 2) - expected = DataFrame( - {'A': np.arange(2, 6), - 'B': list('bbbb'), - 'C': np.arange(2, 6)}, index=np.arange(2, 6)) - assert_frame_equal(actual, expected) - - actual = grouped.filter(lambda x: len(x) > 4) - expected = df.loc[[]] - assert_frame_equal(actual, expected) - - # Series have always worked properly, but we'll test anyway. 
- s = df['B'] - grouped = s.groupby(s) - actual = grouped.filter(lambda x: len(x) > 2) - expected = Series(4 * ['b'], index=np.arange(2, 6), name='B') - assert_series_equal(actual, expected) - - actual = grouped.filter(lambda x: len(x) > 4) - expected = s[[]] - assert_series_equal(actual, expected) - - def test_filter_maintains_ordering(self): - # Simple case: index is sequential. #4621 - df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], - 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}) - s = df['pid'] - grouped = df.groupby('tag') - actual = grouped.filter(lambda x: len(x) > 1) - expected = df.iloc[[1, 2, 4, 7]] - assert_frame_equal(actual, expected) - - grouped = s.groupby(df['tag']) - actual = grouped.filter(lambda x: len(x) > 1) - expected = s.iloc[[1, 2, 4, 7]] - assert_series_equal(actual, expected) - - # Now index is sequentially decreasing. - df.index = np.arange(len(df) - 1, -1, -1) - s = df['pid'] - grouped = df.groupby('tag') - actual = grouped.filter(lambda x: len(x) > 1) - expected = df.iloc[[1, 2, 4, 7]] - assert_frame_equal(actual, expected) - - grouped = s.groupby(df['tag']) - actual = grouped.filter(lambda x: len(x) > 1) - expected = s.iloc[[1, 2, 4, 7]] - assert_series_equal(actual, expected) - - # Index is shuffled. - SHUFFLED = [4, 6, 7, 2, 1, 0, 5, 3] - df.index = df.index[SHUFFLED] - s = df['pid'] - grouped = df.groupby('tag') - actual = grouped.filter(lambda x: len(x) > 1) - expected = df.iloc[[1, 2, 4, 7]] - assert_frame_equal(actual, expected) - - grouped = s.groupby(df['tag']) - actual = grouped.filter(lambda x: len(x) > 1) - expected = s.iloc[[1, 2, 4, 7]] - assert_series_equal(actual, expected) - - def test_filter_multiple_timestamp(self): - # GH 10114 - df = DataFrame({'A': np.arange(5, dtype='int64'), - 'B': ['foo', 'bar', 'foo', 'bar', 'bar'], - 'C': Timestamp('20130101')}) - - grouped = df.groupby(['B', 'C']) - - result = grouped['A'].filter(lambda x: True) - assert_series_equal(df['A'], result) - - result = grouped['A'].transform(len) - expected = Series([2, 3, 2, 3, 3], name='A') - assert_series_equal(result, expected) - - result = grouped.filter(lambda x: True) - assert_frame_equal(df, result) - - result = grouped.transform('sum') - expected = DataFrame({'A': [2, 8, 2, 8, 8]}) - assert_frame_equal(result, expected) - - result = grouped.transform(len) - expected = DataFrame({'A': [2, 3, 2, 3, 3]}) - assert_frame_equal(result, expected) - - def test_filter_and_transform_with_non_unique_int_index(self): - # GH4620 - index = [1, 1, 1, 2, 1, 1, 0, 1] - df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], - 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) - grouped_df = df.groupby('tag') - ser = df['pid'] - grouped_ser = ser.groupby(df['tag']) - expected_indexes = [1, 2, 4, 7] - - # Filter DataFrame - actual = grouped_df.filter(lambda x: len(x) > 1) - expected = df.iloc[expected_indexes] - assert_frame_equal(actual, expected) - - actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) - expected = df.copy() - expected.iloc[[0, 3, 5, 6]] = np.nan - assert_frame_equal(actual, expected) - - # Filter Series - actual = grouped_ser.filter(lambda x: len(x) > 1) - expected = ser.take(expected_indexes) - assert_series_equal(actual, expected) - - actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) - NA = np.nan - expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') - # ^ made manually because this can get confusing! 
- assert_series_equal(actual, expected) - - # Transform Series - actual = grouped_ser.transform(len) - expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid') - assert_series_equal(actual, expected) - - # Transform (a column from) DataFrameGroupBy - actual = grouped_df.pid.transform(len) - assert_series_equal(actual, expected) - - def test_filter_and_transform_with_multiple_non_unique_int_index(self): - # GH4620 - index = [1, 1, 1, 2, 0, 0, 0, 1] - df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], - 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) - grouped_df = df.groupby('tag') - ser = df['pid'] - grouped_ser = ser.groupby(df['tag']) - expected_indexes = [1, 2, 4, 7] - - # Filter DataFrame - actual = grouped_df.filter(lambda x: len(x) > 1) - expected = df.iloc[expected_indexes] - assert_frame_equal(actual, expected) - - actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) - expected = df.copy() - expected.iloc[[0, 3, 5, 6]] = np.nan - assert_frame_equal(actual, expected) - - # Filter Series - actual = grouped_ser.filter(lambda x: len(x) > 1) - expected = ser.take(expected_indexes) - assert_series_equal(actual, expected) - - actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) - NA = np.nan - expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') - # ^ made manually because this can get confusing! - assert_series_equal(actual, expected) - - # Transform Series - actual = grouped_ser.transform(len) - expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid') - assert_series_equal(actual, expected) - - # Transform (a column from) DataFrameGroupBy - actual = grouped_df.pid.transform(len) - assert_series_equal(actual, expected) - - def test_filter_and_transform_with_non_unique_float_index(self): - # GH4620 - index = np.array([1, 1, 1, 2, 1, 1, 0, 1], dtype=float) - df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], - 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) - grouped_df = df.groupby('tag') - ser = df['pid'] - grouped_ser = ser.groupby(df['tag']) - expected_indexes = [1, 2, 4, 7] - - # Filter DataFrame - actual = grouped_df.filter(lambda x: len(x) > 1) - expected = df.iloc[expected_indexes] - assert_frame_equal(actual, expected) - - actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) - expected = df.copy() - expected.iloc[[0, 3, 5, 6]] = np.nan - assert_frame_equal(actual, expected) - - # Filter Series - actual = grouped_ser.filter(lambda x: len(x) > 1) - expected = ser.take(expected_indexes) - assert_series_equal(actual, expected) - - actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) - NA = np.nan - expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') - # ^ made manually because this can get confusing! 
- assert_series_equal(actual, expected) - - # Transform Series - actual = grouped_ser.transform(len) - expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid') - assert_series_equal(actual, expected) - - # Transform (a column from) DataFrameGroupBy - actual = grouped_df.pid.transform(len) - assert_series_equal(actual, expected) - - def test_filter_and_transform_with_non_unique_timestamp_index(self): - # GH4620 - t0 = Timestamp('2013-09-30 00:05:00') - t1 = Timestamp('2013-10-30 00:05:00') - t2 = Timestamp('2013-11-30 00:05:00') - index = [t1, t1, t1, t2, t1, t1, t0, t1] - df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], - 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) - grouped_df = df.groupby('tag') - ser = df['pid'] - grouped_ser = ser.groupby(df['tag']) - expected_indexes = [1, 2, 4, 7] - - # Filter DataFrame - actual = grouped_df.filter(lambda x: len(x) > 1) - expected = df.iloc[expected_indexes] - assert_frame_equal(actual, expected) - - actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) - expected = df.copy() - expected.iloc[[0, 3, 5, 6]] = np.nan - assert_frame_equal(actual, expected) - - # Filter Series - actual = grouped_ser.filter(lambda x: len(x) > 1) - expected = ser.take(expected_indexes) - assert_series_equal(actual, expected) - - actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) - NA = np.nan - expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') - # ^ made manually because this can get confusing! - assert_series_equal(actual, expected) - - # Transform Series - actual = grouped_ser.transform(len) - expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid') - assert_series_equal(actual, expected) - - # Transform (a column from) DataFrameGroupBy - actual = grouped_df.pid.transform(len) - assert_series_equal(actual, expected) - - def test_filter_and_transform_with_non_unique_string_index(self): - # GH4620 - index = list('bbbcbbab') - df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], - 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) - grouped_df = df.groupby('tag') - ser = df['pid'] - grouped_ser = ser.groupby(df['tag']) - expected_indexes = [1, 2, 4, 7] - - # Filter DataFrame - actual = grouped_df.filter(lambda x: len(x) > 1) - expected = df.iloc[expected_indexes] - assert_frame_equal(actual, expected) - - actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) - expected = df.copy() - expected.iloc[[0, 3, 5, 6]] = np.nan - assert_frame_equal(actual, expected) - - # Filter Series - actual = grouped_ser.filter(lambda x: len(x) > 1) - expected = ser.take(expected_indexes) - assert_series_equal(actual, expected) - - actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) - NA = np.nan - expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') - # ^ made manually because this can get confusing! - assert_series_equal(actual, expected) - - # Transform Series - actual = grouped_ser.transform(len) - expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid') - assert_series_equal(actual, expected) - - # Transform (a column from) DataFrameGroupBy - actual = grouped_df.pid.transform(len) - assert_series_equal(actual, expected) - - def test_filter_has_access_to_grouped_cols(self): - df = DataFrame([[1, 2], [1, 3], [5, 6]], columns=['A', 'B']) - g = df.groupby('A') - # previously didn't have access to col A #???? 
- filt = g.filter(lambda x: x['A'].sum() == 2) - assert_frame_equal(filt, df.iloc[[0, 1]]) - - def test_filter_enforces_scalarness(self): - df = pd.DataFrame([ - ['best', 'a', 'x'], - ['worst', 'b', 'y'], - ['best', 'c', 'x'], - ['best', 'd', 'y'], - ['worst', 'd', 'y'], - ['worst', 'd', 'y'], - ['best', 'd', 'z'], - ], columns=['a', 'b', 'c']) - with tm.assert_raises_regex(TypeError, - 'filter function returned a.*'): - df.groupby('c').filter(lambda g: g['a'] == 'best') - - def test_filter_non_bool_raises(self): - df = pd.DataFrame([ - ['best', 'a', 1], - ['worst', 'b', 1], - ['best', 'c', 1], - ['best', 'd', 1], - ['worst', 'd', 1], - ['worst', 'd', 1], - ['best', 'd', 1], - ], columns=['a', 'b', 'c']) - with tm.assert_raises_regex(TypeError, - 'filter function returned a.*'): - df.groupby('a').filter(lambda g: g.c.mean()) - - def test_filter_dropna_with_empty_groups(self): - # GH 10780 - data = pd.Series(np.random.rand(9), index=np.repeat([1, 2, 3], 3)) - groupped = data.groupby(level=0) - result_false = groupped.filter(lambda x: x.mean() > 1, dropna=False) - expected_false = pd.Series([np.nan] * 9, - index=np.repeat([1, 2, 3], 3)) - tm.assert_series_equal(result_false, expected_false) - - result_true = groupped.filter(lambda x: x.mean() > 1, dropna=True) - expected_true = pd.Series(index=pd.Index([], dtype=int)) - tm.assert_series_equal(result_true, expected_true) - - -def assert_fp_equal(a, b): - assert (np.abs(a - b) < 1e-12).all() - - -def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): - tups = lmap(tuple, df[keys].values) - tups = com._asarray_tuplesafe(tups) - expected = f(df.groupby(tups)[field]) - for k, v in compat.iteritems(expected): - assert (result[k] == v) +def test_filter_series(): + s = pd.Series([1, 3, 20, 5, 22, 24, 7]) + expected_odd = pd.Series([1, 3, 5, 7], index=[0, 1, 3, 6]) + expected_even = pd.Series([20, 22, 24], index=[2, 4, 5]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + tm.assert_series_equal( + grouped.filter(lambda x: x.mean() < 10), expected_odd) + tm.assert_series_equal( + grouped.filter(lambda x: x.mean() > 10), expected_even) + # Test dropna=False. + tm.assert_series_equal( + grouped.filter(lambda x: x.mean() < 10, dropna=False), + expected_odd.reindex(s.index)) + tm.assert_series_equal( + grouped.filter(lambda x: x.mean() > 10, dropna=False), + expected_even.reindex(s.index)) + + +def test_filter_single_column_df(): + df = pd.DataFrame([1, 3, 20, 5, 22, 24, 7]) + expected_odd = pd.DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6]) + expected_even = pd.DataFrame([20, 22, 24], index=[2, 4, 5]) + grouper = df[0].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + tm.assert_frame_equal( + grouped.filter(lambda x: x.mean() < 10), expected_odd) + tm.assert_frame_equal( + grouped.filter(lambda x: x.mean() > 10), expected_even) + # Test dropna=False. 
+ tm.assert_frame_equal( + grouped.filter(lambda x: x.mean() < 10, dropna=False), + expected_odd.reindex(df.index)) + tm.assert_frame_equal( + grouped.filter(lambda x: x.mean() > 10, dropna=False), + expected_even.reindex(df.index)) + + +def test_filter_multi_column_df(): + df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': [1, 1, 1, 1]}) + grouper = df['A'].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + expected = pd.DataFrame({'A': [12, 12], 'B': [1, 1]}, index=[1, 2]) + tm.assert_frame_equal( + grouped.filter(lambda x: x['A'].sum() - x['B'].sum() > 10), + expected) + + +def test_filter_mixed_df(): + df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()}) + grouper = df['A'].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + expected = pd.DataFrame({'A': [12, 12], 'B': ['b', 'c']}, index=[1, 2]) + tm.assert_frame_equal( + grouped.filter(lambda x: x['A'].sum() > 10), expected) + + +def test_filter_out_all_groups(): + s = pd.Series([1, 3, 20, 5, 22, 24, 7]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + tm.assert_series_equal(grouped.filter(lambda x: x.mean() > 1000), s[[]]) + df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()}) + grouper = df['A'].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + tm.assert_frame_equal( + grouped.filter(lambda x: x['A'].sum() > 1000), df.loc[[]]) + + +def test_filter_out_no_groups(): + s = pd.Series([1, 3, 20, 5, 22, 24, 7]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + filtered = grouped.filter(lambda x: x.mean() > 0) + tm.assert_series_equal(filtered, s) + df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()}) + grouper = df['A'].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + filtered = grouped.filter(lambda x: x['A'].mean() > 0) + tm.assert_frame_equal(filtered, df) + + +def test_filter_out_all_groups_in_df(): + # GH12768 + df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]}) + res = df.groupby('a') + res = res.filter(lambda x: x['b'].sum() > 5, dropna=False) + expected = pd.DataFrame({'a': [np.nan] * 3, 'b': [np.nan] * 3}) + tm.assert_frame_equal(expected, res) + + df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]}) + res = df.groupby('a') + res = res.filter(lambda x: x['b'].sum() > 5, dropna=True) + expected = pd.DataFrame({'a': [], 'b': []}, dtype="int64") + tm.assert_frame_equal(expected, res) + + +def test_filter_condition_raises(): + def raise_if_sum_is_zero(x): + if x.sum() == 0: + raise ValueError + else: + return x.sum() > 0 + + s = pd.Series([-1, 0, 1, 2]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + pytest.raises(TypeError, + lambda: grouped.filter(raise_if_sum_is_zero)) + + +def test_filter_with_axis_in_groupby(): + # issue 11041 + index = pd.MultiIndex.from_product([range(10), [0, 1]]) + data = pd.DataFrame( + np.arange(100).reshape(-1, 20), columns=index, dtype='int64') + result = data.groupby(level=0, + axis=1).filter(lambda x: x.iloc[0, 0] > 10) + expected = data.iloc[:, 12:20] + tm.assert_frame_equal(result, expected) + + +def test_filter_bad_shapes(): + df = DataFrame({'A': np.arange(8), + 'B': list('aabbbbcc'), + 'C': np.arange(8)}) + s = df['B'] + g_df = df.groupby('B') + g_s = s.groupby(s) + + f = lambda x: x + pytest.raises(TypeError, lambda: g_df.filter(f)) + pytest.raises(TypeError, lambda: g_s.filter(f)) + + f = lambda x: x == 1 + pytest.raises(TypeError, lambda: g_df.filter(f)) + pytest.raises(TypeError, lambda: g_s.filter(f)) + + f = lambda x: np.outer(x, x) + pytest.raises(TypeError, lambda: 
g_df.filter(f)) + pytest.raises(TypeError, lambda: g_s.filter(f)) + + +def test_filter_nan_is_false(): + df = DataFrame({'A': np.arange(8), + 'B': list('aabbbbcc'), + 'C': np.arange(8)}) + s = df['B'] + g_df = df.groupby(df['B']) + g_s = s.groupby(s) + + f = lambda x: np.nan + tm.assert_frame_equal(g_df.filter(f), df.loc[[]]) + tm.assert_series_equal(g_s.filter(f), s[[]]) + + +def test_filter_against_workaround(): + np.random.seed(0) + # Series of ints + s = Series(np.random.randint(0, 100, 1000)) + grouper = s.apply(lambda x: np.round(x, -1)) + grouped = s.groupby(grouper) + f = lambda x: x.mean() > 10 + + old_way = s[grouped.transform(f).astype('bool')] + new_way = grouped.filter(f) + tm.assert_series_equal(new_way.sort_values(), old_way.sort_values()) + + # Series of floats + s = 100 * Series(np.random.random(1000)) + grouper = s.apply(lambda x: np.round(x, -1)) + grouped = s.groupby(grouper) + f = lambda x: x.mean() > 10 + old_way = s[grouped.transform(f).astype('bool')] + new_way = grouped.filter(f) + tm.assert_series_equal(new_way.sort_values(), old_way.sort_values()) + + # Set up DataFrame of ints, floats, strings. + from string import ascii_lowercase + letters = np.array(list(ascii_lowercase)) + N = 1000 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame({'ints': Series(np.random.randint(0, 100, N)), + 'floats': N / 10 * Series(np.random.random(N)), + 'letters': Series(random_letters)}) + + # Group by ints; filter on floats. + grouped = df.groupby('ints') + old_way = df[grouped.floats. + transform(lambda x: x.mean() > N / 20).astype('bool')] + new_way = grouped.filter(lambda x: x['floats'].mean() > N / 20) + tm.assert_frame_equal(new_way, old_way) + + # Group by floats (rounded); filter on strings. + grouper = df.floats.apply(lambda x: np.round(x, -1)) + grouped = df.groupby(grouper) + old_way = df[grouped.letters. + transform(lambda x: len(x) < N / 10).astype('bool')] + new_way = grouped.filter(lambda x: len(x.letters) < N / 10) + tm.assert_frame_equal(new_way, old_way) + + # Group by strings; filter on ints. + grouped = df.groupby('letters') + old_way = df[grouped.ints. + transform(lambda x: x.mean() > N / 20).astype('bool')] + new_way = grouped.filter(lambda x: x['ints'].mean() > N / 20) + tm.assert_frame_equal(new_way, old_way) + + +def test_filter_using_len(): + # BUG GH4447 + df = DataFrame({'A': np.arange(8), + 'B': list('aabbbbcc'), + 'C': np.arange(8)}) + grouped = df.groupby('B') + actual = grouped.filter(lambda x: len(x) > 2) + expected = DataFrame( + {'A': np.arange(2, 6), + 'B': list('bbbb'), + 'C': np.arange(2, 6)}, index=np.arange(2, 6)) + tm.assert_frame_equal(actual, expected) + + actual = grouped.filter(lambda x: len(x) > 4) + expected = df.loc[[]] + tm.assert_frame_equal(actual, expected) + + # Series have always worked properly, but we'll test anyway. + s = df['B'] + grouped = s.groupby(s) + actual = grouped.filter(lambda x: len(x) > 2) + expected = Series(4 * ['b'], index=np.arange(2, 6), name='B') + tm.assert_series_equal(actual, expected) + + actual = grouped.filter(lambda x: len(x) > 4) + expected = s[[]] + tm.assert_series_equal(actual, expected) + + +def test_filter_maintains_ordering(): + # Simple case: index is sequential. 
#4621 + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}) + s = df['pid'] + grouped = df.groupby('tag') + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df['tag']) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + # Now index is sequentially decreasing. + df.index = np.arange(len(df) - 1, -1, -1) + s = df['pid'] + grouped = df.groupby('tag') + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df['tag']) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + # Index is shuffled. + SHUFFLED = [4, 6, 7, 2, 1, 0, 5, 3] + df.index = df.index[SHUFFLED] + s = df['pid'] + grouped = df.groupby('tag') + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df['tag']) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + +def test_filter_multiple_timestamp(): + # GH 10114 + df = DataFrame({'A': np.arange(5, dtype='int64'), + 'B': ['foo', 'bar', 'foo', 'bar', 'bar'], + 'C': Timestamp('20130101')}) + + grouped = df.groupby(['B', 'C']) + + result = grouped['A'].filter(lambda x: True) + tm.assert_series_equal(df['A'], result) + + result = grouped['A'].transform(len) + expected = Series([2, 3, 2, 3, 3], name='A') + tm.assert_series_equal(result, expected) + + result = grouped.filter(lambda x: True) + tm.assert_frame_equal(df, result) + + result = grouped.transform('sum') + expected = DataFrame({'A': [2, 8, 2, 8, 8]}) + tm.assert_frame_equal(result, expected) + + result = grouped.transform(len) + expected = DataFrame({'A': [2, 3, 2, 3, 3]}) + tm.assert_frame_equal(result, expected) + + +def test_filter_and_transform_with_non_unique_int_index(): + # GH4620 + index = [1, 1, 1, 2, 1, 1, 0, 1] + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) + grouped_df = df.groupby('tag') + ser = df['pid'] + grouped_ser = ser.groupby(df['tag']) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') + # ^ made manually because this can get confusing! 
+ tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid') + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_multiple_non_unique_int_index(): + # GH4620 + index = [1, 1, 1, 2, 0, 0, 0, 1] + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) + grouped_df = df.groupby('tag') + ser = df['pid'] + grouped_ser = ser.groupby(df['tag']) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') + # ^ made manually because this can get confusing! + tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid') + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_float_index(): + # GH4620 + index = np.array([1, 1, 1, 2, 1, 1, 0, 1], dtype=float) + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) + grouped_df = df.groupby('tag') + ser = df['pid'] + grouped_ser = ser.groupby(df['tag']) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') + # ^ made manually because this can get confusing! 
+ tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid') + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_timestamp_index(): + # GH4620 + t0 = Timestamp('2013-09-30 00:05:00') + t1 = Timestamp('2013-10-30 00:05:00') + t2 = Timestamp('2013-11-30 00:05:00') + index = [t1, t1, t1, t2, t1, t1, t0, t1] + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) + grouped_df = df.groupby('tag') + ser = df['pid'] + grouped_ser = ser.groupby(df['tag']) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') + # ^ made manually because this can get confusing! + tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid') + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_string_index(): + # GH4620 + index = list('bbbcbbab') + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) + grouped_df = df.groupby('tag') + ser = df['pid'] + grouped_ser = ser.groupby(df['tag']) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') + # ^ made manually because this can get confusing! + tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid') + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_has_access_to_grouped_cols(): + df = DataFrame([[1, 2], [1, 3], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + # previously didn't have access to col A #???? 
+    filt = g.filter(lambda x: x['A'].sum() == 2)
+    tm.assert_frame_equal(filt, df.iloc[[0, 1]])
+
+
+def test_filter_enforces_scalarness():
+    df = pd.DataFrame([
+        ['best', 'a', 'x'],
+        ['worst', 'b', 'y'],
+        ['best', 'c', 'x'],
+        ['best', 'd', 'y'],
+        ['worst', 'd', 'y'],
+        ['worst', 'd', 'y'],
+        ['best', 'd', 'z'],
+    ], columns=['a', 'b', 'c'])
+    with tm.assert_raises_regex(TypeError,
+                                'filter function returned a.*'):
+        df.groupby('c').filter(lambda g: g['a'] == 'best')
+
+
+def test_filter_non_bool_raises():
+    df = pd.DataFrame([
+        ['best', 'a', 1],
+        ['worst', 'b', 1],
+        ['best', 'c', 1],
+        ['best', 'd', 1],
+        ['worst', 'd', 1],
+        ['worst', 'd', 1],
+        ['best', 'd', 1],
+    ], columns=['a', 'b', 'c'])
+    with tm.assert_raises_regex(TypeError,
+                                'filter function returned a.*'):
+        df.groupby('a').filter(lambda g: g.c.mean())
+
+
+def test_filter_dropna_with_empty_groups():
+    # GH 10780
+    data = pd.Series(np.random.rand(9), index=np.repeat([1, 2, 3], 3))
+    grouped = data.groupby(level=0)
+    result_false = grouped.filter(lambda x: x.mean() > 1, dropna=False)
+    expected_false = pd.Series([np.nan] * 9,
+                               index=np.repeat([1, 2, 3], 3))
+    tm.assert_series_equal(result_false, expected_false)
+
+    result_true = grouped.filter(lambda x: x.mean() > 1, dropna=True)
+    expected_true = pd.Series(index=pd.Index([], dtype=int))
+    tm.assert_series_equal(result_true, expected_true)
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
new file mode 100644
index 0000000000000..ba1371fe9f931
--- /dev/null
+++ b/pandas/tests/groupby/test_function.py
@@ -0,0 +1,1120 @@
+import pytest
+
+import numpy as np
+import pandas as pd
+from pandas import (DataFrame, Index, compat, isna,
+                    Series, MultiIndex, Timestamp, date_range)
+from pandas.errors import UnsupportedFunctionCall
+from pandas.util import testing as tm
+import pandas.core.nanops as nanops
+from string import ascii_lowercase
+from pandas.compat import product as cart_product
+
+
+@pytest.mark.parametrize("agg_func", ['any', 'all'])
+@pytest.mark.parametrize("skipna", [True, False])
+@pytest.mark.parametrize("vals", [
+    ['foo', 'bar', 'baz'], ['foo', '', ''], ['', '', ''],
+    [1, 2, 3], [1, 0, 0], [0, 0, 0],
+    [1., 2., 3.], [1., 0., 0.], [0., 0., 0.],
+    [True, True, True], [True, False, False], [False, False, False],
+    [np.nan, np.nan, np.nan]
+])
+def test_groupby_bool_aggs(agg_func, skipna, vals):
+    df = DataFrame({'key': ['a'] * 3 + ['b'] * 3, 'val': vals * 2})
+
+    # Figure out expectation using Python builtin
+    exp = getattr(compat.builtins, agg_func)(vals)
+
+    # edge case for missing data with skipna and 'any'
+    if skipna and all(isna(vals)) and agg_func == 'any':
+        exp = False
+
+    exp_df = DataFrame([exp] * 2, columns=['val'], index=Index(
+        ['a', 'b'], name='key'))
+    result = getattr(df.groupby('key'), agg_func)(skipna=skipna)
+    tm.assert_frame_equal(result, exp_df)
+
+
+def test_max_min_non_numeric():
+    # #2700
+    aa = DataFrame({'nn': [11, 11, 22, 22],
+                    'ii': [1, 2, 3, 4],
+                    'ss': 4 * ['mama']})
+
+    result = aa.groupby('nn').max()
+    assert 'ss' in result
+
+    result = aa.groupby('nn').max(numeric_only=False)
+    assert 'ss' in result
+
+    result = aa.groupby('nn').min()
+    assert 'ss' in result
+
+    result = aa.groupby('nn').min(numeric_only=False)
+    assert 'ss' in result
+
+
+def test_intercept_builtin_sum():
+    s = Series([1., 2., np.nan, 3.])
+    grouped = s.groupby([0, 1, 2, 2])
+
+    result = grouped.agg(compat.builtins.sum)
+    result2 = grouped.apply(compat.builtins.sum)
+    expected = grouped.sum()
+    
tm.assert_series_equal(result, expected)
+    tm.assert_series_equal(result2, expected)
+
+
+def test_builtins_apply():  # GH8155
+    df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)),
+                      columns=['jim', 'joe'])
+    df['jolie'] = np.random.randn(1000)
+
+    for keys in ['jim', ['jim', 'joe']]:  # single key & multi-key
+        for f in [max, min, sum]:
+            fname = f.__name__
+            result = df.groupby(keys).apply(f)
+            ngroups = len(df.drop_duplicates(subset=keys))
+            assert result.shape == (ngroups, 3), 'invalid frame shape: '\
+                '{} (expected ({}, 3))'.format(result.shape, ngroups)
+
+            tm.assert_frame_equal(result,  # numpy's equivalent function
+                                  df.groupby(keys).apply(getattr(np, fname)))
+
+            if f != sum:
+                expected = df.groupby(keys).agg(fname).reset_index()
+                expected.set_index(keys, inplace=True, drop=False)
+                tm.assert_frame_equal(result, expected, check_dtype=False)
+
+            tm.assert_series_equal(getattr(result, fname)(),
+                                   getattr(df, fname)())
+
+
+def test_arg_passthru():
+    # make sure that we are passing thru kwargs
+    # to our agg functions
+
+    # GH3668
+    # GH5724
+    df = pd.DataFrame(
+        {'group': [1, 1, 2],
+         'int': [1, 2, 3],
+         'float': [4., 5., 6.],
+         'string': list('abc'),
+         'category_string': pd.Series(list('abc')).astype('category'),
+         'category_int': [7, 8, 9],
+         'datetime': pd.date_range('20130101', periods=3),
+         'datetimetz': pd.date_range('20130101',
+                                     periods=3,
+                                     tz='US/Eastern'),
+         'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')},
+        columns=['group', 'int', 'float', 'string',
+                 'category_string', 'category_int',
+                 'datetime', 'datetimetz',
+                 'timedelta'])
+
+    expected_columns_numeric = Index(['int', 'float', 'category_int'])
+
+    # mean / median
+    expected = pd.DataFrame(
+        {'category_int': [7.5, 9],
+         'float': [4.5, 6.],
+         'timedelta': [pd.Timedelta('1.5s'),
+                       pd.Timedelta('3s')],
+         'int': [1.5, 3],
+         'datetime': [pd.Timestamp('2013-01-01 12:00:00'),
+                      pd.Timestamp('2013-01-03 00:00:00')],
+         'datetimetz': [
+             pd.Timestamp('2013-01-01 12:00:00', tz='US/Eastern'),
+             pd.Timestamp('2013-01-03 00:00:00', tz='US/Eastern')]},
+        index=Index([1, 2], name='group'),
+        columns=['int', 'float', 'category_int',
+                 'datetime', 'datetimetz', 'timedelta'])
+    for attr in ['mean', 'median']:
+        f = getattr(df.groupby('group'), attr)
+        result = f()
+        tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+        result = f(numeric_only=False)
+        tm.assert_frame_equal(result.reindex_like(expected), expected)
+
+    # TODO: min, max *should* handle
+    # categorical (ordered) dtype
+    expected_columns = Index(['int', 'float', 'string',
+                              'category_int',
+                              'datetime', 'datetimetz',
+                              'timedelta'])
+    for attr in ['min', 'max']:
+        f = getattr(df.groupby('group'), attr)
+        result = f()
+        tm.assert_index_equal(result.columns, expected_columns)
+
+        result = f(numeric_only=False)
+        tm.assert_index_equal(result.columns, expected_columns)
+
+    expected_columns = Index(['int', 'float', 'string',
+                              'category_string', 'category_int',
+                              'datetime', 'datetimetz',
+                              'timedelta'])
+    for attr in ['first', 'last']:
+        f = getattr(df.groupby('group'), attr)
+        result = f()
+        tm.assert_index_equal(result.columns, expected_columns)
+
+        result = f(numeric_only=False)
+        tm.assert_index_equal(result.columns, expected_columns)
+
+    expected_columns = Index(['int', 'float', 'string',
+                              'category_int', 'timedelta'])
+    for attr in ['sum']:
+        f = getattr(df.groupby('group'), attr)
+        result = f()
+        tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+        result = f(numeric_only=False)
+        tm.assert_index_equal(result.columns, expected_columns)
+
+    expected_columns = Index(['int', 'float', 'category_int'])
+    for attr in ['prod', 'cumprod']:
+        f = getattr(df.groupby('group'), attr)
+        result = f()
+        tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+        result = f(numeric_only=False)
+        tm.assert_index_equal(result.columns, expected_columns)
+
+    # like min, max, but don't include strings
+    expected_columns = Index(['int', 'float',
+                              'category_int',
+                              'datetime', 'datetimetz',
+                              'timedelta'])
+    for attr in ['cummin', 'cummax']:
+        f = getattr(df.groupby('group'), attr)
+        result = f()
+        # GH 15561: numeric_only=False set by default like min/max
+        tm.assert_index_equal(result.columns, expected_columns)
+
+        result = f(numeric_only=False)
+        tm.assert_index_equal(result.columns, expected_columns)
+
+    expected_columns = Index(['int', 'float', 'category_int',
+                              'timedelta'])
+    for attr in ['cumsum']:
+        f = getattr(df.groupby('group'), attr)
+        result = f()
+        tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+        result = f(numeric_only=False)
+        tm.assert_index_equal(result.columns, expected_columns)
+
+
+def test_non_cython_api():
+
+    # GH5610
+    # non-cython calls should not include the grouper
+
+    df = DataFrame(
+        [[1, 2, 'foo'],
+         [1, np.nan, 'bar'],
+         [3, np.nan, 'baz']],
+        columns=['A', 'B', 'C'])
+    g = df.groupby('A')
+    gni = df.groupby('A', as_index=False)
+
+    # mad
+    expected = DataFrame([[0], [np.nan]], columns=['B'], index=[1, 3])
+    expected.index.name = 'A'
+    result = g.mad()
+    tm.assert_frame_equal(result, expected)
+
+    expected = DataFrame([[0., 0.], [0, np.nan]], columns=['A', 'B'],
+                         index=[0, 1])
+    result = gni.mad()
+    tm.assert_frame_equal(result, expected)
+
+    # describe
+    expected_index = pd.Index([1, 3], name='A')
+    expected_col = pd.MultiIndex(levels=[['B'],
+                                         ['count', 'mean', 'std', 'min',
+                                          '25%', '50%', '75%', 'max']],
+                                 labels=[[0] * 8, list(range(8))])
+    expected = pd.DataFrame([[1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0],
+                             [0.0, np.nan, np.nan, np.nan, np.nan, np.nan,
+                              np.nan, np.nan]],
+                            index=expected_index,
+                            columns=expected_col)
+    result = g.describe()
+    tm.assert_frame_equal(result, expected)
+
+    expected = pd.concat([df[df.A == 1].describe().unstack().to_frame().T,
+                          df[df.A == 3].describe().unstack().to_frame().T])
+    expected.index = pd.Index([0, 1])
+    result = gni.describe()
+    tm.assert_frame_equal(result, expected)
+
+    # any
+    expected = DataFrame([[True, True], [False, True]], columns=['B', 'C'],
+                         index=[1, 3])
+    expected.index.name = 'A'
+    result = g.any()
+    tm.assert_frame_equal(result, expected)
+
+    # idxmax
+    expected = DataFrame([[0.0], [np.nan]], columns=['B'], index=[1, 3])
+    expected.index.name = 'A'
+    result = g.idxmax()
+    tm.assert_frame_equal(result, expected)
+
+
+def test_cython_api2():
+
+    # this takes the fast apply path
+
+    # cumsum (GH5614)
+    df = DataFrame(
+        [[1, 2, np.nan], [1, np.nan, 9], [3, 4, 9]
+         ], columns=['A', 'B', 'C'])
+    expected = DataFrame(
+        [[2, np.nan], [np.nan, 9], [4, 9]], columns=['B', 'C'])
+    result = df.groupby('A').cumsum()
+    tm.assert_frame_equal(result, expected)
+
+    # GH 5755 - cumsum is a transformer and should ignore as_index
+    result = df.groupby('A', as_index=False).cumsum()
+    tm.assert_frame_equal(result, expected)
+
+    # GH 13994
+    result = df.groupby('A').cumsum(axis=1)
+    expected = df.cumsum(axis=1)
+    tm.assert_frame_equal(result, expected)
+    result = df.groupby('A').cumprod(axis=1)
+    expected = df.cumprod(axis=1)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_cython_median():
+    df = DataFrame(np.random.randn(1000))
+    df.values[::2] = np.nan
+
+    labels = np.random.randint(0, 50, size=1000).astype(float)
+    labels[::17] = np.nan
+
+    result = df.groupby(labels).median()
+    exp = df.groupby(labels).agg(nanops.nanmedian)
+    tm.assert_frame_equal(result, exp)
+
+    df = DataFrame(np.random.randn(1000, 5))
+    rs = df.groupby(labels).agg(np.median)
+    xp = df.groupby(labels).median()
+    tm.assert_frame_equal(rs, xp)
+
+
+def test_median_empty_bins():
+    df = pd.DataFrame(np.random.randint(0, 44, 500))
+
+    grps = range(0, 55, 5)
+    bins = pd.cut(df[0], grps)
+
+    result = df.groupby(bins).median()
+    expected = df.groupby(bins).agg(lambda x: x.median())
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("dtype", [
+    'int8', 'int16', 'int32', 'int64', 'float32', 'float64'])
+@pytest.mark.parametrize("method,data", [
+    ('first', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}),
+    ('last', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}),
+    ('min', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}),
+    ('max', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}),
+    ('nth', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}],
+             'args': [1]}),
+    ('count', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}],
+               'out_type': 'int64'})
+])
+def test_groupby_non_arithmetic_agg_types(dtype, method, data):
+    # GH9311, GH6620
+    df = pd.DataFrame(
+        [{'a': 1, 'b': 1},
+         {'a': 1, 'b': 2},
+         {'a': 2, 'b': 3},
+         {'a': 2, 'b': 4}])
+
+    df['b'] = df.b.astype(dtype)
+
+    if 'args' not in data:
+        data['args'] = []
+
+    if 'out_type' in data:
+        out_type = data['out_type']
+    else:
+        out_type = dtype
+
+    exp = data['df']
+    df_out = pd.DataFrame(exp)
+
+    df_out['b'] = df_out.b.astype(out_type)
+    df_out.set_index('a', inplace=True)
+
+    grpd = df.groupby('a')
+    t = getattr(grpd, method)(*data['args'])
+    tm.assert_frame_equal(t, df_out)
+
+
+def test_groupby_non_arithmetic_agg_intlike_precision():
+    # GH9311, GH6620
+    c = 24650000000000000
+
+    inputs = ((Timestamp('2011-01-15 12:50:28.502376'),
+               Timestamp('2011-01-20 12:50:28.593448')), (1 + c, 2 + c))
+
+    for i in inputs:
+        df = pd.DataFrame([{'a': 1, 'b': i[0]}, {'a': 1, 'b': i[1]}])
+
+        grp_exp = {'first': {'expected': i[0]},
+                   'last': {'expected': i[1]},
+                   'min': {'expected': i[0]},
+                   'max': {'expected': i[1]},
+                   'nth': {'expected': i[1],
+                           'args': [1]},
+                   'count': {'expected': 2}}
+
+        for method, data in compat.iteritems(grp_exp):
+            if 'args' not in data:
+                data['args'] = []
+
+            grpd = df.groupby('a')
+            res = getattr(grpd, method)(*data['args'])
+            assert res.iloc[0].b == data['expected']
+
+
+def test_fill_consistency():
+
+    # GH9221
+    # keyword arguments passed thru to the generated wrapper
+    # are set only if the passed kw is None
+    df = DataFrame(index=pd.MultiIndex.from_product(
+        [['value1', 'value2'], date_range('2014-01-01', '2014-01-06')]),
+        columns=Index(
+        ['1', '2'], name='id'))
+    df['1'] = [np.nan, 1, np.nan, np.nan, 11, np.nan, np.nan, 2, np.nan,
+               np.nan, 22, np.nan]
+    df['2'] = [np.nan, 3, np.nan, np.nan, 33, np.nan, np.nan, 4, np.nan,
+               np.nan, 44, np.nan]
+
+    expected = df.groupby(level=0, axis=0).fillna(method='ffill')
+    result = df.T.groupby(level=0, axis=1).fillna(method='ffill').T
+    tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_cumprod():
+    # GH 4095
+    df = pd.DataFrame({'key': ['b'] * 10, 'value': 2})
+
+    actual = df.groupby('key')['value'].cumprod()
+    expected = df.groupby('key')['value'].apply(lambda x: x.cumprod())
+    expected.name = 'value'
+    tm.assert_series_equal(actual, expected)
+
+    df =
pd.DataFrame({'key': ['b'] * 100, 'value': 2}) + actual = df.groupby('key')['value'].cumprod() + # if overflows, groupby product casts to float + # while numpy passes back invalid values + df['value'] = df['value'].astype(float) + expected = df.groupby('key')['value'].apply(lambda x: x.cumprod()) + expected.name = 'value' + tm.assert_series_equal(actual, expected) + + +def test_ops_general(): + ops = [('mean', np.mean), + ('median', np.median), + ('std', np.std), + ('var', np.var), + ('sum', np.sum), + ('prod', np.prod), + ('min', np.min), + ('max', np.max), + ('first', lambda x: x.iloc[0]), + ('last', lambda x: x.iloc[-1]), + ('count', np.size), ] + try: + from scipy.stats import sem + except ImportError: + pass + else: + ops.append(('sem', sem)) + df = DataFrame(np.random.randn(1000)) + labels = np.random.randint(0, 50, size=1000).astype(float) + + for op, targop in ops: + result = getattr(df.groupby(labels), op)().astype(float) + expected = df.groupby(labels).agg(targop) + try: + tm.assert_frame_equal(result, expected) + except BaseException as exc: + exc.args += ('operation: %s' % op, ) + raise + + +def test_max_nan_bug(): + raw = """,Date,app,File +-04-23,2013-04-23 00:00:00,,log080001.log +-05-06,2013-05-06 00:00:00,,log.log +-05-07,2013-05-07 00:00:00,OE,xlsx""" + + df = pd.read_csv(compat.StringIO(raw), parse_dates=[0]) + gb = df.groupby('Date') + r = gb[['File']].max() + e = gb['File'].max().to_frame() + tm.assert_frame_equal(r, e) + assert not r['File'].isna().any() + + +def test_nlargest(): + a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) + b = Series(list('a' * 5 + 'b' * 5)) + gb = a.groupby(b) + r = gb.nlargest(3) + e = Series([ + 7, 5, 3, 10, 9, 6 + ], index=MultiIndex.from_arrays([list('aaabbb'), [3, 2, 1, 9, 5, 8]])) + tm.assert_series_equal(r, e) + + a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) + gb = a.groupby(b) + e = Series([ + 3, 2, 1, 3, 3, 2 + ], index=MultiIndex.from_arrays([list('aaabbb'), [2, 3, 1, 6, 5, 7]])) + tm.assert_series_equal(gb.nlargest(3, keep='last'), e) + + +def test_nsmallest(): + a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) + b = Series(list('a' * 5 + 'b' * 5)) + gb = a.groupby(b) + r = gb.nsmallest(3) + e = Series([ + 1, 2, 3, 0, 4, 6 + ], index=MultiIndex.from_arrays([list('aaabbb'), [0, 4, 1, 6, 7, 8]])) + tm.assert_series_equal(r, e) + + a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) + gb = a.groupby(b) + e = Series([ + 0, 1, 1, 0, 1, 2 + ], index=MultiIndex.from_arrays([list('aaabbb'), [4, 1, 0, 9, 8, 7]])) + tm.assert_series_equal(gb.nsmallest(3, keep='last'), e) + + +def test_numpy_compat(): + # see gh-12811 + df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]}) + g = df.groupby('A') + + msg = "numpy operations are not valid with groupby" + + for func in ('mean', 'var', 'std', 'cumprod', 'cumsum'): + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(g, func), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(g, func), foo=1) + + +def test_cummin_cummax(): + # GH 15048 + num_types = [np.int32, np.int64, np.float32, np.float64] + num_mins = [np.iinfo(np.int32).min, np.iinfo(np.int64).min, + np.finfo(np.float32).min, np.finfo(np.float64).min] + num_max = [np.iinfo(np.int32).max, np.iinfo(np.int64).max, + np.finfo(np.float32).max, np.finfo(np.float64).max] + base_df = pd.DataFrame({'A': [1, 1, 1, 1, 2, 2, 2, 2], + 'B': [3, 4, 3, 2, 2, 3, 2, 1]}) + expected_mins = [3, 3, 3, 2, 2, 2, 2, 1] + expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] + + for dtype, min_val, max_val in zip(num_types, num_mins, num_max): + df = 
base_df.astype(dtype) + + # cummin + expected = pd.DataFrame({'B': expected_mins}).astype(dtype) + result = df.groupby('A').cummin() + tm.assert_frame_equal(result, expected) + result = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test cummin w/ min value for dtype + df.loc[[2, 6], 'B'] = min_val + expected.loc[[2, 3, 6, 7], 'B'] = min_val + result = df.groupby('A').cummin() + tm.assert_frame_equal(result, expected) + expected = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) + + # cummax + expected = pd.DataFrame({'B': expected_maxs}).astype(dtype) + result = df.groupby('A').cummax() + tm.assert_frame_equal(result, expected) + result = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test cummax w/ max value for dtype + df.loc[[2, 6], 'B'] = max_val + expected.loc[[2, 3, 6, 7], 'B'] = max_val + result = df.groupby('A').cummax() + tm.assert_frame_equal(result, expected) + expected = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test nan in some values + base_df.loc[[0, 2, 4, 6], 'B'] = np.nan + expected = pd.DataFrame({'B': [np.nan, 4, np.nan, 2, + np.nan, 3, np.nan, 1]}) + result = base_df.groupby('A').cummin() + tm.assert_frame_equal(result, expected) + expected = (base_df.groupby('A') + .B + .apply(lambda x: x.cummin()) + .to_frame()) + tm.assert_frame_equal(result, expected) + + expected = pd.DataFrame({'B': [np.nan, 4, np.nan, 4, + np.nan, 3, np.nan, 3]}) + result = base_df.groupby('A').cummax() + tm.assert_frame_equal(result, expected) + expected = (base_df.groupby('A') + .B + .apply(lambda x: x.cummax()) + .to_frame()) + tm.assert_frame_equal(result, expected) + + # Test nan in entire column + base_df['B'] = np.nan + expected = pd.DataFrame({'B': [np.nan] * 8}) + result = base_df.groupby('A').cummin() + tm.assert_frame_equal(expected, result) + result = base_df.groupby('A').B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(expected, result) + result = base_df.groupby('A').cummax() + tm.assert_frame_equal(expected, result) + result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(expected, result) + + # GH 15561 + df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(['2001']))) + expected = pd.Series(pd.to_datetime('2001'), index=[0], name='b') + for method in ['cummax', 'cummin']: + result = getattr(df.groupby('a')['b'], method)() + tm.assert_series_equal(expected, result) + + # GH 15635 + df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1])) + result = df.groupby('a').b.cummax() + expected = pd.Series([2, 1, 2], name='b') + tm.assert_series_equal(result, expected) + + df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) + result = df.groupby('a').b.cummin() + expected = pd.Series([1, 2, 1], name='b') + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize('in_vals, out_vals', [ + + # Basics: strictly increasing (T), strictly decreasing (F), + # abs val increasing (F), non-strictly increasing (T) + ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1], + [True, False, False, True]), + + # Test with inf vals + ([1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf], + [True, False, True, False]), + + # Test with nan vals; should always be False + ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False]), +]) +def test_is_monotonic_increasing(in_vals, out_vals): + # GH 17015 + 
source_dict = { + 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'], + 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'], + 'C': in_vals} + df = pd.DataFrame(source_dict) + result = df.groupby('B').C.is_monotonic_increasing + index = Index(list('abcd'), name='B') + expected = pd.Series(index=index, data=out_vals, name='C') + tm.assert_series_equal(result, expected) + + # Also check result equal to manually taking x.is_monotonic_increasing. + expected = ( + df.groupby(['B']).C.apply(lambda x: x.is_monotonic_increasing)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize('in_vals, out_vals', [ + # Basics: strictly decreasing (T), strictly increasing (F), + # abs val decreasing (F), non-strictly increasing (T) + ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1], + [True, False, False, True]), + + # Test with inf vals + ([np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf], + [True, True, False, True]), + + # Test with nan vals; should always be False + ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False]), +]) +def test_is_monotonic_decreasing(in_vals, out_vals): + # GH 17015 + source_dict = { + 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'], + 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'], + 'C': in_vals} + + df = pd.DataFrame(source_dict) + result = df.groupby('B').C.is_monotonic_decreasing + index = Index(list('abcd'), name='B') + expected = pd.Series(index=index, data=out_vals, name='C') + tm.assert_series_equal(result, expected) + + +# describe +# -------------------------------- + +def test_apply_describe_bug(mframe): + grouped = mframe.groupby(level='first') + grouped.describe() # it works! + + +def test_series_describe_multikey(): + ts = tm.makeTimeSeries() + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + tm.assert_series_equal(result['mean'], grouped.mean(), + check_names=False) + tm.assert_series_equal(result['std'], grouped.std(), check_names=False) + tm.assert_series_equal(result['min'], grouped.min(), check_names=False) + + +def test_series_describe_single(): + ts = tm.makeTimeSeries() + grouped = ts.groupby(lambda x: x.month) + result = grouped.apply(lambda x: x.describe()) + expected = grouped.describe().stack() + tm.assert_series_equal(result, expected) + + +def test_series_index_name(df): + grouped = df.loc[:, ['C']].groupby(df['A']) + result = grouped.agg(lambda x: x.mean()) + assert result.index.name == 'A' + + +def test_frame_describe_multikey(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + desc_groups = [] + for col in tsframe: + group = grouped[col].describe() + # GH 17464 - Remove duplicate MultiIndex levels + group_col = pd.MultiIndex( + levels=[[col], group.columns], + labels=[[0] * len(group.columns), range(len(group.columns))]) + group = pd.DataFrame(group.values, + columns=group_col, + index=group.index) + desc_groups.append(group) + expected = pd.concat(desc_groups, axis=1) + tm.assert_frame_equal(result, expected) + + groupedT = tsframe.groupby({'A': 0, 'B': 0, + 'C': 1, 'D': 1}, axis=1) + result = groupedT.describe() + expected = tsframe.describe().T + expected.index = pd.MultiIndex( + levels=[[0, 1], expected.index], + labels=[[0, 0, 1, 1], range(len(expected.index))]) + tm.assert_frame_equal(result, expected) + + +def test_frame_describe_tupleindex(): + + # GH 14848 - regression from 0.19.0 to 0.19.1 + df1 = DataFrame({'x': [1, 2, 3, 4, 
5] * 3, + 'y': [10, 20, 30, 40, 50] * 3, + 'z': [100, 200, 300, 400, 500] * 3}) + df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 + df2 = df1.rename(columns={'k': 'key'}) + pytest.raises(ValueError, lambda: df1.groupby('k').describe()) + pytest.raises(ValueError, lambda: df2.groupby('key').describe()) + + +def test_frame_describe_unstacked_format(): + # GH 4792 + prices = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 24990, + pd.Timestamp('2011-01-06 12:43:33', tz=None): 25499, + pd.Timestamp('2011-01-06 12:54:09', tz=None): 25499} + volumes = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, + pd.Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, + pd.Timestamp('2011-01-06 12:54:09', tz=None): 100000000} + df = pd.DataFrame({'PRICE': prices, + 'VOLUME': volumes}) + result = df.groupby('PRICE').VOLUME.describe() + data = [df[df.PRICE == 24990].VOLUME.describe().values.tolist(), + df[df.PRICE == 25499].VOLUME.describe().values.tolist()] + expected = pd.DataFrame(data, + index=pd.Index([24990, 25499], name='PRICE'), + columns=['count', 'mean', 'std', 'min', + '25%', '50%', '75%', 'max']) + tm.assert_frame_equal(result, expected) + + +# nunique +# -------------------------------- + +@pytest.mark.parametrize("n, m", cart_product(10 ** np.arange(2, 6), + (10, 100, 1000))) +@pytest.mark.parametrize("sort, dropna", cart_product((False, True), repeat=2)) +def test_series_groupby_nunique(n, m, sort, dropna): + + def check_nunique(df, keys, as_index=True): + gr = df.groupby(keys, as_index=as_index, sort=sort) + left = gr['julie'].nunique(dropna=dropna) + + gr = df.groupby(keys, as_index=as_index, sort=sort) + right = gr['julie'].apply(Series.nunique, dropna=dropna) + if not as_index: + right = right.reset_index(drop=True) + + tm.assert_series_equal(left, right, check_names=False) + + days = date_range('2015-08-23', periods=10) + + frame = DataFrame({'jim': np.random.choice(list(ascii_lowercase), n), + 'joe': np.random.choice(days, n), + 'julie': np.random.randint(0, m, n)}) + + check_nunique(frame, ['jim']) + check_nunique(frame, ['jim', 'joe']) + + frame.loc[1::17, 'jim'] = None + frame.loc[3::37, 'joe'] = None + frame.loc[7::19, 'julie'] = None + frame.loc[8::19, 'julie'] = None + frame.loc[9::19, 'julie'] = None + + check_nunique(frame, ['jim']) + check_nunique(frame, ['jim', 'joe']) + check_nunique(frame, ['jim'], as_index=False) + check_nunique(frame, ['jim', 'joe'], as_index=False) + + +def test_nunique(): + df = DataFrame({ + 'A': list('abbacc'), + 'B': list('abxacc'), + 'C': list('abbacx'), + }) + + expected = DataFrame({'A': [1] * 3, 'B': [1, 2, 1], 'C': [1, 1, 2]}) + result = df.groupby('A', as_index=False).nunique() + tm.assert_frame_equal(result, expected) + + # as_index + expected.index = list('abc') + expected.index.name = 'A' + result = df.groupby('A').nunique() + tm.assert_frame_equal(result, expected) + + # with na + result = df.replace({'x': None}).groupby('A').nunique(dropna=False) + tm.assert_frame_equal(result, expected) + + # dropna + expected = DataFrame({'A': [1] * 3, 'B': [1] * 3, 'C': [1] * 3}, + index=list('abc')) + expected.index.name = 'A' + result = df.replace({'x': None}).groupby('A').nunique() + tm.assert_frame_equal(result, expected) + + +def test_nunique_with_object(): + # GH 11077 + data = pd.DataFrame( + [[100, 1, 'Alice'], + [200, 2, 'Bob'], + [300, 3, 'Charlie'], + [-400, 4, 'Dan'], + [500, 5, 'Edith']], + columns=['amount', 'id', 'name'] + ) + + result = data.groupby(['id', 'amount'])['name'].nunique() + index = MultiIndex.from_arrays([data.id, 
data.amount]) + expected = pd.Series([1] * 5, name='name', index=index) + tm.assert_series_equal(result, expected) + + +def test_nunique_with_empty_series(): + # GH 12553 + data = pd.Series(name='name') + result = data.groupby(level=0).nunique() + expected = pd.Series(name='name', dtype='int64') + tm.assert_series_equal(result, expected) + + +def test_nunique_with_timegrouper(): + # GH 13453 + test = pd.DataFrame({ + 'time': [Timestamp('2016-06-28 09:35:35'), + Timestamp('2016-06-28 16:09:30'), + Timestamp('2016-06-28 16:46:28')], + 'data': ['1', '2', '3']}).set_index('time') + result = test.groupby(pd.Grouper(freq='h'))['data'].nunique() + expected = test.groupby( + pd.Grouper(freq='h') + )['data'].apply(pd.Series.nunique) + tm.assert_series_equal(result, expected) + + +# count +# -------------------------------- + +def test_groupby_timedelta_cython_count(): + df = DataFrame({'g': list('ab' * 2), + 'delt': np.arange(4).astype('timedelta64[ns]')}) + expected = Series([ + 2, 2 + ], index=pd.Index(['a', 'b'], name='g'), name='delt') + result = df.groupby('g').delt.count() + tm.assert_series_equal(expected, result) + + +def test_count(): + n = 1 << 15 + dr = date_range('2015-08-30', periods=n // 10, freq='T') + + df = DataFrame({ + '1st': np.random.choice( + list(ascii_lowercase), n), + '2nd': np.random.randint(0, 5, n), + '3rd': np.random.randn(n).round(3), + '4th': np.random.randint(-10, 10, n), + '5th': np.random.choice(dr, n), + '6th': np.random.randn(n).round(3), + '7th': np.random.randn(n).round(3), + '8th': np.random.choice(dr, n) - np.random.choice(dr, 1), + '9th': np.random.choice( + list(ascii_lowercase), n) + }) + + for col in df.columns.drop(['1st', '2nd', '4th']): + df.loc[np.random.choice(n, n // 10), col] = np.nan + + df['9th'] = df['9th'].astype('category') + + for key in '1st', '2nd', ['1st', '2nd']: + left = df.groupby(key).count() + right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1) + tm.assert_frame_equal(left, right) + + # GH5610 + # count counts non-nulls + df = pd.DataFrame([[1, 2, 'foo'], + [1, np.nan, 'bar'], + [3, np.nan, np.nan]], + columns=['A', 'B', 'C']) + + count_as = df.groupby('A').count() + count_not_as = df.groupby('A', as_index=False).count() + + expected = DataFrame([[1, 2], [0, 0]], columns=['B', 'C'], + index=[1, 3]) + expected.index.name = 'A' + tm.assert_frame_equal(count_not_as, expected.reset_index()) + tm.assert_frame_equal(count_as, expected) + + count_B = df.groupby('A')['B'].count() + tm.assert_series_equal(count_B, expected['B']) + + +def test_count_object(): + df = pd.DataFrame({'a': ['a'] * 3 + ['b'] * 3, 'c': [2] * 3 + [3] * 3}) + result = df.groupby('c').a.count() + expected = pd.Series([ + 3, 3 + ], index=pd.Index([2, 3], name='c'), name='a') + tm.assert_series_equal(result, expected) + + df = pd.DataFrame({'a': ['a', np.nan, np.nan] + ['b'] * 3, + 'c': [2] * 3 + [3] * 3}) + result = df.groupby('c').a.count() + expected = pd.Series([ + 1, 3 + ], index=pd.Index([2, 3], name='c'), name='a') + tm.assert_series_equal(result, expected) + + +def test_count_cross_type(): + # GH8169 + vals = np.hstack((np.random.randint(0, 5, (100, 2)), np.random.randint( + 0, 2, (100, 2)))) + + df = pd.DataFrame(vals, columns=['a', 'b', 'c', 'd']) + df[df == 2] = np.nan + expected = df.groupby(['c', 'd']).count() + + for t in ['float32', 'object']: + df['a'] = df['a'].astype(t) + df['b'] = df['b'].astype(t) + result = df.groupby(['c', 'd']).count() + tm.assert_frame_equal(result, expected) + + +def test_lower_int_prec_count(): + df = DataFrame({'a': 
np.array( + [0, 1, 2, 100], np.int8), + 'b': np.array( + [1, 2, 3, 6], np.uint32), + 'c': np.array( + [4, 5, 6, 8], np.int16), + 'grp': list('ab' * 2)}) + result = df.groupby('grp').count() + expected = DataFrame({'a': [2, 2], + 'b': [2, 2], + 'c': [2, 2]}, index=pd.Index(list('ab'), + name='grp')) + tm.assert_frame_equal(result, expected) + + +def test_count_uses_size_on_exception(): + class RaisingObjectException(Exception): + pass + + class RaisingObject(object): + + def __init__(self, msg='I will raise inside Cython'): + super(RaisingObject, self).__init__() + self.msg = msg + + def __eq__(self, other): + # gets called in Cython to check that raising calls the method + raise RaisingObjectException(self.msg) + + df = DataFrame({'a': [RaisingObject() for _ in range(4)], + 'grp': list('ab' * 2)}) + result = df.groupby('grp').count() + expected = DataFrame({'a': [2, 2]}, index=pd.Index( + list('ab'), name='grp')) + tm.assert_frame_equal(result, expected) + + +# size +# -------------------------------- + +def test_size(df): + grouped = df.groupby(['A', 'B']) + result = grouped.size() + for key, group in grouped: + assert result[key] == len(group) + + grouped = df.groupby('A') + result = grouped.size() + for key, group in grouped: + assert result[key] == len(group) + + grouped = df.groupby('B') + result = grouped.size() + for key, group in grouped: + assert result[key] == len(group) + + df = DataFrame(np.random.choice(20, (1000, 3)), columns=list('abc')) + for sort, key in cart_product((False, True), ('a', 'b', ['a', 'b'])): + left = df.groupby(key, sort=sort).size() + right = df.groupby(key, sort=sort)['c'].apply(lambda a: a.shape[0]) + tm.assert_series_equal(left, right, check_names=False) + + # GH11699 + df = DataFrame([], columns=['A', 'B']) + out = Series([], dtype='int64', index=Index([], name='A')) + tm.assert_series_equal(df.groupby('A').size(), out) + + +# pipe +# -------------------------------- + +def test_pipe(): + # Test the pipe method of DataFrameGroupBy. + # Issue #17871 + + random_state = np.random.RandomState(1234567890) + + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': random_state.randn(8), + 'C': random_state.randn(8)}) + + def f(dfgb): + return dfgb.B.max() - dfgb.C.min().min() + + def square(srs): + return srs ** 2 + + # Note that the transformations are + # GroupBy -> Series + # Series -> Series + # This then chains the GroupBy.pipe and the + # NDFrame.pipe methods + result = df.groupby('A').pipe(f).pipe(square) + + index = Index([u'bar', u'foo'], dtype='object', name=u'A') + expected = pd.Series([8.99110003361, 8.17516964785], name='B', + index=index) + + tm.assert_series_equal(expected, result) + + +def test_pipe_args(): + # Test passing args to the pipe method of DataFrameGroupBy. 
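+    # pipe should forward extra arguments through to the piped callable.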
+ # Issue #17871 + + df = pd.DataFrame({'group': ['A', 'A', 'B', 'B', 'C'], + 'x': [1.0, 2.0, 3.0, 2.0, 5.0], + 'y': [10.0, 100.0, 1000.0, -100.0, -1000.0]}) + + def f(dfgb, arg1): + return (dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False) + .groupby(dfgb.grouper)) + + def g(dfgb, arg2): + return dfgb.sum() / dfgb.sum().sum() + arg2 + + def h(df, arg3): + return df.x + df.y - arg3 + + result = (df + .groupby('group') + .pipe(f, 0) + .pipe(g, 10) + .pipe(h, 100)) + + # Assert the results here + index = pd.Index(['A', 'B', 'C'], name='group') + expected = pd.Series([-79.5160891089, -78.4839108911, -80], + index=index) + + tm.assert_series_equal(expected, result) + + # test SeriesGroupby.pipe + ser = pd.Series([1, 1, 2, 2, 3, 3]) + result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count()) + + expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3])) + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_functional.py b/pandas/tests/groupby/test_functional.py deleted file mode 100644 index b9718663570bd..0000000000000 --- a/pandas/tests/groupby/test_functional.py +++ /dev/null @@ -1,372 +0,0 @@ -# -*- coding: utf-8 -*- - -""" test function application """ - -import pytest - -from string import ascii_lowercase -from pandas import (date_range, Timestamp, - Index, MultiIndex, DataFrame, Series) -from pandas.util.testing import assert_frame_equal, assert_series_equal -from pandas.compat import product as cart_product - -import numpy as np - -import pandas.util.testing as tm -import pandas as pd -from .common import MixIn - - -# describe -# -------------------------------- - -class TestDescribe(MixIn): - - def test_apply_describe_bug(self): - grouped = self.mframe.groupby(level='first') - grouped.describe() # it works! 
- - def test_series_describe_multikey(self): - ts = tm.makeTimeSeries() - grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.describe() - assert_series_equal(result['mean'], grouped.mean(), check_names=False) - assert_series_equal(result['std'], grouped.std(), check_names=False) - assert_series_equal(result['min'], grouped.min(), check_names=False) - - def test_series_describe_single(self): - ts = tm.makeTimeSeries() - grouped = ts.groupby(lambda x: x.month) - result = grouped.apply(lambda x: x.describe()) - expected = grouped.describe().stack() - assert_series_equal(result, expected) - - def test_series_index_name(self): - grouped = self.df.loc[:, ['C']].groupby(self.df['A']) - result = grouped.agg(lambda x: x.mean()) - assert result.index.name == 'A' - - def test_frame_describe_multikey(self): - grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.describe() - desc_groups = [] - for col in self.tsframe: - group = grouped[col].describe() - # GH 17464 - Remove duplicate MultiIndex levels - group_col = pd.MultiIndex( - levels=[[col], group.columns], - labels=[[0] * len(group.columns), range(len(group.columns))]) - group = pd.DataFrame(group.values, - columns=group_col, - index=group.index) - desc_groups.append(group) - expected = pd.concat(desc_groups, axis=1) - tm.assert_frame_equal(result, expected) - - groupedT = self.tsframe.groupby({'A': 0, 'B': 0, - 'C': 1, 'D': 1}, axis=1) - result = groupedT.describe() - expected = self.tsframe.describe().T - expected.index = pd.MultiIndex( - levels=[[0, 1], expected.index], - labels=[[0, 0, 1, 1], range(len(expected.index))]) - tm.assert_frame_equal(result, expected) - - def test_frame_describe_tupleindex(self): - - # GH 14848 - regression from 0.19.0 to 0.19.1 - df1 = DataFrame({'x': [1, 2, 3, 4, 5] * 3, - 'y': [10, 20, 30, 40, 50] * 3, - 'z': [100, 200, 300, 400, 500] * 3}) - df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 - df2 = df1.rename(columns={'k': 'key'}) - pytest.raises(ValueError, lambda: df1.groupby('k').describe()) - pytest.raises(ValueError, lambda: df2.groupby('key').describe()) - - def test_frame_describe_unstacked_format(self): - # GH 4792 - prices = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 24990, - pd.Timestamp('2011-01-06 12:43:33', tz=None): 25499, - pd.Timestamp('2011-01-06 12:54:09', tz=None): 25499} - volumes = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, - pd.Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, - pd.Timestamp('2011-01-06 12:54:09', tz=None): 100000000} - df = pd.DataFrame({'PRICE': prices, - 'VOLUME': volumes}) - result = df.groupby('PRICE').VOLUME.describe() - data = [df[df.PRICE == 24990].VOLUME.describe().values.tolist(), - df[df.PRICE == 25499].VOLUME.describe().values.tolist()] - expected = pd.DataFrame(data, - index=pd.Index([24990, 25499], name='PRICE'), - columns=['count', 'mean', 'std', 'min', - '25%', '50%', '75%', 'max']) - tm.assert_frame_equal(result, expected) - - -# nunique -# -------------------------------- - -class TestNUnique(MixIn): - - def test_series_groupby_nunique(self): - - def check_nunique(df, keys, as_index=True): - for sort, dropna in cart_product((False, True), repeat=2): - gr = df.groupby(keys, as_index=as_index, sort=sort) - left = gr['julie'].nunique(dropna=dropna) - - gr = df.groupby(keys, as_index=as_index, sort=sort) - right = gr['julie'].apply(Series.nunique, dropna=dropna) - if not as_index: - right = right.reset_index(drop=True) - - assert_series_equal(left, right, check_names=False) 
- - days = date_range('2015-08-23', periods=10) - - for n, m in cart_product(10 ** np.arange(2, 6), (10, 100, 1000)): - frame = DataFrame({ - 'jim': np.random.choice( - list(ascii_lowercase), n), - 'joe': np.random.choice(days, n), - 'julie': np.random.randint(0, m, n) - }) - - check_nunique(frame, ['jim']) - check_nunique(frame, ['jim', 'joe']) - - frame.loc[1::17, 'jim'] = None - frame.loc[3::37, 'joe'] = None - frame.loc[7::19, 'julie'] = None - frame.loc[8::19, 'julie'] = None - frame.loc[9::19, 'julie'] = None - - check_nunique(frame, ['jim']) - check_nunique(frame, ['jim', 'joe']) - check_nunique(frame, ['jim'], as_index=False) - check_nunique(frame, ['jim', 'joe'], as_index=False) - - def test_nunique(self): - df = DataFrame({ - 'A': list('abbacc'), - 'B': list('abxacc'), - 'C': list('abbacx'), - }) - - expected = DataFrame({'A': [1] * 3, 'B': [1, 2, 1], 'C': [1, 1, 2]}) - result = df.groupby('A', as_index=False).nunique() - tm.assert_frame_equal(result, expected) - - # as_index - expected.index = list('abc') - expected.index.name = 'A' - result = df.groupby('A').nunique() - tm.assert_frame_equal(result, expected) - - # with na - result = df.replace({'x': None}).groupby('A').nunique(dropna=False) - tm.assert_frame_equal(result, expected) - - # dropna - expected = DataFrame({'A': [1] * 3, 'B': [1] * 3, 'C': [1] * 3}, - index=list('abc')) - expected.index.name = 'A' - result = df.replace({'x': None}).groupby('A').nunique() - tm.assert_frame_equal(result, expected) - - def test_nunique_with_object(self): - # GH 11077 - data = pd.DataFrame( - [[100, 1, 'Alice'], - [200, 2, 'Bob'], - [300, 3, 'Charlie'], - [-400, 4, 'Dan'], - [500, 5, 'Edith']], - columns=['amount', 'id', 'name'] - ) - - result = data.groupby(['id', 'amount'])['name'].nunique() - index = MultiIndex.from_arrays([data.id, data.amount]) - expected = pd.Series([1] * 5, name='name', index=index) - tm.assert_series_equal(result, expected) - - def test_nunique_with_empty_series(self): - # GH 12553 - data = pd.Series(name='name') - result = data.groupby(level=0).nunique() - expected = pd.Series(name='name', dtype='int64') - tm.assert_series_equal(result, expected) - - def test_nunique_with_timegrouper(self): - # GH 13453 - test = pd.DataFrame({ - 'time': [Timestamp('2016-06-28 09:35:35'), - Timestamp('2016-06-28 16:09:30'), - Timestamp('2016-06-28 16:46:28')], - 'data': ['1', '2', '3']}).set_index('time') - result = test.groupby(pd.Grouper(freq='h'))['data'].nunique() - expected = test.groupby( - pd.Grouper(freq='h') - )['data'].apply(pd.Series.nunique) - tm.assert_series_equal(result, expected) - - -# count -# -------------------------------- - -class TestCount(MixIn): - - def test_groupby_timedelta_cython_count(self): - df = DataFrame({'g': list('ab' * 2), - 'delt': np.arange(4).astype('timedelta64[ns]')}) - expected = Series([ - 2, 2 - ], index=pd.Index(['a', 'b'], name='g'), name='delt') - result = df.groupby('g').delt.count() - tm.assert_series_equal(expected, result) - - def test_count(self): - n = 1 << 15 - dr = date_range('2015-08-30', periods=n // 10, freq='T') - - df = DataFrame({ - '1st': np.random.choice( - list(ascii_lowercase), n), - '2nd': np.random.randint(0, 5, n), - '3rd': np.random.randn(n).round(3), - '4th': np.random.randint(-10, 10, n), - '5th': np.random.choice(dr, n), - '6th': np.random.randn(n).round(3), - '7th': np.random.randn(n).round(3), - '8th': np.random.choice(dr, n) - np.random.choice(dr, 1), - '9th': np.random.choice( - list(ascii_lowercase), n) - }) - - for col in df.columns.drop(['1st', '2nd', 
'4th']): - df.loc[np.random.choice(n, n // 10), col] = np.nan - - df['9th'] = df['9th'].astype('category') - - for key in '1st', '2nd', ['1st', '2nd']: - left = df.groupby(key).count() - right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1) - assert_frame_equal(left, right) - - # GH5610 - # count counts non-nulls - df = pd.DataFrame([[1, 2, 'foo'], - [1, np.nan, 'bar'], - [3, np.nan, np.nan]], - columns=['A', 'B', 'C']) - - count_as = df.groupby('A').count() - count_not_as = df.groupby('A', as_index=False).count() - - expected = DataFrame([[1, 2], [0, 0]], columns=['B', 'C'], - index=[1, 3]) - expected.index.name = 'A' - assert_frame_equal(count_not_as, expected.reset_index()) - assert_frame_equal(count_as, expected) - - count_B = df.groupby('A')['B'].count() - assert_series_equal(count_B, expected['B']) - - def test_count_object(self): - df = pd.DataFrame({'a': ['a'] * 3 + ['b'] * 3, 'c': [2] * 3 + [3] * 3}) - result = df.groupby('c').a.count() - expected = pd.Series([ - 3, 3 - ], index=pd.Index([2, 3], name='c'), name='a') - tm.assert_series_equal(result, expected) - - df = pd.DataFrame({'a': ['a', np.nan, np.nan] + ['b'] * 3, - 'c': [2] * 3 + [3] * 3}) - result = df.groupby('c').a.count() - expected = pd.Series([ - 1, 3 - ], index=pd.Index([2, 3], name='c'), name='a') - tm.assert_series_equal(result, expected) - - def test_count_cross_type(self): # GH8169 - vals = np.hstack((np.random.randint(0, 5, (100, 2)), np.random.randint( - 0, 2, (100, 2)))) - - df = pd.DataFrame(vals, columns=['a', 'b', 'c', 'd']) - df[df == 2] = np.nan - expected = df.groupby(['c', 'd']).count() - - for t in ['float32', 'object']: - df['a'] = df['a'].astype(t) - df['b'] = df['b'].astype(t) - result = df.groupby(['c', 'd']).count() - tm.assert_frame_equal(result, expected) - - def test_lower_int_prec_count(self): - df = DataFrame({'a': np.array( - [0, 1, 2, 100], np.int8), - 'b': np.array( - [1, 2, 3, 6], np.uint32), - 'c': np.array( - [4, 5, 6, 8], np.int16), - 'grp': list('ab' * 2)}) - result = df.groupby('grp').count() - expected = DataFrame({'a': [2, 2], - 'b': [2, 2], - 'c': [2, 2]}, index=pd.Index(list('ab'), - name='grp')) - tm.assert_frame_equal(result, expected) - - def test_count_uses_size_on_exception(self): - class RaisingObjectException(Exception): - pass - - class RaisingObject(object): - - def __init__(self, msg='I will raise inside Cython'): - super(RaisingObject, self).__init__() - self.msg = msg - - def __eq__(self, other): - # gets called in Cython to check that raising calls the method - raise RaisingObjectException(self.msg) - - df = DataFrame({'a': [RaisingObject() for _ in range(4)], - 'grp': list('ab' * 2)}) - result = df.groupby('grp').count() - expected = DataFrame({'a': [2, 2]}, index=pd.Index( - list('ab'), name='grp')) - tm.assert_frame_equal(result, expected) - - -# size -# -------------------------------- - -class TestSize(MixIn): - - def test_size(self): - grouped = self.df.groupby(['A', 'B']) - result = grouped.size() - for key, group in grouped: - assert result[key] == len(group) - - grouped = self.df.groupby('A') - result = grouped.size() - for key, group in grouped: - assert result[key] == len(group) - - grouped = self.df.groupby('B') - result = grouped.size() - for key, group in grouped: - assert result[key] == len(group) - - df = DataFrame(np.random.choice(20, (1000, 3)), columns=list('abc')) - for sort, key in cart_product((False, True), ('a', 'b', ['a', 'b'])): - left = df.groupby(key, sort=sort).size() - right = df.groupby(key, sort=sort)['c'].apply(lambda a: 
a.shape[0]) - assert_series_equal(left, right, check_names=False) - - # GH11699 - df = DataFrame([], columns=['A', 'B']) - out = Series([], dtype='int64', index=Index([], name='A')) - assert_series_equal(df.groupby('A').size(), out) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c3400b6b710e5..bb892f92f213e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -5,3090 +5,1672 @@ from warnings import catch_warnings from datetime import datetime +from decimal import Decimal -from pandas import (date_range, bdate_range, Timestamp, +from pandas import (date_range, Timestamp, Index, MultiIndex, DataFrame, Series, - concat, Panel, DatetimeIndex, read_csv) -from pandas.core.dtypes.missing import isna -from pandas.errors import UnsupportedFunctionCall, PerformanceWarning -from pandas.util.testing import (assert_frame_equal, assert_index_equal, + Panel, DatetimeIndex, read_csv) +from pandas.errors import PerformanceWarning +from pandas.util.testing import (assert_frame_equal, assert_series_equal, assert_almost_equal) from pandas.compat import (range, lrange, StringIO, lmap, lzip, map, zip, - builtins, OrderedDict) + OrderedDict) from pandas import compat from collections import defaultdict import pandas.core.common as com import numpy as np -import pandas.core.nanops as nanops import pandas.util.testing as tm import pandas as pd -from .common import MixIn -class TestGrouper(object): +def test_repr(): + # GH18203 + result = repr(pd.Grouper(key='A', level='B')) + expected = "Grouper(key='A', level='B', axis=0, sort=False)" + assert result == expected - def test_repr(self): - # GH18203 - result = repr(pd.Grouper(key='A', level='B')) - expected = "Grouper(key='A', level='B', axis=0, sort=False)" - assert result == expected +@pytest.mark.parametrize('dtype', ['int64', 'int32', 'float64', 'float32']) +def test_basic(dtype): -class TestGroupBy(MixIn): + data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype) - def test_basic(self): - def checkit(dtype): - data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype) + index = np.arange(9) + np.random.shuffle(index) + data = data.reindex(index) - index = np.arange(9) - np.random.shuffle(index) - data = data.reindex(index) + grouped = data.groupby(lambda x: x // 3) - grouped = data.groupby(lambda x: x // 3) + for k, v in grouped: + assert len(v) == 3 - for k, v in grouped: - assert len(v) == 3 + agged = grouped.aggregate(np.mean) + assert agged[1] == 1 - agged = grouped.aggregate(np.mean) - assert agged[1] == 1 + assert_series_equal(agged, grouped.agg(np.mean)) # shorthand + assert_series_equal(agged, grouped.mean()) + assert_series_equal(grouped.agg(np.sum), grouped.sum()) - assert_series_equal(agged, grouped.agg(np.mean)) # shorthand - assert_series_equal(agged, grouped.mean()) - assert_series_equal(grouped.agg(np.sum), grouped.sum()) + expected = grouped.apply(lambda x: x * x.sum()) + transformed = grouped.transform(lambda x: x * x.sum()) + assert transformed[7] == 12 + assert_series_equal(transformed, expected) - expected = grouped.apply(lambda x: x * x.sum()) - transformed = grouped.transform(lambda x: x * x.sum()) - assert transformed[7] == 12 - assert_series_equal(transformed, expected) + value_grouped = data.groupby(data) + assert_series_equal(value_grouped.aggregate(np.mean), agged, + check_index_type=False) - value_grouped = data.groupby(data) - assert_series_equal(value_grouped.aggregate(np.mean), agged, - check_index_type=False) + # complex 
agg + agged = grouped.aggregate([np.mean, np.std]) - # complex agg - agged = grouped.aggregate([np.mean, np.std]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + agged = grouped.aggregate({'one': np.mean, 'two': np.std}) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - agged = grouped.aggregate({'one': np.mean, 'two': np.std}) + group_constants = {0: 10, 1: 20, 2: 30} + agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) + assert agged[1] == 21 - group_constants = {0: 10, 1: 20, 2: 30} - agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) - assert agged[1] == 21 + # corner cases + pytest.raises(Exception, grouped.aggregate, lambda x: x * 2) - # corner cases - pytest.raises(Exception, grouped.aggregate, lambda x: x * 2) - for dtype in ['int64', 'int32', 'float64', 'float32']: - checkit(dtype) +def test_groupby_nonobject_dtype(mframe, df_mixed_floats): + key = mframe.index.labels[0] + grouped = mframe.groupby(key) + result = grouped.sum() - def test_groupby_nonobject_dtype(self): - key = self.mframe.index.labels[0] - grouped = self.mframe.groupby(key) - result = grouped.sum() + expected = mframe.groupby(key.astype('O')).sum() + assert_frame_equal(result, expected) - expected = self.mframe.groupby(key.astype('O')).sum() - assert_frame_equal(result, expected) + # GH 3911, mixed frame non-conversion + df = df_mixed_floats.copy() + df['value'] = lrange(len(df)) - # GH 3911, mixed frame non-conversion - df = self.df_mixed_floats.copy() - df['value'] = lrange(len(df)) + def max_value(group): + return group.loc[group['value'].idxmax()] - def max_value(group): - return group.loc[group['value'].idxmax()] + applied = df.groupby('A').apply(max_value) + result = applied.get_dtype_counts().sort_values() + expected = Series({'float64': 2, + 'int64': 1, + 'object': 2}).sort_values() + assert_series_equal(result, expected) - applied = df.groupby('A').apply(max_value) - result = applied.get_dtype_counts().sort_values() - expected = Series({'float64': 2, - 'int64': 1, - 'object': 2}).sort_values() - assert_series_equal(result, expected) - def test_groupby_return_type(self): +def test_groupby_return_type(): - # GH2893, return a reduced type - df1 = DataFrame( - [{"val1": 1, "val2": 20}, - {"val1": 1, "val2": 19}, - {"val1": 2, "val2": 27}, - {"val1": 2, "val2": 12} - ]) + # GH2893, return a reduced type + df1 = DataFrame( + [{"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 2, "val2": 27}, + {"val1": 2, "val2": 12} + ]) - def func(dataf): - return dataf["val2"] - dataf["val2"].mean() + def func(dataf): + return dataf["val2"] - dataf["val2"].mean() - result = df1.groupby("val1", squeeze=True).apply(func) - assert isinstance(result, Series) + result = df1.groupby("val1", squeeze=True).apply(func) + assert isinstance(result, Series) - df2 = DataFrame( - [{"val1": 1, "val2": 20}, - {"val1": 1, "val2": 19}, - {"val1": 1, "val2": 27}, - {"val1": 1, "val2": 12} - ]) + df2 = DataFrame( + [{"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 1, "val2": 27}, + {"val1": 1, "val2": 12} + ]) - def func(dataf): - return dataf["val2"] - dataf["val2"].mean() + def func(dataf): + return dataf["val2"] - dataf["val2"].mean() + + result = df2.groupby("val1", squeeze=True).apply(func) + assert isinstance(result, Series) - result = df2.groupby("val1", squeeze=True).apply(func) - assert isinstance(result, Series) + # GH3596, return a consistent type (regression in 0.11 from 0.10.1) + df = DataFrame([[1, 1], [1, 1]], 
columns=['X', 'Y'])
+    result = df.groupby('X', squeeze=False).count()
+    assert isinstance(result, DataFrame)
+
+    # GH5592
+    # inconsistent return type
+    df = DataFrame(dict(A=['Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb',
+                           'Pony', 'Pony'], B=Series(
+        np.arange(7), dtype='int64'), C=date_range(
+        '20130101', periods=7)))
+
+    def f(grp):
+        return grp.iloc[0]
+
+    expected = df.groupby('A').first()[['B']]
+    result = df.groupby('A').apply(f)[['B']]
+    assert_frame_equal(result, expected)
+
+    def f(grp):
+        if grp.name == 'Tiger':
+            return None
+        return grp.iloc[0]
+
+    result = df.groupby('A').apply(f)[['B']]
+    e = expected.copy()
+    e.loc['Tiger'] = np.nan
+    assert_frame_equal(result, e)
+
+    def f(grp):
+        if grp.name == 'Pony':
+            return None
+        return grp.iloc[0]
+
+    result = df.groupby('A').apply(f)[['B']]
+    e = expected.copy()
+    e.loc['Pony'] = np.nan
+    assert_frame_equal(result, e)
+
+    # 5592 revisited, with datetimes
+    def f(grp):
+        if grp.name == 'Pony':
+            return None
+        return grp.iloc[0]
+
+    result = df.groupby('A').apply(f)[['C']]
+    e = df.groupby('A').first()[['C']]
+    e.loc['Pony'] = pd.NaT
+    assert_frame_equal(result, e)
+
+    # scalar outputs
+    def f(grp):
+        if grp.name == 'Pony':
+            return None
+        return grp.iloc[0].loc['C']
+
+    result = df.groupby('A').apply(f)
+    e = df.groupby('A').first()['C'].copy()
+    e.loc['Pony'] = np.nan
+    e.name = None
+    assert_series_equal(result, e)
-        # GH3596, return a consistent type (regression in 0.11 from 0.10.1)
-        df = DataFrame([[1, 1], [1, 1]], columns=['X', 'Y'])
-        result = df.groupby('X', squeeze=False).count()
-        assert isinstance(result, DataFrame)
-        # GH5592
-        # inconcistent return type
-        df = DataFrame(dict(A=['Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb',
-                               'Pony', 'Pony'], B=Series(
-            np.arange(7), dtype='int64'), C=date_range(
-            '20130101', periods=7)))
+
+def test_pass_args_kwargs(ts, tsframe):
+
+    def f(x, q=None, axis=0):
+        return np.percentile(x, q, axis=axis)
+
+    g = lambda x: np.percentile(x, 80, axis=0)
+
+    # Series
+    ts_grouped = ts.groupby(lambda x: x.month)
+    agg_result = ts_grouped.agg(np.percentile, 80, axis=0)
+    apply_result = ts_grouped.apply(np.percentile, 80, axis=0)
+    trans_result = ts_grouped.transform(np.percentile, 80, axis=0)
+
+    agg_expected = ts_grouped.quantile(.8)
+    trans_expected = ts_grouped.transform(g)
+
+    assert_series_equal(apply_result, agg_expected)
+    assert_series_equal(agg_result, agg_expected, check_names=False)
+    assert_series_equal(trans_result, trans_expected)
+
+    agg_result = ts_grouped.agg(f, q=80)
+    apply_result = ts_grouped.apply(f, q=80)
+    trans_result = ts_grouped.transform(f, q=80)
+    assert_series_equal(agg_result, agg_expected)
+    assert_series_equal(apply_result, agg_expected)
+    assert_series_equal(trans_result, trans_expected)
+
+    # DataFrame
+    df_grouped = tsframe.groupby(lambda x: x.month)
+    agg_result = df_grouped.agg(np.percentile, 80, axis=0)
+    apply_result = df_grouped.apply(DataFrame.quantile, .8)
+    expected = df_grouped.quantile(.8)
+    assert_frame_equal(apply_result, expected)
+    assert_frame_equal(agg_result, expected, check_names=False)
+
+    agg_result = df_grouped.agg(f, q=80)
+    apply_result = df_grouped.apply(DataFrame.quantile, q=.8)
+    assert_frame_equal(agg_result, expected, check_names=False)
+    assert_frame_equal(apply_result, expected)
+
+
+def test_len():
+    df =
tm.makeTimeDataFrame() + grouped = df.groupby([lambda x: x.year, lambda x: x.month, + lambda x: x.day]) + assert len(grouped) == len(df) - result = df.groupby('A').apply(f)[['B']] - e = expected.copy() - e.loc['Tiger'] = np.nan - assert_frame_equal(result, e) + grouped = df.groupby([lambda x: x.year, lambda x: x.month]) + expected = len({(x.year, x.month) for x in df.index}) + assert len(grouped) == expected - def f(grp): - if grp.name == 'Pony': - return None - return grp.iloc[0] + # issue 11016 + df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3])) + assert len(df.groupby(('a'))) == 0 + assert len(df.groupby(('b'))) == 3 + assert len(df.groupby(['a', 'b'])) == 3 + + +def test_basic_regression(): + # regression + T = [1.0 * x for x in lrange(1, 10) * 10][:1095] + result = Series(T, lrange(0, len(T))) - result = df.groupby('A').apply(f)[['B']] - e = expected.copy() - e.loc['Pony'] = np.nan - assert_frame_equal(result, e) + groupings = np.random.random((1100, )) + groupings = Series(groupings, lrange(0, len(groupings))) * 10. - # 5592 revisited, with datetimes - def f(grp): - if grp.name == 'Pony': - return None - return grp.iloc[0] + grouped = result.groupby(groupings) + grouped.mean() - result = df.groupby('A').apply(f)[['C']] - e = df.groupby('A').first()[['C']] - e.loc['Pony'] = pd.NaT - assert_frame_equal(result, e) - # scalar outputs - def f(grp): - if grp.name == 'Pony': - return None - return grp.iloc[0].loc['C'] - - result = df.groupby('A').apply(f) - e = df.groupby('A').first()['C'].copy() - e.loc['Pony'] = np.nan - e.name = None - assert_series_equal(result, e) - - def test_apply_issues(self): - # GH 5788 - - s = """2011.05.16,00:00,1.40893 -2011.05.16,01:00,1.40760 -2011.05.16,02:00,1.40750 -2011.05.16,03:00,1.40649 -2011.05.17,02:00,1.40893 -2011.05.17,03:00,1.40760 -2011.05.17,04:00,1.40750 -2011.05.17,05:00,1.40649 -2011.05.18,02:00,1.40893 -2011.05.18,03:00,1.40760 -2011.05.18,04:00,1.40750 -2011.05.18,05:00,1.40649""" - - df = pd.read_csv( - StringIO(s), header=None, names=['date', 'time', 'value'], - parse_dates=[['date', 'time']]) - df = df.set_index('date_time') - - expected = df.groupby(df.index.date).idxmax() - result = df.groupby(df.index.date).apply(lambda x: x.idxmax()) - assert_frame_equal(result, expected) - - # GH 5789 - # don't auto coerce dates - df = pd.read_csv( - StringIO(s), header=None, names=['date', 'time', 'value']) - exp_idx = pd.Index( - ['2011.05.16', '2011.05.17', '2011.05.18' - ], dtype=object, name='date') - expected = Series(['00:00', '02:00', '02:00'], index=exp_idx) - result = df.groupby('date').apply( - lambda x: x['time'][x['value'].idxmax()]) - assert_series_equal(result, expected) - - def test_apply_trivial(self): - # GH 20066 - # trivial apply: ignore input and return a constant dataframe. - df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'], - 'data': [1.0, 2.0, 3.0, 4.0, 5.0]}, - columns=['key', 'data']) - expected = pd.concat([df.iloc[1:], df.iloc[1:]], - axis=1, keys=['float64', 'object']) - result = df.groupby([str(x) for x in df.dtypes], - axis=1).apply(lambda x: df.iloc[1:]) - - assert_frame_equal(result, expected) - - @pytest.mark.xfail(reason=("GH 20066; function passed into apply " - "returns a DataFrame with the same index " - "as the one to create GroupBy object.")) - def test_apply_trivial_fail(self): - # GH 20066 - # trivial apply fails if the constant dataframe has the same index - # with the one used to create GroupBy object. 
- df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'], - 'data': [1.0, 2.0, 3.0, 4.0, 5.0]}, - columns=['key', 'data']) - expected = pd.concat([df, df], - axis=1, keys=['float64', 'object']) - result = df.groupby([str(x) for x in df.dtypes], - axis=1).apply(lambda x: df) - - assert_frame_equal(result, expected) - - def test_time_field_bug(self): - # Test a fix for the following error related to GH issue 11324 When - # non-key fields in a group-by dataframe contained time-based fields - # that were not returned by the apply function, an exception would be - # raised. - - df = pd.DataFrame({'a': 1, 'b': [datetime.now() for nn in range(10)]}) - - def func_with_no_date(batch): - return pd.Series({'c': 2}) - - def func_with_date(batch): - return pd.Series({'b': datetime(2015, 1, 1), 'c': 2}) - - dfg_no_conversion = df.groupby(by=['a']).apply(func_with_no_date) - dfg_no_conversion_expected = pd.DataFrame({'c': 2}, index=[1]) - dfg_no_conversion_expected.index.name = 'a' - - dfg_conversion = df.groupby(by=['a']).apply(func_with_date) - dfg_conversion_expected = pd.DataFrame( - {'b': datetime(2015, 1, 1), - 'c': 2}, index=[1]) - dfg_conversion_expected.index.name = 'a' - - tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected) - tm.assert_frame_equal(dfg_conversion, dfg_conversion_expected) - - def test_len(self): - df = tm.makeTimeDataFrame() - grouped = df.groupby([lambda x: x.year, lambda x: x.month, - lambda x: x.day]) - assert len(grouped) == len(df) - - grouped = df.groupby([lambda x: x.year, lambda x: x.month]) - expected = len({(x.year, x.month) for x in df.index}) - assert len(grouped) == expected - - # issue 11016 - df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3])) - assert len(df.groupby(('a'))) == 0 - assert len(df.groupby(('b'))) == 3 - assert len(df.groupby(['a', 'b'])) == 3 - - def test_basic_regression(self): - # regression - T = [1.0 * x for x in lrange(1, 10) * 10][:1095] - result = Series(T, lrange(0, len(T))) - - groupings = np.random.random((1100, )) - groupings = Series(groupings, lrange(0, len(groupings))) * 10. 
- - grouped = result.groupby(groupings) - grouped.mean() - - def test_with_na_groups(self): - index = Index(np.arange(10)) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'int16', 'int8']: - values = Series(np.ones(10), index, dtype=dtype) - labels = Series([np.nan, 'foo', 'bar', 'bar', np.nan, np.nan, - 'bar', 'bar', np.nan, 'foo'], index=index) - - # this SHOULD be an int - grouped = values.groupby(labels) - agged = grouped.agg(len) - expected = Series([4, 2], index=['bar', 'foo']) - - assert_series_equal(agged, expected, check_dtype=False) - - # assert issubclass(agged.dtype.type, np.integer) - - # explicitly return a float from my function - def f(x): - return float(len(x)) - - agged = grouped.agg(f) - expected = Series([4, 2], index=['bar', 'foo']) - - assert_series_equal(agged, expected, check_dtype=False) - assert issubclass(agged.dtype.type, np.dtype(dtype).type) - - def test_indices_concatenation_order(self): - - # GH 2808 - - def f1(x): - y = x[(x.b % 2) == 1] ** 2 - if y.empty: - multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2, - names=['b', 'c']) - res = DataFrame(None, columns=['a'], index=multiindex) - return res - else: - y = y.set_index(['b', 'c']) - return y - - def f2(x): - y = x[(x.b % 2) == 1] ** 2 - if y.empty: - return DataFrame() - else: - y = y.set_index(['b', 'c']) - return y - - def f3(x): - y = x[(x.b % 2) == 1] ** 2 - if y.empty: - multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2, - names=['foo', 'bar']) - res = DataFrame(None, columns=['a', 'b'], index=multiindex) - return res - else: - return y - - df = DataFrame({'a': [1, 2, 2, 2], 'b': lrange(4), 'c': lrange(5, 9)}) - - df2 = DataFrame({'a': [3, 2, 2, 2], 'b': lrange(4), 'c': lrange(5, 9)}) - - # correct result - result1 = df.groupby('a').apply(f1) - result2 = df2.groupby('a').apply(f1) - assert_frame_equal(result1, result2) - - # should fail (not the same number of levels) - pytest.raises(AssertionError, df.groupby('a').apply, f2) - pytest.raises(AssertionError, df2.groupby('a').apply, f2) - - # should fail (incorrect shape) - pytest.raises(AssertionError, df.groupby('a').apply, f3) - pytest.raises(AssertionError, df2.groupby('a').apply, f3) - - def test_attr_wrapper(self): - grouped = self.ts.groupby(lambda x: x.weekday()) - - result = grouped.std() - expected = grouped.agg(lambda x: np.std(x, ddof=1)) - assert_series_equal(result, expected) - - # this is pretty cool - result = grouped.describe() - expected = {} - for name, gp in grouped: - expected[name] = gp.describe() - expected = DataFrame(expected).T - assert_frame_equal(result, expected) - - # get attribute - result = grouped.dtype - expected = grouped.agg(lambda x: x.dtype) - - # make sure raises error - pytest.raises(AttributeError, getattr, grouped, 'foo') - - def test_frame_groupby(self): - grouped = self.tsframe.groupby(lambda x: x.weekday()) - - # aggregate - aggregated = grouped.aggregate(np.mean) - assert len(aggregated) == 5 - assert len(aggregated.columns) == 4 - - # by string - tscopy = self.tsframe.copy() - tscopy['weekday'] = [x.weekday() for x in tscopy.index] - stragged = tscopy.groupby('weekday').aggregate(np.mean) - assert_frame_equal(stragged, aggregated, check_names=False) - - # transform - grouped = self.tsframe.head(30).groupby(lambda x: x.weekday()) - transformed = grouped.transform(lambda x: x - x.mean()) - assert len(transformed) == 30 - assert len(transformed.columns) == 4 - - # transform propagate - transformed = grouped.transform(lambda x: x.mean()) - for name, group in grouped: - mean = 
group.mean() - for idx in group.index: - tm.assert_series_equal(transformed.xs(idx), mean, - check_names=False) - - # iterate - for weekday, group in grouped: - assert group.index[0].weekday() == weekday - - # groups / group_indices - groups = grouped.groups - indices = grouped.indices - - for k, v in compat.iteritems(groups): - samething = self.tsframe.index.take(indices[k]) - assert (samething == v).all() - - def test_frame_groupby_columns(self): - mapping = {'A': 0, 'B': 0, 'C': 1, 'D': 1} - grouped = self.tsframe.groupby(mapping, axis=1) - - # aggregate - aggregated = grouped.aggregate(np.mean) - assert len(aggregated) == len(self.tsframe) - assert len(aggregated.columns) == 2 - - # transform - tf = lambda x: x - x.mean() - groupedT = self.tsframe.T.groupby(mapping, axis=0) - assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf)) - - # iterate - for k, v in grouped: - assert len(v.columns) == 2 - - def test_frame_set_name_single(self): - grouped = self.df.groupby('A') - - result = grouped.mean() - assert result.index.name == 'A' - - result = self.df.groupby('A', as_index=False).mean() - assert result.index.name != 'A' - - result = grouped.agg(np.mean) - assert result.index.name == 'A' - - result = grouped.agg({'C': np.mean, 'D': np.std}) - assert result.index.name == 'A' - - result = grouped['C'].mean() - assert result.index.name == 'A' - result = grouped['C'].agg(np.mean) - assert result.index.name == 'A' - result = grouped['C'].agg([np.mean, np.std]) - assert result.index.name == 'A' - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = grouped['C'].agg({'foo': np.mean, 'bar': np.std}) - assert result.index.name == 'A' - - def test_multi_func(self): - col1 = self.df['A'] - col2 = self.df['B'] - - grouped = self.df.groupby([col1.get, col2.get]) - agged = grouped.mean() - expected = self.df.groupby(['A', 'B']).mean() - - # TODO groupby get drops names - assert_frame_equal(agged.loc[:, ['C', 'D']], - expected.loc[:, ['C', 'D']], - check_names=False) - - # some "groups" with no data - df = DataFrame({'v1': np.random.randn(6), - 'v2': np.random.randn(6), - 'k1': np.array(['b', 'b', 'b', 'a', 'a', 'a']), - 'k2': np.array(['1', '1', '1', '2', '2', '2'])}, - index=['one', 'two', 'three', 'four', 'five', 'six']) - # only verify that it works for now - grouped = df.groupby(['k1', 'k2']) - grouped.agg(np.sum) - - def test_multi_key_multiple_functions(self): - grouped = self.df.groupby(['A', 'B'])['C'] - - agged = grouped.agg([np.mean, np.std]) - expected = DataFrame({'mean': grouped.agg(np.mean), - 'std': grouped.agg(np.std)}) - assert_frame_equal(agged, expected) - - def test_frame_multi_key_function_list(self): - data = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) - - grouped = data.groupby(['A', 'B']) - funcs = [np.mean, np.std] - agged = grouped.agg(funcs) - expected = concat([grouped['D'].agg(funcs), grouped['E'].agg(funcs), - grouped['F'].agg(funcs)], - keys=['D', 'E', 'F'], axis=1) - assert (isinstance(agged.index, MultiIndex)) - assert (isinstance(expected.index, MultiIndex)) - assert_frame_equal(agged, expected) - - def test_groupby_multiple_columns(self): - data = self.df - grouped = data.groupby(['A', 'B']) - 
- def _check_op(op): - - with catch_warnings(record=True): - result1 = op(grouped) - - expected = defaultdict(dict) - for n1, gp1 in data.groupby('A'): - for n2, gp2 in gp1.groupby('B'): - expected[n1][n2] = op(gp2.loc[:, ['C', 'D']]) - expected = dict((k, DataFrame(v)) - for k, v in compat.iteritems(expected)) - expected = Panel.fromDict(expected).swapaxes(0, 1) - expected.major_axis.name, expected.minor_axis.name = 'A', 'B' - - # a little bit crude - for col in ['C', 'D']: - result_col = op(grouped[col]) - exp = expected[col] - pivoted = result1[col].unstack() - pivoted2 = result_col.unstack() - assert_frame_equal(pivoted.reindex_like(exp), exp) - assert_frame_equal(pivoted2.reindex_like(exp), exp) - - _check_op(lambda x: x.sum()) - _check_op(lambda x: x.mean()) - - # test single series works the same - result = data['C'].groupby([data['A'], data['B']]).mean() - expected = data.groupby(['A', 'B']).mean()['C'] - - assert_series_equal(result, expected) - - def test_groupby_as_index_agg(self): - grouped = self.df.groupby('A', as_index=False) - - # single-key - - result = grouped.agg(np.mean) - expected = grouped.mean() - assert_frame_equal(result, expected) - - result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]])) - expected2 = grouped.mean() - expected2['D'] = grouped.sum()['D'] - assert_frame_equal(result2, expected2) - - grouped = self.df.groupby('A', as_index=True) - expected3 = grouped['C'].sum() - expected3 = DataFrame(expected3).rename(columns={'C': 'Q'}) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result3 = grouped['C'].agg({'Q': np.sum}) - assert_frame_equal(result3, expected3) - - # multi-key - - grouped = self.df.groupby(['A', 'B'], as_index=False) - - result = grouped.agg(np.mean) - expected = grouped.mean() - assert_frame_equal(result, expected) - - result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]])) - expected2 = grouped.mean() - expected2['D'] = grouped.sum()['D'] - assert_frame_equal(result2, expected2) - - expected3 = grouped['C'].sum() - expected3 = DataFrame(expected3).rename(columns={'C': 'Q'}) +@pytest.mark.parametrize('dtype', ['float64', 'float32', 'int64', + 'int32', 'int16', 'int8']) +def test_with_na_groups(dtype): + index = Index(np.arange(10)) + values = Series(np.ones(10), index, dtype=dtype) + labels = Series([np.nan, 'foo', 'bar', 'bar', np.nan, np.nan, + 'bar', 'bar', np.nan, 'foo'], index=index) + + # this SHOULD be an int + grouped = values.groupby(labels) + agged = grouped.agg(len) + expected = Series([4, 2], index=['bar', 'foo']) + + assert_series_equal(agged, expected, check_dtype=False) + + # assert issubclass(agged.dtype.type, np.integer) + + # explicitly return a float from my function + def f(x): + return float(len(x)) + + agged = grouped.agg(f) + expected = Series([4, 2], index=['bar', 'foo']) + + assert_series_equal(agged, expected, check_dtype=False) + assert issubclass(agged.dtype.type, np.dtype(dtype).type) + + +def test_indices_concatenation_order(): + + # GH 2808 + + def f1(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2, + names=['b', 'c']) + res = DataFrame(None, columns=['a'], index=multiindex) + return res + else: + y = y.set_index(['b', 'c']) + return y + + def f2(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + return DataFrame() + else: + y = y.set_index(['b', 'c']) + return y + + def f3(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2, + names=['foo', 
'bar']) + res = DataFrame(None, columns=['a', 'b'], index=multiindex) + return res + else: + return y + + df = DataFrame({'a': [1, 2, 2, 2], 'b': lrange(4), 'c': lrange(5, 9)}) + + df2 = DataFrame({'a': [3, 2, 2, 2], 'b': lrange(4), 'c': lrange(5, 9)}) + + # correct result + result1 = df.groupby('a').apply(f1) + result2 = df2.groupby('a').apply(f1) + assert_frame_equal(result1, result2) + + # should fail (not the same number of levels) + pytest.raises(AssertionError, df.groupby('a').apply, f2) + pytest.raises(AssertionError, df2.groupby('a').apply, f2) + + # should fail (incorrect shape) + pytest.raises(AssertionError, df.groupby('a').apply, f3) + pytest.raises(AssertionError, df2.groupby('a').apply, f3) + + +def test_attr_wrapper(ts): + grouped = ts.groupby(lambda x: x.weekday()) + + result = grouped.std() + expected = grouped.agg(lambda x: np.std(x, ddof=1)) + assert_series_equal(result, expected) + + # this is pretty cool + result = grouped.describe() + expected = {} + for name, gp in grouped: + expected[name] = gp.describe() + expected = DataFrame(expected).T + assert_frame_equal(result, expected) + + # get attribute + result = grouped.dtype + expected = grouped.agg(lambda x: x.dtype) + + # make sure raises error + pytest.raises(AttributeError, getattr, grouped, 'foo') + + +def test_frame_groupby(tsframe): + grouped = tsframe.groupby(lambda x: x.weekday()) + + # aggregate + aggregated = grouped.aggregate(np.mean) + assert len(aggregated) == 5 + assert len(aggregated.columns) == 4 + + # by string + tscopy = tsframe.copy() + tscopy['weekday'] = [x.weekday() for x in tscopy.index] + stragged = tscopy.groupby('weekday').aggregate(np.mean) + assert_frame_equal(stragged, aggregated, check_names=False) + + # transform + grouped = tsframe.head(30).groupby(lambda x: x.weekday()) + transformed = grouped.transform(lambda x: x - x.mean()) + assert len(transformed) == 30 + assert len(transformed.columns) == 4 + + # transform propagate + transformed = grouped.transform(lambda x: x.mean()) + for name, group in grouped: + mean = group.mean() + for idx in group.index: + tm.assert_series_equal(transformed.xs(idx), mean, + check_names=False) + + # iterate + for weekday, group in grouped: + assert group.index[0].weekday() == weekday + + # groups / group_indices + groups = grouped.groups + indices = grouped.indices + + for k, v in compat.iteritems(groups): + samething = tsframe.index.take(indices[k]) + assert (samething == v).all() + + +def test_frame_groupby_columns(tsframe): + mapping = {'A': 0, 'B': 0, 'C': 1, 'D': 1} + grouped = tsframe.groupby(mapping, axis=1) + + # aggregate + aggregated = grouped.aggregate(np.mean) + assert len(aggregated) == len(tsframe) + assert len(aggregated.columns) == 2 + + # transform + tf = lambda x: x - x.mean() + groupedT = tsframe.T.groupby(mapping, axis=0) + assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf)) + + # iterate + for k, v in grouped: + assert len(v.columns) == 2 + + +def test_frame_set_name_single(df): + grouped = df.groupby('A') + + result = grouped.mean() + assert result.index.name == 'A' + + result = df.groupby('A', as_index=False).mean() + assert result.index.name != 'A' + + result = grouped.agg(np.mean) + assert result.index.name == 'A' + + result = grouped.agg({'C': np.mean, 'D': np.std}) + assert result.index.name == 'A' + + result = grouped['C'].mean() + assert result.index.name == 'A' + result = grouped['C'].agg(np.mean) + assert result.index.name == 'A' + result = grouped['C'].agg([np.mean, np.std]) + assert result.index.name == 'A' 
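+
+    # the dict-of-names renaming form of SeriesGroupBy.agg used below is
+    # deprecated, so the call is wrapped to check for its FutureWarning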
+ + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = grouped['C'].agg({'foo': np.mean, 'bar': np.std}) + assert result.index.name == 'A' + + +def test_multi_func(df): + col1 = df['A'] + col2 = df['B'] + + grouped = df.groupby([col1.get, col2.get]) + agged = grouped.mean() + expected = df.groupby(['A', 'B']).mean() + + # TODO groupby get drops names + assert_frame_equal(agged.loc[:, ['C', 'D']], + expected.loc[:, ['C', 'D']], + check_names=False) + + # some "groups" with no data + df = DataFrame({'v1': np.random.randn(6), + 'v2': np.random.randn(6), + 'k1': np.array(['b', 'b', 'b', 'a', 'a', 'a']), + 'k2': np.array(['1', '1', '1', '2', '2', '2'])}, + index=['one', 'two', 'three', 'four', 'five', 'six']) + # only verify that it works for now + grouped = df.groupby(['k1', 'k2']) + grouped.agg(np.sum) + + +def test_multi_key_multiple_functions(df): + grouped = df.groupby(['A', 'B'])['C'] + + agged = grouped.agg([np.mean, np.std]) + expected = DataFrame({'mean': grouped.agg(np.mean), + 'std': grouped.agg(np.std)}) + assert_frame_equal(agged, expected) + + +def test_frame_multi_key_function_list(): + data = DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) + + grouped = data.groupby(['A', 'B']) + funcs = [np.mean, np.std] + agged = grouped.agg(funcs) + expected = pd.concat([grouped['D'].agg(funcs), grouped['E'].agg(funcs), + grouped['F'].agg(funcs)], + keys=['D', 'E', 'F'], axis=1) + assert (isinstance(agged.index, MultiIndex)) + assert (isinstance(expected.index, MultiIndex)) + assert_frame_equal(agged, expected) + + +@pytest.mark.parametrize('op', [lambda x: x.sum(), lambda x: x.mean()]) +def test_groupby_multiple_columns(df, op): + data = df + grouped = data.groupby(['A', 'B']) + + with catch_warnings(record=True): + result1 = op(grouped) + + expected = defaultdict(dict) + for n1, gp1 in data.groupby('A'): + for n2, gp2 in gp1.groupby('B'): + expected[n1][n2] = op(gp2.loc[:, ['C', 'D']]) + expected = dict((k, DataFrame(v)) + for k, v in compat.iteritems(expected)) + expected = Panel.fromDict(expected).swapaxes(0, 1) + expected.major_axis.name, expected.minor_axis.name = 'A', 'B' + + # a little bit crude + for col in ['C', 'D']: + result_col = op(grouped[col]) + exp = expected[col] + pivoted = result1[col].unstack() + pivoted2 = result_col.unstack() + assert_frame_equal(pivoted.reindex_like(exp), exp) + assert_frame_equal(pivoted2.reindex_like(exp), exp) + + # test single series works the same + result = data['C'].groupby([data['A'], data['B']]).mean() + expected = data.groupby(['A', 'B']).mean()['C'] + + assert_series_equal(result, expected) + + +def test_groupby_as_index_agg(df): + grouped = df.groupby('A', as_index=False) + + # single-key + + result = grouped.agg(np.mean) + expected = grouped.mean() + assert_frame_equal(result, expected) + + result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]])) + expected2 = grouped.mean() + expected2['D'] = grouped.sum()['D'] + assert_frame_equal(result2, expected2) + + grouped = df.groupby('A', as_index=True) + expected3 = grouped['C'].sum() + expected3 = DataFrame(expected3).rename(columns={'C': 'Q'}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): 
result3 = grouped['C'].agg({'Q': np.sum}) - assert_frame_equal(result3, expected3) - - # GH7115 & GH8112 & GH8582 - df = DataFrame(np.random.randint(0, 100, (50, 3)), - columns=['jim', 'joe', 'jolie']) - ts = Series(np.random.randint(5, 10, 50), name='jim') - - gr = df.groupby(ts) - gr.nth(0) # invokes set_selection_from_grouper internally - assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum)) - - for attr in ['mean', 'max', 'count', 'idxmax', 'cumsum', 'all']: - gr = df.groupby(ts, as_index=False) - left = getattr(gr, attr)() - - gr = df.groupby(ts.values, as_index=True) - right = getattr(gr, attr)().reset_index(drop=True) - - assert_frame_equal(left, right) - - def test_as_index_series_return_frame(self): - grouped = self.df.groupby('A', as_index=False) - grouped2 = self.df.groupby(['A', 'B'], as_index=False) - - result = grouped['C'].agg(np.sum) - expected = grouped.agg(np.sum).loc[:, ['A', 'C']] - assert isinstance(result, DataFrame) - assert_frame_equal(result, expected) - - result2 = grouped2['C'].agg(np.sum) - expected2 = grouped2.agg(np.sum).loc[:, ['A', 'B', 'C']] - assert isinstance(result2, DataFrame) - assert_frame_equal(result2, expected2) - - result = grouped['C'].sum() - expected = grouped.sum().loc[:, ['A', 'C']] - assert isinstance(result, DataFrame) - assert_frame_equal(result, expected) - - result2 = grouped2['C'].sum() - expected2 = grouped2.sum().loc[:, ['A', 'B', 'C']] - assert isinstance(result2, DataFrame) - assert_frame_equal(result2, expected2) - - # corner case - pytest.raises(Exception, grouped['C'].__getitem__, 'D') - - def test_groupby_as_index_cython(self): - data = self.df - - # single-key - grouped = data.groupby('A', as_index=False) - result = grouped.mean() - expected = data.groupby(['A']).mean() - expected.insert(0, 'A', expected.index) - expected.index = np.arange(len(expected)) - assert_frame_equal(result, expected) - - # multi-key - grouped = data.groupby(['A', 'B'], as_index=False) - result = grouped.mean() - expected = data.groupby(['A', 'B']).mean() - - arrays = lzip(*expected.index.values) - expected.insert(0, 'A', arrays[0]) - expected.insert(1, 'B', arrays[1]) - expected.index = np.arange(len(expected)) - assert_frame_equal(result, expected) - - def test_groupby_as_index_series_scalar(self): - grouped = self.df.groupby(['A', 'B'], as_index=False) - - # GH #421 - - result = grouped['C'].agg(len) - expected = grouped.agg(len).loc[:, ['A', 'B', 'C']] - assert_frame_equal(result, expected) - - def test_groupby_as_index_corner(self): - pytest.raises(TypeError, self.ts.groupby, lambda x: x.weekday(), - as_index=False) - - pytest.raises(ValueError, self.df.groupby, lambda x: x.lower(), - as_index=False, axis=1) - - def test_groupby_as_index_apply(self): - # GH #4648 and #3417 - df = DataFrame({'item_id': ['b', 'b', 'a', 'c', 'a', 'b'], - 'user_id': [1, 2, 1, 1, 3, 1], - 'time': range(6)}) - - g_as = df.groupby('user_id', as_index=True) - g_not_as = df.groupby('user_id', as_index=False) - - res_as = g_as.head(2).index - res_not_as = g_not_as.head(2).index - exp = Index([0, 1, 2, 4]) - assert_index_equal(res_as, exp) - assert_index_equal(res_not_as, exp) - - res_as_apply = g_as.apply(lambda x: x.head(2)).index - res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index - - # apply doesn't maintain the original ordering - # changed in GH5610 as the as_index=False returns a MI here - exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), ( - 2, 4)]) - tp = [(1, 0), (1, 2), (2, 1), (3, 4)] - exp_as_apply = MultiIndex.from_tuples(tp, 
names=['user_id', None]) - - assert_index_equal(res_as_apply, exp_as_apply) - assert_index_equal(res_not_as_apply, exp_not_as_apply) - - ind = Index(list('abcde')) - df = DataFrame([[1, 2], [2, 3], [1, 4], [1, 5], [2, 6]], index=ind) - res = df.groupby(0, as_index=False).apply(lambda x: x).index - assert_index_equal(res, ind) - - def test_groupby_multiple_key(self): - df = tm.makeTimeDataFrame() - grouped = df.groupby([lambda x: x.year, lambda x: x.month, - lambda x: x.day]) - agged = grouped.sum() - assert_almost_equal(df.values, agged.values) - - grouped = df.T.groupby([lambda x: x.year, - lambda x: x.month, - lambda x: x.day], axis=1) - - agged = grouped.agg(lambda x: x.sum()) - tm.assert_index_equal(agged.index, df.columns) - assert_almost_equal(df.T.values, agged.values) - - agged = grouped.agg(lambda x: x.sum()) - assert_almost_equal(df.T.values, agged.values) - - def test_groupby_multi_corner(self): - # test that having an all-NA column doesn't mess you up - df = self.df.copy() - df['bad'] = np.nan - agged = df.groupby(['A', 'B']).mean() - - expected = self.df.groupby(['A', 'B']).mean() - expected['bad'] = np.nan - - assert_frame_equal(agged, expected) - - def test_omit_nuisance(self): - grouped = self.df.groupby('A') - - result = grouped.mean() - expected = self.df.loc[:, ['A', 'C', 'D']].groupby('A').mean() - assert_frame_equal(result, expected) - - agged = grouped.agg(np.mean) - exp = grouped.mean() - assert_frame_equal(agged, exp) - - df = self.df.loc[:, ['A', 'C', 'D']] - df['E'] = datetime.now() - grouped = df.groupby('A') - result = grouped.agg(np.sum) - expected = grouped.sum() - assert_frame_equal(result, expected) - - # won't work with axis = 1 - grouped = df.groupby({'A': 0, 'C': 0, 'D': 1, 'E': 1}, axis=1) - result = pytest.raises(TypeError, grouped.agg, - lambda x: x.sum(0, numeric_only=False)) - - def test_omit_nuisance_python_multiple(self): - grouped = self.three_group.groupby(['A', 'B']) - - agged = grouped.agg(np.mean) - exp = grouped.mean() - assert_frame_equal(agged, exp) - - def test_empty_groups_corner(self): - # handle empty groups - df = DataFrame({'k1': np.array(['b', 'b', 'b', 'a', 'a', 'a']), - 'k2': np.array(['1', '1', '1', '2', '2', '2']), - 'k3': ['foo', 'bar'] * 3, - 'v1': np.random.randn(6), - 'v2': np.random.randn(6)}) - - grouped = df.groupby(['k1', 'k2']) - result = grouped.agg(np.mean) - expected = grouped.mean() - assert_frame_equal(result, expected) - - grouped = self.mframe[3:5].groupby(level=0) - agged = grouped.apply(lambda x: x.mean()) - agged_A = grouped['A'].apply(np.mean) - assert_series_equal(agged['A'], agged_A) - assert agged.index.name == 'first' - - def test_apply_concat_preserve_names(self): - grouped = self.three_group.groupby(['A', 'B']) - - def desc(group): - result = group.describe() - result.index.name = 'stat' - return result - - def desc2(group): - result = group.describe() - result.index.name = 'stat' - result = result[:len(group)] - # weirdo - return result - - def desc3(group): - result = group.describe() - - # names are different - result.index.name = 'stat_%d' % len(group) - - result = result[:len(group)] - # weirdo - return result - - result = grouped.apply(desc) - assert result.index.names == ('A', 'B', 'stat') - - result2 = grouped.apply(desc2) - assert result2.index.names == ('A', 'B', 'stat') - - result3 = grouped.apply(desc3) - assert result3.index.names == ('A', 'B', None) - - def test_nonsense_func(self): - df = DataFrame([0]) - pytest.raises(Exception, df.groupby, lambda x: x + 'foo') - - def 
test_builtins_apply(self): # GH8155 - df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)), - columns=['jim', 'joe']) - df['jolie'] = np.random.randn(1000) - - for keys in ['jim', ['jim', 'joe']]: # single key & multi-key - if keys == 'jim': - continue - for f in [max, min, sum]: - fname = f.__name__ - result = df.groupby(keys).apply(f) - result.shape - ngroups = len(df.drop_duplicates(subset=keys)) - assert result.shape == (ngroups, 3), 'invalid frame shape: '\ - '{} (expected ({}, 3))'.format(result.shape, ngroups) - - assert_frame_equal(result, # numpy's equivalent function - df.groupby(keys).apply(getattr(np, fname))) - - if f != sum: - expected = df.groupby(keys).agg(fname).reset_index() - expected.set_index(keys, inplace=True, drop=False) - assert_frame_equal(result, expected, check_dtype=False) - - assert_series_equal(getattr(result, fname)(), - getattr(df, fname)()) - - def test_max_min_non_numeric(self): - # #2700 - aa = DataFrame({'nn': [11, 11, 22, 22], - 'ii': [1, 2, 3, 4], - 'ss': 4 * ['mama']}) - - result = aa.groupby('nn').max() - assert 'ss' in result - - result = aa.groupby('nn').max(numeric_only=False) - assert 'ss' in result - - result = aa.groupby('nn').min() - assert 'ss' in result - - result = aa.groupby('nn').min(numeric_only=False) - assert 'ss' in result - - def test_arg_passthru(self): - # make sure that we are passing thru kwargs - # to our agg functions - - # GH3668 - # GH5724 - df = pd.DataFrame( - {'group': [1, 1, 2], - 'int': [1, 2, 3], - 'float': [4., 5., 6.], - 'string': list('abc'), - 'category_string': pd.Series(list('abc')).astype('category'), - 'category_int': [7, 8, 9], - 'datetime': pd.date_range('20130101', periods=3), - 'datetimetz': pd.date_range('20130101', - periods=3, - tz='US/Eastern'), - 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')}, - columns=['group', 'int', 'float', 'string', - 'category_string', 'category_int', - 'datetime', 'datetimetz', - 'timedelta']) - - expected_columns_numeric = Index(['int', 'float', 'category_int']) - - # mean / median - expected = pd.DataFrame( - {'category_int': [7.5, 9], - 'float': [4.5, 6.], - 'timedelta': [pd.Timedelta('1.5s'), - pd.Timedelta('3s')], - 'int': [1.5, 3], - 'datetime': [pd.Timestamp('2013-01-01 12:00:00'), - pd.Timestamp('2013-01-03 00:00:00')], - 'datetimetz': [ - pd.Timestamp('2013-01-01 12:00:00', tz='US/Eastern'), - pd.Timestamp('2013-01-03 00:00:00', tz='US/Eastern')]}, - index=Index([1, 2], name='group'), - columns=['int', 'float', 'category_int', - 'datetime', 'datetimetz', 'timedelta']) - for attr in ['mean', 'median']: - f = getattr(df.groupby('group'), attr) - result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) - - result = f(numeric_only=False) - assert_frame_equal(result.reindex_like(expected), expected) - - # TODO: min, max *should* handle - # categorical (ordered) dtype - expected_columns = Index(['int', 'float', 'string', - 'category_int', - 'datetime', 'datetimetz', - 'timedelta']) - for attr in ['min', 'max']: - f = getattr(df.groupby('group'), attr) - result = f() - tm.assert_index_equal(result.columns, expected_columns) - - result = f(numeric_only=False) - tm.assert_index_equal(result.columns, expected_columns) - - expected_columns = Index(['int', 'float', 'string', - 'category_string', 'category_int', - 'datetime', 'datetimetz', - 'timedelta']) - for attr in ['first', 'last']: - f = getattr(df.groupby('group'), attr) - result = f() - tm.assert_index_equal(result.columns, expected_columns) - - result = f(numeric_only=False) - 
tm.assert_index_equal(result.columns, expected_columns) - - expected_columns = Index(['int', 'float', 'string', - 'category_int', 'timedelta']) - for attr in ['sum']: - f = getattr(df.groupby('group'), attr) - result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) - - result = f(numeric_only=False) - tm.assert_index_equal(result.columns, expected_columns) - - expected_columns = Index(['int', 'float', 'category_int']) - for attr in ['prod', 'cumprod']: - f = getattr(df.groupby('group'), attr) - result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) - - result = f(numeric_only=False) - tm.assert_index_equal(result.columns, expected_columns) - - # like min, max, but don't include strings - expected_columns = Index(['int', 'float', - 'category_int', - 'datetime', 'datetimetz', - 'timedelta']) - for attr in ['cummin', 'cummax']: - f = getattr(df.groupby('group'), attr) - result = f() - # GH 15561: numeric_only=False set by default like min/max - tm.assert_index_equal(result.columns, expected_columns) - - result = f(numeric_only=False) - tm.assert_index_equal(result.columns, expected_columns) - - expected_columns = Index(['int', 'float', 'category_int', - 'timedelta']) - for attr in ['cumsum']: - f = getattr(df.groupby('group'), attr) - result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) - - result = f(numeric_only=False) - tm.assert_index_equal(result.columns, expected_columns) - - def test_wrap_aggregated_output_multindex(self): - df = self.mframe.T - df['baz', 'two'] = 'peekaboo' - - keys = [np.array([0, 0, 1]), np.array([0, 0, 1])] - agged = df.groupby(keys).agg(np.mean) - assert isinstance(agged.columns, MultiIndex) - - def aggfun(ser): - if ser.name == ('foo', 'one'): - raise TypeError - else: - return ser.sum() - - agged2 = df.groupby(keys).aggregate(aggfun) - assert len(agged2.columns) + 1 == len(df.columns) - - def test_groupby_level_apply(self): - frame = self.mframe - - result = frame.groupby(level=0).count() - assert result.index.name == 'first' - result = frame.groupby(level=1).count() - assert result.index.name == 'second' - - result = frame['A'].groupby(level=0).count() - assert result.index.name == 'first' - - def test_groupby_level_mapper(self): - frame = self.mframe - deleveled = frame.reset_index() - - mapper0 = {'foo': 0, 'bar': 0, 'baz': 1, 'qux': 1} - mapper1 = {'one': 0, 'two': 0, 'three': 1} - - result0 = frame.groupby(mapper0, level=0).sum() - result1 = frame.groupby(mapper1, level=1).sum() - - mapped_level0 = np.array([mapper0.get(x) for x in deleveled['first']]) - mapped_level1 = np.array([mapper1.get(x) for x in deleveled['second']]) - expected0 = frame.groupby(mapped_level0).sum() - expected1 = frame.groupby(mapped_level1).sum() - expected0.index.name, expected1.index.name = 'first', 'second' - - assert_frame_equal(result0, expected0) - assert_frame_equal(result1, expected1) - - def test_groupby_level_nonmulti(self): - # GH 1313, GH 13901 - s = Series([1, 2, 3, 10, 4, 5, 20, 6], - Index([1, 2, 3, 1, 4, 5, 2, 6], name='foo')) - expected = Series([11, 22, 3, 4, 5, 6], - Index(range(1, 7), name='foo')) - - result = s.groupby(level=0).sum() - tm.assert_series_equal(result, expected) - result = s.groupby(level=[0]).sum() - tm.assert_series_equal(result, expected) - result = s.groupby(level=-1).sum() - tm.assert_series_equal(result, expected) - result = s.groupby(level=[-1]).sum() - tm.assert_series_equal(result, expected) - - pytest.raises(ValueError, s.groupby, level=1) - pytest.raises(ValueError, 
s.groupby, level=-2) - pytest.raises(ValueError, s.groupby, level=[]) - pytest.raises(ValueError, s.groupby, level=[0, 0]) - pytest.raises(ValueError, s.groupby, level=[0, 1]) - pytest.raises(ValueError, s.groupby, level=[1]) - - def test_groupby_complex(self): - # GH 12902 - a = Series(data=np.arange(4) * (1 + 2j), index=[0, 0, 1, 1]) - expected = Series((1 + 2j, 5 + 10j)) - - result = a.groupby(level=0).sum() - assert_series_equal(result, expected) - - result = a.sum(level=0) - assert_series_equal(result, expected) - - def test_apply_series_to_frame(self): - def f(piece): - with np.errstate(invalid='ignore'): - logged = np.log(piece) - return DataFrame({'value': piece, - 'demeaned': piece - piece.mean(), - 'logged': logged}) - - dr = bdate_range('1/1/2000', periods=100) - ts = Series(np.random.randn(100), index=dr) - - grouped = ts.groupby(lambda x: x.month) - result = grouped.apply(f) - - assert isinstance(result, DataFrame) - tm.assert_index_equal(result.index, ts.index) - - def test_apply_series_yield_constant(self): - result = self.df.groupby(['A', 'B'])['C'].apply(len) - assert result.index.names[:2] == ('A', 'B') - - def test_apply_frame_yield_constant(self): - # GH13568 - result = self.df.groupby(['A', 'B']).apply(len) - assert isinstance(result, Series) - assert result.name is None - - result = self.df.groupby(['A', 'B'])[['C', 'D']].apply(len) - assert isinstance(result, Series) - assert result.name is None - - def test_apply_frame_to_series(self): - grouped = self.df.groupby(['A', 'B']) - result = grouped.apply(len) - expected = grouped.count()['C'] - tm.assert_index_equal(result.index, expected.index) - tm.assert_numpy_array_equal(result.values, expected.values) - - def test_apply_frame_concat_series(self): - def trans(group): - return group.groupby('B')['C'].sum().sort_values()[:2] - - def trans2(group): - grouped = group.groupby(df.reindex(group.index)['B']) - return grouped.sum().sort_values()[:2] - - df = DataFrame({'A': np.random.randint(0, 5, 1000), - 'B': np.random.randint(0, 5, 1000), - 'C': np.random.randn(1000)}) - - result = df.groupby('A').apply(trans) - exp = df.groupby('A')['C'].apply(trans2) - assert_series_equal(result, exp, check_names=False) - assert result.name == 'C' - - def test_apply_transform(self): - grouped = self.ts.groupby(lambda x: x.month) - result = grouped.apply(lambda x: x * 2) - expected = grouped.transform(lambda x: x * 2) - assert_series_equal(result, expected) - - def test_apply_multikey_corner(self): - grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) - - def f(group): - return group.sort_values('A')[-5:] - - result = grouped.apply(f) - for key, group in grouped: - assert_frame_equal(result.loc[key], f(group)) - - def test_mutate_groups(self): - - # GH3380 - - mydf = DataFrame({ - 'cat1': ['a'] * 8 + ['b'] * 6, - 'cat2': ['c'] * 2 + ['d'] * 2 + ['e'] * 2 + ['f'] * 2 + ['c'] * 2 + - ['d'] * 2 + ['e'] * 2, - 'cat3': lmap(lambda x: 'g%s' % x, lrange(1, 15)), - 'val': np.random.randint(100, size=14), - }) - - def f_copy(x): - x = x.copy() - x['rank'] = x.val.rank(method='min') - return x.groupby('cat2')['rank'].min() - - def f_no_copy(x): - x['rank'] = x.val.rank(method='min') - return x.groupby('cat2')['rank'].min() - - grpby_copy = mydf.groupby('cat1').apply(f_copy) - grpby_no_copy = mydf.groupby('cat1').apply(f_no_copy) - assert_series_equal(grpby_copy, grpby_no_copy) - - def test_no_mutate_but_looks_like(self): - - # GH 8467 - # first show's mutation indicator - # second does not, but should yield the same results - df = 
DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'value': range(9)}) - - result1 = df.groupby('key', group_keys=True).apply(lambda x: x[:].key) - result2 = df.groupby('key', group_keys=True).apply(lambda x: x.key) - assert_series_equal(result1, result2) - - def test_apply_chunk_view(self): - # Low level tinkering could be unsafe, make sure not - df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], - 'value': lrange(9)}) - - # return view - f = lambda x: x[:2] - - result = df.groupby('key', group_keys=False).apply(f) - expected = df.take([0, 1, 3, 4, 6, 7]) - assert_frame_equal(result, expected) - - def test_apply_no_name_column_conflict(self): - df = DataFrame({'name': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2], - 'name2': [0, 0, 0, 1, 1, 1, 0, 0, 1, 1], - 'value': lrange(10)[::-1]}) - - # it works! #2605 - grouped = df.groupby(['name', 'name2']) - grouped.apply(lambda x: x.sort_values('value', inplace=True)) - - def test_groupby_series_indexed_differently(self): - s1 = Series([5.0, -9.0, 4.0, 100., -5., 55., 6.7], - index=Index(['a', 'b', 'c', 'd', 'e', 'f', 'g'])) - s2 = Series([1.0, 1.0, 4.0, 5.0, 5.0, 7.0], - index=Index(['a', 'b', 'd', 'f', 'g', 'h'])) - - grouped = s1.groupby(s2) - agged = grouped.mean() - exp = s1.groupby(s2.reindex(s1.index).get).mean() - assert_series_equal(agged, exp) - - def test_groupby_with_hier_columns(self): - tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', - 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', - 'one', 'two']])) - index = MultiIndex.from_tuples(tuples) - columns = MultiIndex.from_tuples([('A', 'cat'), ('B', 'dog'), ( - 'B', 'cat'), ('A', 'dog')]) - df = DataFrame(np.random.randn(8, 4), index=index, columns=columns) - - result = df.groupby(level=0).mean() - tm.assert_index_equal(result.columns, columns) - - result = df.groupby(level=0, axis=1).mean() - tm.assert_index_equal(result.index, df.index) - - result = df.groupby(level=0).agg(np.mean) - tm.assert_index_equal(result.columns, columns) - - result = df.groupby(level=0).apply(lambda x: x.mean()) - tm.assert_index_equal(result.columns, columns) - - result = df.groupby(level=0, axis=1).agg(lambda x: x.mean(1)) - tm.assert_index_equal(result.columns, Index(['A', 'B'])) - tm.assert_index_equal(result.index, df.index) - - # add a nuisance column - sorted_columns, _ = columns.sortlevel(0) - df['A', 'foo'] = 'bar' - result = df.groupby(level=0).mean() - tm.assert_index_equal(result.columns, df.columns[:-1]) - - def test_pass_args_kwargs(self): - from numpy import percentile - - def f(x, q=None, axis=0): - return percentile(x, q, axis=axis) - - g = lambda x: percentile(x, 80, axis=0) - - # Series - ts_grouped = self.ts.groupby(lambda x: x.month) - agg_result = ts_grouped.agg(percentile, 80, axis=0) - apply_result = ts_grouped.apply(percentile, 80, axis=0) - trans_result = ts_grouped.transform(percentile, 80, axis=0) - - agg_expected = ts_grouped.quantile(.8) - trans_expected = ts_grouped.transform(g) - - assert_series_equal(apply_result, agg_expected) - assert_series_equal(agg_result, agg_expected, check_names=False) - assert_series_equal(trans_result, trans_expected) - - agg_result = ts_grouped.agg(f, q=80) - apply_result = ts_grouped.apply(f, q=80) - trans_result = ts_grouped.transform(f, q=80) - assert_series_equal(agg_result, agg_expected) - assert_series_equal(apply_result, agg_expected) - assert_series_equal(trans_result, trans_expected) - - # DataFrame - df_grouped = self.tsframe.groupby(lambda x: x.month) - agg_result = df_grouped.agg(percentile, 80, axis=0) - apply_result = 
df_grouped.apply(DataFrame.quantile, .8) - expected = df_grouped.quantile(.8) - assert_frame_equal(apply_result, expected) - assert_frame_equal(agg_result, expected, check_names=False) - - agg_result = df_grouped.agg(f, q=80) - apply_result = df_grouped.apply(DataFrame.quantile, q=.8) - assert_frame_equal(agg_result, expected, check_names=False) - assert_frame_equal(apply_result, expected) - - def test_non_cython_api(self): - - # GH5610 - # non-cython calls should not include the grouper - - df = DataFrame( - [[1, 2, 'foo'], - [1, np.nan, 'bar'], - [3, np.nan, 'baz']], - columns=['A', 'B', 'C']) - g = df.groupby('A') - gni = df.groupby('A', as_index=False) - - # mad - expected = DataFrame([[0], [np.nan]], columns=['B'], index=[1, 3]) - expected.index.name = 'A' - result = g.mad() - assert_frame_equal(result, expected) - - expected = DataFrame([[0., 0.], [0, np.nan]], columns=['A', 'B'], - index=[0, 1]) - result = gni.mad() - assert_frame_equal(result, expected) - - # describe - expected_index = pd.Index([1, 3], name='A') - expected_col = pd.MultiIndex(levels=[['B'], - ['count', 'mean', 'std', 'min', - '25%', '50%', '75%', 'max']], - labels=[[0] * 8, list(range(8))]) - expected = pd.DataFrame([[1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0], - [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, - np.nan, np.nan]], - index=expected_index, - columns=expected_col) - result = g.describe() - assert_frame_equal(result, expected) - - expected = pd.concat([df[df.A == 1].describe().unstack().to_frame().T, - df[df.A == 3].describe().unstack().to_frame().T]) - expected.index = pd.Index([0, 1]) - result = gni.describe() - assert_frame_equal(result, expected) - - # any - expected = DataFrame([[True, True], [False, True]], columns=['B', 'C'], - index=[1, 3]) - expected.index.name = 'A' - result = g.any() - assert_frame_equal(result, expected) - - # idxmax - expected = DataFrame([[0.0], [np.nan]], columns=['B'], index=[1, 3]) - expected.index.name = 'A' - result = g.idxmax() - assert_frame_equal(result, expected) - - def test_cython_api2(self): - - # this takes the fast apply path - - # cumsum (GH5614) - df = DataFrame( - [[1, 2, np.nan], [1, np.nan, 9], [3, 4, 9] - ], columns=['A', 'B', 'C']) - expected = DataFrame( - [[2, np.nan], [np.nan, 9], [4, 9]], columns=['B', 'C']) - result = df.groupby('A').cumsum() - assert_frame_equal(result, expected) - - # GH 5755 - cumsum is a transformer and should ignore as_index - result = df.groupby('A', as_index=False).cumsum() - assert_frame_equal(result, expected) - - # GH 13994 - result = df.groupby('A').cumsum(axis=1) - expected = df.cumsum(axis=1) - assert_frame_equal(result, expected) - result = df.groupby('A').cumprod(axis=1) - expected = df.cumprod(axis=1) - assert_frame_equal(result, expected) - - def test_grouping_ndarray(self): - grouped = self.df.groupby(self.df['A'].values) - - result = grouped.sum() - expected = self.df.groupby('A').sum() - assert_frame_equal(result, expected, check_names=False - ) # Note: no names when grouping by value - - def test_apply_typecast_fail(self): - df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], - 'c': np.tile( - ['a', 'b', 'c'], 2), - 'v': np.arange(1., 7.)}) - - def f(group): - v = group['v'] - group['v2'] = (v - v.min()) / (v.max() - v.min()) - return group - - result = df.groupby('d').apply(f) - - expected = df.copy() - expected['v2'] = np.tile([0., 0.5, 1], 2) - - assert_frame_equal(result, expected) - - def test_apply_multiindex_fail(self): - index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] - ]) - df = 
DataFrame({'d': [1., 1., 1., 2., 2., 2.], - 'c': np.tile(['a', 'b', 'c'], 2), - 'v': np.arange(1., 7.)}, index=index) - - def f(group): - v = group['v'] - group['v2'] = (v - v.min()) / (v.max() - v.min()) - return group - - result = df.groupby('d').apply(f) - - expected = df.copy() - expected['v2'] = np.tile([0., 0.5, 1], 2) - - assert_frame_equal(result, expected) - - def test_apply_corner(self): - result = self.tsframe.groupby(lambda x: x.year).apply(lambda x: x * 2) - expected = self.tsframe * 2 - assert_frame_equal(result, expected) - - def test_apply_without_copy(self): - # GH 5545 - # returning a non-copy in an applied function fails - - data = DataFrame({'id_field': [100, 100, 200, 300], - 'category': ['a', 'b', 'c', 'c'], - 'value': [1, 2, 3, 4]}) - - def filt1(x): - if x.shape[0] == 1: - return x.copy() - else: - return x[x.category == 'c'] - - def filt2(x): - if x.shape[0] == 1: - return x - else: - return x[x.category == 'c'] - - expected = data.groupby('id_field').apply(filt1) - result = data.groupby('id_field').apply(filt2) - assert_frame_equal(result, expected) - - def test_apply_corner_cases(self): - # #535, can't use sliding iterator - - N = 1000 - labels = np.random.randint(0, 100, size=N) - df = DataFrame({'key': labels, - 'value1': np.random.randn(N), - 'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)}) - - grouped = df.groupby('key') - - def f(g): - g['value3'] = g['value1'] * 2 - return g - - result = grouped.apply(f) - assert 'value3' in result + assert_frame_equal(result3, expected3) + + # multi-key + + grouped = df.groupby(['A', 'B'], as_index=False) + + result = grouped.agg(np.mean) + expected = grouped.mean() + assert_frame_equal(result, expected) + + result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]])) + expected2 = grouped.mean() + expected2['D'] = grouped.sum()['D'] + assert_frame_equal(result2, expected2) + + expected3 = grouped['C'].sum() + expected3 = DataFrame(expected3).rename(columns={'C': 'Q'}) + result3 = grouped['C'].agg({'Q': np.sum}) + assert_frame_equal(result3, expected3) + + # GH7115 & GH8112 & GH8582 + df = DataFrame(np.random.randint(0, 100, (50, 3)), + columns=['jim', 'joe', 'jolie']) + ts = Series(np.random.randint(5, 10, 50), name='jim') + + gr = df.groupby(ts) + gr.nth(0) # invokes set_selection_from_grouper internally + assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum)) + + for attr in ['mean', 'max', 'count', 'idxmax', 'cumsum', 'all']: + gr = df.groupby(ts, as_index=False) + left = getattr(gr, attr)() + + gr = df.groupby(ts.values, as_index=True) + right = getattr(gr, attr)().reset_index(drop=True) + + assert_frame_equal(left, right) + + +def test_as_index_series_return_frame(df): + grouped = df.groupby('A', as_index=False) + grouped2 = df.groupby(['A', 'B'], as_index=False) + + result = grouped['C'].agg(np.sum) + expected = grouped.agg(np.sum).loc[:, ['A', 'C']] + assert isinstance(result, DataFrame) + assert_frame_equal(result, expected) + + result2 = grouped2['C'].agg(np.sum) + expected2 = grouped2.agg(np.sum).loc[:, ['A', 'B', 'C']] + assert isinstance(result2, DataFrame) + assert_frame_equal(result2, expected2) + + result = grouped['C'].sum() + expected = grouped.sum().loc[:, ['A', 'C']] + assert isinstance(result, DataFrame) + assert_frame_equal(result, expected) + + result2 = grouped2['C'].sum() + expected2 = grouped2.sum().loc[:, ['A', 'B', 'C']] + assert isinstance(result2, DataFrame) + assert_frame_equal(result2, expected2) + + # corner case + pytest.raises(Exception, grouped['C'].__getitem__, 'D') + + 
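+# A minimal illustrative sketch, not one of the ported tests: it assumes the
+# shared ``df`` fixture from pandas/tests/groupby/conftest.py and shows the
+# round trip that the as_index cases above rely on.
+def test_as_index_round_trip_sketch(df):
+    # as_index=False aggregation should match as_index=True + reset_index()
+    left = df.groupby('A', as_index=False)['C'].sum()
+    right = df.groupby('A')['C'].sum().reset_index()
+    assert_frame_equal(left, right)
+
+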
+def test_groupby_as_index_cython(df): + data = df + + # single-key + grouped = data.groupby('A', as_index=False) + result = grouped.mean() + expected = data.groupby(['A']).mean() + expected.insert(0, 'A', expected.index) + expected.index = np.arange(len(expected)) + assert_frame_equal(result, expected) + + # multi-key + grouped = data.groupby(['A', 'B'], as_index=False) + result = grouped.mean() + expected = data.groupby(['A', 'B']).mean() + + arrays = lzip(*expected.index.values) + expected.insert(0, 'A', arrays[0]) + expected.insert(1, 'B', arrays[1]) + expected.index = np.arange(len(expected)) + assert_frame_equal(result, expected) + + +def test_groupby_as_index_series_scalar(df): + grouped = df.groupby(['A', 'B'], as_index=False) + + # GH #421 + + result = grouped['C'].agg(len) + expected = grouped.agg(len).loc[:, ['A', 'B', 'C']] + assert_frame_equal(result, expected) + + +def test_groupby_as_index_corner(df, ts): + pytest.raises(TypeError, ts.groupby, lambda x: x.weekday(), + as_index=False) + + pytest.raises(ValueError, df.groupby, lambda x: x.lower(), + as_index=False, axis=1) + + +def test_groupby_multiple_key(df): + df = tm.makeTimeDataFrame() + grouped = df.groupby([lambda x: x.year, lambda x: x.month, + lambda x: x.day]) + agged = grouped.sum() + assert_almost_equal(df.values, agged.values) + + grouped = df.T.groupby([lambda x: x.year, + lambda x: x.month, + lambda x: x.day], axis=1) + + agged = grouped.agg(lambda x: x.sum()) + tm.assert_index_equal(agged.index, df.columns) + assert_almost_equal(df.T.values, agged.values) + + agged = grouped.agg(lambda x: x.sum()) + assert_almost_equal(df.T.values, agged.values) + + +def test_groupby_multi_corner(df): + # test that having an all-NA column doesn't mess you up + df = df.copy() + df['bad'] = np.nan + agged = df.groupby(['A', 'B']).mean() + + expected = df.groupby(['A', 'B']).mean() + expected['bad'] = np.nan + + assert_frame_equal(agged, expected) + + +def test_omit_nuisance(df): + grouped = df.groupby('A') + + result = grouped.mean() + expected = df.loc[:, ['A', 'C', 'D']].groupby('A').mean() + assert_frame_equal(result, expected) + + agged = grouped.agg(np.mean) + exp = grouped.mean() + assert_frame_equal(agged, exp) + + df = df.loc[:, ['A', 'C', 'D']] + df['E'] = datetime.now() + grouped = df.groupby('A') + result = grouped.agg(np.sum) + expected = grouped.sum() + assert_frame_equal(result, expected) + + # won't work with axis = 1 + grouped = df.groupby({'A': 0, 'C': 0, 'D': 1, 'E': 1}, axis=1) + result = pytest.raises(TypeError, grouped.agg, + lambda x: x.sum(0, numeric_only=False)) + + +def test_omit_nuisance_python_multiple(three_group): + grouped = three_group.groupby(['A', 'B']) + + agged = grouped.agg(np.mean) + exp = grouped.mean() + assert_frame_equal(agged, exp) + + +def test_empty_groups_corner(mframe): + # handle empty groups + df = DataFrame({'k1': np.array(['b', 'b', 'b', 'a', 'a', 'a']), + 'k2': np.array(['1', '1', '1', '2', '2', '2']), + 'k3': ['foo', 'bar'] * 3, + 'v1': np.random.randn(6), + 'v2': np.random.randn(6)}) + + grouped = df.groupby(['k1', 'k2']) + result = grouped.agg(np.mean) + expected = grouped.mean() + assert_frame_equal(result, expected) + + grouped = mframe[3:5].groupby(level=0) + agged = grouped.apply(lambda x: x.mean()) + agged_A = grouped['A'].apply(np.mean) + assert_series_equal(agged['A'], agged_A) + assert agged.index.name == 'first' + + +def test_nonsense_func(): + df = DataFrame([0]) + pytest.raises(Exception, df.groupby, lambda x: x + 'foo') + + +def 
test_wrap_aggregated_output_multindex(mframe):
+    df = mframe.T
+    df['baz', 'two'] = 'peekaboo'
+
+    keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
+    agged = df.groupby(keys).agg(np.mean)
+    assert isinstance(agged.columns, MultiIndex)
+
+    def aggfun(ser):
+        if ser.name == ('foo', 'one'):
+            raise TypeError
+        else:
+            return ser.sum()
+
+    agged2 = df.groupby(keys).aggregate(aggfun)
+    assert len(agged2.columns) + 1 == len(df.columns)
+
+
+def test_groupby_level_apply(mframe):
+
+    result = mframe.groupby(level=0).count()
+    assert result.index.name == 'first'
+    result = mframe.groupby(level=1).count()
+    assert result.index.name == 'second'
+
+    result = mframe['A'].groupby(level=0).count()
+    assert result.index.name == 'first'
+
+
+def test_groupby_level_mapper(mframe):
+    deleveled = mframe.reset_index()
+
+    mapper0 = {'foo': 0, 'bar': 0, 'baz': 1, 'qux': 1}
+    mapper1 = {'one': 0, 'two': 0, 'three': 1}
+
+    result0 = mframe.groupby(mapper0, level=0).sum()
+    result1 = mframe.groupby(mapper1, level=1).sum()
+
+    mapped_level0 = np.array([mapper0.get(x) for x in deleveled['first']])
+    mapped_level1 = np.array([mapper1.get(x) for x in deleveled['second']])
+    expected0 = mframe.groupby(mapped_level0).sum()
+    expected1 = mframe.groupby(mapped_level1).sum()
+    expected0.index.name, expected1.index.name = 'first', 'second'
+
+    assert_frame_equal(result0, expected0)
+    assert_frame_equal(result1, expected1)
+
+
+def test_groupby_level_nonmulti():
+    # GH 1313, GH 13901
+    s = Series([1, 2, 3, 10, 4, 5, 20, 6],
+               Index([1, 2, 3, 1, 4, 5, 2, 6], name='foo'))
+    expected = Series([11, 22, 3, 4, 5, 6],
+                      Index(range(1, 7), name='foo'))
+
+    result = s.groupby(level=0).sum()
+    tm.assert_series_equal(result, expected)
+    result = s.groupby(level=[0]).sum()
+    tm.assert_series_equal(result, expected)
+    result = s.groupby(level=-1).sum()
+    tm.assert_series_equal(result, expected)
+    result = s.groupby(level=[-1]).sum()
+    tm.assert_series_equal(result, expected)
+
+    pytest.raises(ValueError, s.groupby, level=1)
+    pytest.raises(ValueError, s.groupby, level=-2)
+    pytest.raises(ValueError, s.groupby, level=[])
+    pytest.raises(ValueError, s.groupby, level=[0, 0])
+    pytest.raises(ValueError, s.groupby, level=[0, 1])
+    pytest.raises(ValueError, s.groupby, level=[1])
+
+
+def test_groupby_complex():
+    # GH 12902
+    a = Series(data=np.arange(4) * (1 + 2j), index=[0, 0, 1, 1])
+    expected = Series((1 + 2j, 5 + 10j))
+
+    result = a.groupby(level=0).sum()
+    assert_series_equal(result, expected)
+
+    result = a.sum(level=0)
+    assert_series_equal(result, expected)
+
+
+def test_mutate_groups():
+
+    # GH3380
+
+    df = DataFrame({
+        'cat1': ['a'] * 8 + ['b'] * 6,
+        'cat2': ['c'] * 2 + ['d'] * 2 + ['e'] * 2 + ['f'] * 2 + ['c'] * 2 +
+        ['d'] * 2 + ['e'] * 2,
+        'cat3': lmap(lambda x: 'g%s' % x, lrange(1, 15)),
+        'val': np.random.randint(100, size=14),
+    })
+
+    def f_copy(x):
+        x = x.copy()
+        x['rank'] = x.val.rank(method='min')
+        return x.groupby('cat2')['rank'].min()
+
+    def f_no_copy(x):
+        x['rank'] = x.val.rank(method='min')
+        return x.groupby('cat2')['rank'].min()
+
+    grpby_copy = df.groupby('cat1').apply(f_copy)
+    grpby_no_copy = df.groupby('cat1').apply(f_no_copy)
+    assert_series_equal(grpby_copy, grpby_no_copy)
+
+
+def test_no_mutate_but_looks_like():
+
+    # GH 8467
+    # the first shows the mutation indicator
+    # the second does not, but should yield the same results
+    df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'value': range(9)})
+
+    result1 = df.groupby('key', group_keys=True).apply(lambda x: x[:].key)
+    result2 = df.groupby('key',
group_keys=True).apply(lambda x: x.key) + assert_series_equal(result1, result2) + + +def test_groupby_series_indexed_differently(): + s1 = Series([5.0, -9.0, 4.0, 100., -5., 55., 6.7], + index=Index(['a', 'b', 'c', 'd', 'e', 'f', 'g'])) + s2 = Series([1.0, 1.0, 4.0, 5.0, 5.0, 7.0], + index=Index(['a', 'b', 'd', 'f', 'g', 'h'])) + + grouped = s1.groupby(s2) + agged = grouped.mean() + exp = s1.groupby(s2.reindex(s1.index).get).mean() + assert_series_equal(agged, exp) + + +def test_groupby_with_hier_columns(): + tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', + 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', + 'one', 'two']])) + index = MultiIndex.from_tuples(tuples) + columns = MultiIndex.from_tuples([('A', 'cat'), ('B', 'dog'), ( + 'B', 'cat'), ('A', 'dog')]) + df = DataFrame(np.random.randn(8, 4), index=index, columns=columns) + + result = df.groupby(level=0).mean() + tm.assert_index_equal(result.columns, columns) + + result = df.groupby(level=0, axis=1).mean() + tm.assert_index_equal(result.index, df.index) + + result = df.groupby(level=0).agg(np.mean) + tm.assert_index_equal(result.columns, columns) + + result = df.groupby(level=0).apply(lambda x: x.mean()) + tm.assert_index_equal(result.columns, columns) + + result = df.groupby(level=0, axis=1).agg(lambda x: x.mean(1)) + tm.assert_index_equal(result.columns, Index(['A', 'B'])) + tm.assert_index_equal(result.index, df.index) + + # add a nuisance column + sorted_columns, _ = columns.sortlevel(0) + df['A', 'foo'] = 'bar' + result = df.groupby(level=0).mean() + tm.assert_index_equal(result.columns, df.columns[:-1]) - def test_groupby_wrong_multi_labels(self): - data = """index,foo,bar,baz,spam,data + +def test_grouping_ndarray(df): + grouped = df.groupby(df['A'].values) + + result = grouped.sum() + expected = df.groupby('A').sum() + assert_frame_equal(result, expected, check_names=False + ) # Note: no names when grouping by value + + +def test_groupby_wrong_multi_labels(): + data = """index,foo,bar,baz,spam,data 0,foo1,bar1,baz1,spam2,20 1,foo1,bar2,baz1,spam3,30 2,foo2,bar2,baz1,spam2,40 3,foo1,bar1,baz2,spam1,50 4,foo3,bar1,baz2,spam1,60""" - data = read_csv(StringIO(data), index_col=0) - - grouped = data.groupby(['foo', 'bar', 'baz', 'spam']) - - result = grouped.agg(np.mean) - expected = grouped.mean() - assert_frame_equal(result, expected) - - def test_groupby_series_with_name(self): - result = self.df.groupby(self.df['A']).mean() - result2 = self.df.groupby(self.df['A'], as_index=False).mean() - assert result.index.name == 'A' - assert 'A' in result2 - - result = self.df.groupby([self.df['A'], self.df['B']]).mean() - result2 = self.df.groupby([self.df['A'], self.df['B']], - as_index=False).mean() - assert result.index.names == ('A', 'B') - assert 'A' in result2 - assert 'B' in result2 - - def test_seriesgroupby_name_attr(self): - # GH 6265 - result = self.df.groupby('A')['C'] - assert result.count().name == 'C' - assert result.mean().name == 'C' - - testFunc = lambda x: np.sum(x) * 2 - assert result.agg(testFunc).name == 'C' - - def test_consistency_name(self): - # GH 12363 - - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'two', - 'two', 'two', 'one', 'two'], - 'C': np.random.randn(8) + 1.0, - 'D': np.arange(8)}) - - expected = df.groupby(['A']).B.count() - result = df.B.groupby(df.A).count() - assert_series_equal(result, expected) - - def test_groupby_name_propagation(self): - # GH 6124 - def summarize(df, name=None): - return Series({'count': 
1, 'mean': 2, 'omissions': 3, }, name=name) - - def summarize_random_name(df): - # Provide a different name for each Series. In this case, groupby - # should not attempt to propagate the Series name since they are - # inconsistent. - return Series({ - 'count': 1, - 'mean': 2, - 'omissions': 3, - }, name=df.iloc[0]['A']) - - metrics = self.df.groupby('A').apply(summarize) - assert metrics.columns.name is None - metrics = self.df.groupby('A').apply(summarize, 'metrics') - assert metrics.columns.name == 'metrics' - metrics = self.df.groupby('A').apply(summarize_random_name) - assert metrics.columns.name is None - - def test_groupby_nonstring_columns(self): - df = DataFrame([np.arange(10) for x in range(10)]) - grouped = df.groupby(0) - result = grouped.mean() - expected = df.groupby(df[0]).mean() - assert_frame_equal(result, expected) - - def test_groupby_mixed_type_columns(self): - # GH 13432, unorderable types in py3 - df = DataFrame([[0, 1, 2]], columns=['A', 'B', 0]) - expected = DataFrame([[1, 2]], columns=['B', 0], - index=Index([0], name='A')) - - result = df.groupby('A').first() - tm.assert_frame_equal(result, expected) - - result = df.groupby('A').sum() - tm.assert_frame_equal(result, expected) - - def test_cython_grouper_series_bug_noncontig(self): - arr = np.empty((100, 100)) - arr.fill(np.nan) - obj = Series(arr[:, 0], index=lrange(100)) - inds = np.tile(lrange(10), 10) - - result = obj.groupby(inds).agg(Series.median) - assert result.isna().all() - - def test_series_grouper_noncontig_index(self): - index = Index(tm.rands_array(10, 100)) - - values = Series(np.random.randn(50), index=index[::2]) - labels = np.random.randint(0, 5, 50) - - # it works! - grouped = values.groupby(labels) - - # accessing the index elements causes segfault - f = lambda x: len(set(map(id, x.index))) - grouped.agg(f) - - def test_convert_objects_leave_decimal_alone(self): - - from decimal import Decimal - - s = Series(lrange(5)) - labels = np.array(['a', 'b', 'c', 'd', 'e'], dtype='O') - - def convert_fast(x): - return Decimal(str(x.mean())) - - def convert_force_pure(x): - # base will be length 0 - assert (len(x.base) > 0) - return Decimal(str(x.mean())) - - grouped = s.groupby(labels) - - result = grouped.agg(convert_fast) - assert result.dtype == np.object_ - assert isinstance(result[0], Decimal) - - result = grouped.agg(convert_force_pure) - assert result.dtype == np.object_ - assert isinstance(result[0], Decimal) - - def test_fast_apply(self): - # make sure that fast apply is correctly called - # rather than raising any kind of error - # otherwise the python path will be callsed - # which slows things down - N = 1000 - labels = np.random.randint(0, 2000, size=N) - labels2 = np.random.randint(0, 3, size=N) - df = DataFrame({'key': labels, - 'key2': labels2, - 'value1': np.random.randn(N), - 'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)}) - - def f(g): - return 1 - - g = df.groupby(['key', 'key2']) - - grouper = g.grouper - - splitter = grouper._get_splitter(g._selected_obj, axis=g.axis) - group_keys = grouper._get_group_keys() - - values, mutated = splitter.fast_apply(f, group_keys) - assert not mutated - - def test_apply_with_mixed_dtype(self): - # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 - df = DataFrame({'foo1': np.random.randn(6), - 'foo2': ['one', 'two', 'two', 'three', 'one', 'two']}) - result = df.apply(lambda x: x, axis=1) - assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts()) - - # GH 3610 incorrect dtype conversion with as_index=False - df = 
DataFrame({"c1": [1, 2, 6, 6, 8]}) - df["c2"] = df.c1 / 2.0 - result1 = df.groupby("c2").mean().reset_index().c2 - result2 = df.groupby("c2", as_index=False).mean().c2 - assert_series_equal(result1, result2) - - def test_groupby_aggregation_mixed_dtype(self): - - # GH 6212 - expected = DataFrame({ - 'v1': [5, 5, 7, np.nan, 3, 3, 4, 1], - 'v2': [55, 55, 77, np.nan, 33, 33, 44, 11]}, - index=MultiIndex.from_tuples([(1, 95), (1, 99), (2, 95), (2, 99), - ('big', 'damp'), - ('blue', 'dry'), - ('red', 'red'), ('red', 'wet')], - names=['by1', 'by2'])) - - df = DataFrame({ - 'v1': [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], - 'v2': [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], - 'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, - 12], - 'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, - np.nan, np.nan] - }) - - g = df.groupby(['by1', 'by2']) - result = g[['v1', 'v2']].mean() - assert_frame_equal(result, expected) - - def test_groupby_dtype_inference_empty(self): - # GH 6733 - df = DataFrame({'x': [], 'range': np.arange(0, dtype='int64')}) - assert df['x'].dtype == np.float64 - - result = df.groupby('x').first() - exp_index = Index([], name='x', dtype=np.float64) - expected = DataFrame({'range': Series( - [], index=exp_index, dtype='int64')}) - assert_frame_equal(result, expected, by_blocks=True) - - def test_groupby_list_infer_array_like(self): - result = self.df.groupby(list(self.df['A'])).mean() - expected = self.df.groupby(self.df['A']).mean() - assert_frame_equal(result, expected, check_names=False) - - pytest.raises(Exception, self.df.groupby, list(self.df['A'][:-1])) - - # pathological case of ambiguity - df = DataFrame({'foo': [0, 1], - 'bar': [3, 4], - 'val': np.random.randn(2)}) - - result = df.groupby(['foo', 'bar']).mean() - expected = df.groupby([df['foo'], df['bar']]).mean()[['val']] - - def test_groupby_keys_same_size_as_index(self): - # GH 11185 - freq = 's' - index = pd.date_range(start=pd.Timestamp('2015-09-29T11:34:44-0700'), - periods=2, freq=freq) - df = pd.DataFrame([['A', 10], ['B', 15]], columns=[ - 'metric', 'values' - ], index=index) - result = df.groupby([pd.Grouper(level=0, freq=freq), 'metric']).mean() - expected = df.set_index([df.index, 'metric']) - - assert_frame_equal(result, expected) - - def test_groupby_one_row(self): - # GH 11741 - df1 = pd.DataFrame(np.random.randn(1, 4), columns=list('ABCD')) - pytest.raises(KeyError, df1.groupby, 'Z') - df2 = pd.DataFrame(np.random.randn(2, 4), columns=list('ABCD')) - pytest.raises(KeyError, df2.groupby, 'Z') - - def test_groupby_nat_exclude(self): - # GH 6992 - df = pd.DataFrame( - {'values': np.random.randn(8), - 'dt': [np.nan, pd.Timestamp('2013-01-01'), np.nan, pd.Timestamp( - '2013-02-01'), np.nan, pd.Timestamp('2013-02-01'), np.nan, - pd.Timestamp('2013-01-01')], - 'str': [np.nan, 'a', np.nan, 'a', np.nan, 'a', np.nan, 'b']}) - grouped = df.groupby('dt') - - expected = [pd.Index([1, 7]), pd.Index([3, 5])] - keys = sorted(grouped.groups.keys()) - assert len(keys) == 2 - for k, e in zip(keys, expected): - # grouped.groups keys are np.datetime64 with system tz - # not to be affected by tz, only compare values - tm.assert_index_equal(grouped.groups[k], e) - - # confirm obj is not filtered - tm.assert_frame_equal(grouped.grouper.groupings[0].obj, df) - assert grouped.ngroups == 2 - - expected = { - Timestamp('2013-01-01 00:00:00'): np.array([1, 7], dtype=np.int64), - Timestamp('2013-02-01 00:00:00'): np.array([3, 5], dtype=np.int64) - } - - for k in grouped.indices: - 
tm.assert_numpy_array_equal(grouped.indices[k], expected[k]) - - tm.assert_frame_equal( - grouped.get_group(Timestamp('2013-01-01')), df.iloc[[1, 7]]) - tm.assert_frame_equal( - grouped.get_group(Timestamp('2013-02-01')), df.iloc[[3, 5]]) + data = read_csv(StringIO(data), index_col=0) + + grouped = data.groupby(['foo', 'bar', 'baz', 'spam']) + + result = grouped.agg(np.mean) + expected = grouped.mean() + assert_frame_equal(result, expected) + + +def test_groupby_series_with_name(df): + result = df.groupby(df['A']).mean() + result2 = df.groupby(df['A'], as_index=False).mean() + assert result.index.name == 'A' + assert 'A' in result2 + + result = df.groupby([df['A'], df['B']]).mean() + result2 = df.groupby([df['A'], df['B']], + as_index=False).mean() + assert result.index.names == ('A', 'B') + assert 'A' in result2 + assert 'B' in result2 + + +def test_seriesgroupby_name_attr(df): + # GH 6265 + result = df.groupby('A')['C'] + assert result.count().name == 'C' + assert result.mean().name == 'C' + + testFunc = lambda x: np.sum(x) * 2 + assert result.agg(testFunc).name == 'C' + + +def test_consistency_name(): + # GH 12363 + + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'two', + 'two', 'two', 'one', 'two'], + 'C': np.random.randn(8) + 1.0, + 'D': np.arange(8)}) + + expected = df.groupby(['A']).B.count() + result = df.B.groupby(df.A).count() + assert_series_equal(result, expected) + + +def test_groupby_name_propagation(df): + # GH 6124 + def summarize(df, name=None): + return Series({'count': 1, 'mean': 2, 'omissions': 3, }, name=name) + + def summarize_random_name(df): + # Provide a different name for each Series. In this case, groupby + # should not attempt to propagate the Series name since they are + # inconsistent. + return Series({ + 'count': 1, + 'mean': 2, + 'omissions': 3, + }, name=df.iloc[0]['A']) + + metrics = df.groupby('A').apply(summarize) + assert metrics.columns.name is None + metrics = df.groupby('A').apply(summarize, 'metrics') + assert metrics.columns.name == 'metrics' + metrics = df.groupby('A').apply(summarize_random_name) + assert metrics.columns.name is None + + +def test_groupby_nonstring_columns(): + df = DataFrame([np.arange(10) for x in range(10)]) + grouped = df.groupby(0) + result = grouped.mean() + expected = df.groupby(df[0]).mean() + assert_frame_equal(result, expected) + + +def test_groupby_mixed_type_columns(): + # GH 13432, unorderable types in py3 + df = DataFrame([[0, 1, 2]], columns=['A', 'B', 0]) + expected = DataFrame([[1, 2]], columns=['B', 0], + index=Index([0], name='A')) + + result = df.groupby('A').first() + tm.assert_frame_equal(result, expected) + + result = df.groupby('A').sum() + tm.assert_frame_equal(result, expected) + + +def test_cython_grouper_series_bug_noncontig(): + arr = np.empty((100, 100)) + arr.fill(np.nan) + obj = Series(arr[:, 0], index=lrange(100)) + inds = np.tile(lrange(10), 10) + + result = obj.groupby(inds).agg(Series.median) + assert result.isna().all() + + +def test_series_grouper_noncontig_index(): + index = Index(tm.rands_array(10, 100)) + + values = Series(np.random.randn(50), index=index[::2]) + labels = np.random.randint(0, 5, 50) + + # it works! 
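+    # (regression guard: values.index is a non-contiguous slice view of the
+    # original index, which is what used to trigger the segfault)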
+    grouped = values.groupby(labels)
+
+    # accessing the index elements causes segfault
+    f = lambda x: len(set(map(id, x.index)))
+    grouped.agg(f)
+
+
+def test_convert_objects_leave_decimal_alone():
+
+    s = Series(lrange(5))
+    labels = np.array(['a', 'b', 'c', 'd', 'e'], dtype='O')
+
+    def convert_fast(x):
+        return Decimal(str(x.mean()))
+
+    def convert_force_pure(x):
+        # base will be length 0
+        assert (len(x.base) > 0)
+        return Decimal(str(x.mean()))
+
+    grouped = s.groupby(labels)
+
+    result = grouped.agg(convert_fast)
+    assert result.dtype == np.object_
+    assert isinstance(result[0], Decimal)
+
+    result = grouped.agg(convert_force_pure)
+    assert result.dtype == np.object_
+    assert isinstance(result[0], Decimal)
+
+
+def test_groupby_dtype_inference_empty():
+    # GH 6733
+    df = DataFrame({'x': [], 'range': np.arange(0, dtype='int64')})
+    assert df['x'].dtype == np.float64
+
+    result = df.groupby('x').first()
+    exp_index = Index([], name='x', dtype=np.float64)
+    expected = DataFrame({'range': Series(
+        [], index=exp_index, dtype='int64')})
+    assert_frame_equal(result, expected, by_blocks=True)
+
+
+def test_groupby_list_infer_array_like(df):
+    result = df.groupby(list(df['A'])).mean()
+    expected = df.groupby(df['A']).mean()
+    assert_frame_equal(result, expected, check_names=False)
+
+    pytest.raises(Exception, df.groupby, list(df['A'][:-1]))
+
+    # pathological case of ambiguity
+    df = DataFrame({'foo': [0, 1],
+                    'bar': [3, 4],
+                    'val': np.random.randn(2)})
+
+    result = df.groupby(['foo', 'bar']).mean()
+    expected = df.groupby([df['foo'], df['bar']]).mean()[['val']]
+    assert_frame_equal(result, expected)
+
+
+def test_groupby_keys_same_size_as_index():
+    # GH 11185
+    freq = 's'
+    index = pd.date_range(start=pd.Timestamp('2015-09-29T11:34:44-0700'),
+                          periods=2, freq=freq)
+    df = pd.DataFrame([['A', 10], ['B', 15]], columns=[
+        'metric', 'values'
+    ], index=index)
+    result = df.groupby([pd.Grouper(level=0, freq=freq), 'metric']).mean()
+    expected = df.set_index([df.index, 'metric'])
+
+    assert_frame_equal(result, expected)
+
+
+def test_groupby_one_row():
+    # GH 11741
+    df1 = pd.DataFrame(np.random.randn(1, 4), columns=list('ABCD'))
+    pytest.raises(KeyError, df1.groupby, 'Z')
+    df2 = pd.DataFrame(np.random.randn(2, 4), columns=list('ABCD'))
+    pytest.raises(KeyError, df2.groupby, 'Z')
+
+
+def test_groupby_nat_exclude():
+    # GH 6992
+    df = pd.DataFrame(
+        {'values': np.random.randn(8),
+         'dt': [np.nan, pd.Timestamp('2013-01-01'), np.nan, pd.Timestamp(
+             '2013-02-01'), np.nan, pd.Timestamp('2013-02-01'), np.nan,
+             pd.Timestamp('2013-01-01')],
+         'str': [np.nan, 'a', np.nan, 'a', np.nan, 'a', np.nan, 'b']})
+    grouped = df.groupby('dt')
+
+    expected = [pd.Index([1, 7]), pd.Index([3, 5])]
+    keys = sorted(grouped.groups.keys())
+    assert len(keys) == 2
+    for k, e in zip(keys, expected):
+        # grouped.groups keys are np.datetime64 with system tz
+        # not to be affected by tz, only compare values
+        tm.assert_index_equal(grouped.groups[k], e)
+
+    # confirm obj is not filtered
+    tm.assert_frame_equal(grouped.grouper.groupings[0].obj, df)
+    assert grouped.ngroups == 2
+
+    expected = {
+        Timestamp('2013-01-01 00:00:00'): np.array([1, 7], dtype=np.int64),
+        Timestamp('2013-02-01 00:00:00'): np.array([3, 5], dtype=np.int64)
+    }
+
+    for k in grouped.indices:
+        tm.assert_numpy_array_equal(grouped.indices[k], expected[k])
+
+    tm.assert_frame_equal(
+        grouped.get_group(Timestamp('2013-01-01')), df.iloc[[1, 7]])
+    tm.assert_frame_equal(
+        grouped.get_group(Timestamp('2013-02-01')), df.iloc[[3, 5]])
+
+    pytest.raises(KeyError, grouped.get_group, pd.NaT)
+
+    
nan_df = DataFrame({'nan': [np.nan, np.nan, np.nan], + 'nat': [pd.NaT, pd.NaT, pd.NaT]}) + assert nan_df['nan'].dtype == 'float64' + assert nan_df['nat'].dtype == 'datetime64[ns]' + + for key in ['nan', 'nat']: + grouped = nan_df.groupby(key) + assert grouped.groups == {} + assert grouped.ngroups == 0 + assert grouped.indices == {} + pytest.raises(KeyError, grouped.get_group, np.nan) pytest.raises(KeyError, grouped.get_group, pd.NaT) - nan_df = DataFrame({'nan': [np.nan, np.nan, np.nan], - 'nat': [pd.NaT, pd.NaT, pd.NaT]}) - assert nan_df['nan'].dtype == 'float64' - assert nan_df['nat'].dtype == 'datetime64[ns]' - - for key in ['nan', 'nat']: - grouped = nan_df.groupby(key) - assert grouped.groups == {} - assert grouped.ngroups == 0 - assert grouped.indices == {} - pytest.raises(KeyError, grouped.get_group, np.nan) - pytest.raises(KeyError, grouped.get_group, pd.NaT) - - def test_sparse_friendly(self): - sdf = self.df[['C', 'D']].to_sparse() - with catch_warnings(record=True): - panel = tm.makePanel() - tm.add_nans(panel) - - def _check_work(gp): - gp.mean() - gp.agg(np.mean) - dict(iter(gp)) - - # it works! - _check_work(sdf.groupby(lambda x: x // 2)) - _check_work(sdf['C'].groupby(lambda x: x // 2)) - _check_work(sdf.groupby(self.df['A'])) - - # do this someday - # _check_work(panel.groupby(lambda x: x.month, axis=1)) - - def test_panel_groupby(self): - with catch_warnings(record=True): - self.panel = tm.makePanel() - tm.add_nans(self.panel) - grouped = self.panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1}, - axis='items') - agged = grouped.mean() - agged2 = grouped.agg(lambda x: x.mean('items')) - - tm.assert_panel_equal(agged, agged2) - - tm.assert_index_equal(agged.items, Index([0, 1])) - - grouped = self.panel.groupby(lambda x: x.month, axis='major') - agged = grouped.mean() - - exp = Index(sorted(list(set(self.panel.major_axis.month)))) - tm.assert_index_equal(agged.major_axis, exp) - - grouped = self.panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, - axis='minor') - agged = grouped.mean() - tm.assert_index_equal(agged.minor_axis, Index([0, 1])) - - def test_groupby_2d_malformed(self): - d = DataFrame(index=lrange(2)) - d['group'] = ['g1', 'g2'] - d['zeros'] = [0, 0] - d['ones'] = [1, 1] - d['label'] = ['l1', 'l2'] - tmp = d.groupby(['group']).mean() - res_values = np.array([[0, 1], [0, 1]], dtype=np.int64) - tm.assert_index_equal(tmp.columns, Index(['zeros', 'ones'])) - tm.assert_numpy_array_equal(tmp.values, res_values) - - def test_int32_overflow(self): - B = np.concatenate((np.arange(10000), np.arange(10000), np.arange(5000) - )) - A = np.arange(25000) - df = DataFrame({'A': A, - 'B': B, - 'C': A, - 'D': B, - 'E': np.random.randn(25000)}) - - left = df.groupby(['A', 'B', 'C', 'D']).sum() - right = df.groupby(['D', 'C', 'B', 'A']).sum() - assert len(left) == len(right) - - def test_groupby_sort_multi(self): - df = DataFrame({'a': ['foo', 'bar', 'baz'], - 'b': [3, 2, 1], - 'c': [0, 1, 2], - 'd': np.random.randn(3)}) - - tups = lmap(tuple, df[['a', 'b', 'c']].values) - tups = com._asarray_tuplesafe(tups) - result = df.groupby(['a', 'b', 'c'], sort=True).sum() - tm.assert_numpy_array_equal(result.index.values, tups[[1, 2, 0]]) - tups = lmap(tuple, df[['c', 'a', 'b']].values) - tups = com._asarray_tuplesafe(tups) - result = df.groupby(['c', 'a', 'b'], sort=True).sum() - tm.assert_numpy_array_equal(result.index.values, tups) +def test_sparse_friendly(df): + sdf = df[['C', 'D']].to_sparse() + with catch_warnings(record=True): + panel = tm.makePanel() + tm.add_nans(panel) + + def 
_check_work(gp): + gp.mean() + gp.agg(np.mean) + dict(iter(gp)) + + # it works! + _check_work(sdf.groupby(lambda x: x // 2)) + _check_work(sdf['C'].groupby(lambda x: x // 2)) + _check_work(sdf.groupby(df['A'])) + + # do this someday + # _check_work(panel.groupby(lambda x: x.month, axis=1)) + + +def test_panel_groupby(): + with catch_warnings(record=True): + panel = tm.makePanel() + tm.add_nans(panel) + grouped = panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1}, + axis='items') + agged = grouped.mean() + agged2 = grouped.agg(lambda x: x.mean('items')) + + tm.assert_panel_equal(agged, agged2) + + tm.assert_index_equal(agged.items, Index([0, 1])) - tups = lmap(tuple, df[['b', 'c', 'a']].values) + grouped = panel.groupby(lambda x: x.month, axis='major') + agged = grouped.mean() + + exp = Index(sorted(list(set(panel.major_axis.month)))) + tm.assert_index_equal(agged.major_axis, exp) + + grouped = panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, + axis='minor') + agged = grouped.mean() + tm.assert_index_equal(agged.minor_axis, Index([0, 1])) + + +def test_groupby_2d_malformed(): + d = DataFrame(index=lrange(2)) + d['group'] = ['g1', 'g2'] + d['zeros'] = [0, 0] + d['ones'] = [1, 1] + d['label'] = ['l1', 'l2'] + tmp = d.groupby(['group']).mean() + res_values = np.array([[0, 1], [0, 1]], dtype=np.int64) + tm.assert_index_equal(tmp.columns, Index(['zeros', 'ones'])) + tm.assert_numpy_array_equal(tmp.values, res_values) + + +def test_int32_overflow(): + B = np.concatenate((np.arange(10000), np.arange(10000), np.arange(5000) + )) + A = np.arange(25000) + df = DataFrame({'A': A, + 'B': B, + 'C': A, + 'D': B, + 'E': np.random.randn(25000)}) + + left = df.groupby(['A', 'B', 'C', 'D']).sum() + right = df.groupby(['D', 'C', 'B', 'A']).sum() + assert len(left) == len(right) + + +def test_groupby_sort_multi(): + df = DataFrame({'a': ['foo', 'bar', 'baz'], + 'b': [3, 2, 1], + 'c': [0, 1, 2], + 'd': np.random.randn(3)}) + + tups = lmap(tuple, df[['a', 'b', 'c']].values) + tups = com._asarray_tuplesafe(tups) + result = df.groupby(['a', 'b', 'c'], sort=True).sum() + tm.assert_numpy_array_equal(result.index.values, tups[[1, 2, 0]]) + + tups = lmap(tuple, df[['c', 'a', 'b']].values) + tups = com._asarray_tuplesafe(tups) + result = df.groupby(['c', 'a', 'b'], sort=True).sum() + tm.assert_numpy_array_equal(result.index.values, tups) + + tups = lmap(tuple, df[['b', 'c', 'a']].values) + tups = com._asarray_tuplesafe(tups) + result = df.groupby(['b', 'c', 'a'], sort=True).sum() + tm.assert_numpy_array_equal(result.index.values, tups[[2, 1, 0]]) + + df = DataFrame({'a': [0, 1, 2, 0, 1, 2], + 'b': [0, 0, 0, 1, 1, 1], + 'd': np.random.randn(6)}) + grouped = df.groupby(['a', 'b'])['d'] + result = grouped.sum() + + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): + tups = lmap(tuple, df[keys].values) tups = com._asarray_tuplesafe(tups) - result = df.groupby(['b', 'c', 'a'], sort=True).sum() - tm.assert_numpy_array_equal(result.index.values, tups[[2, 1, 0]]) - - df = DataFrame({'a': [0, 1, 2, 0, 1, 2], - 'b': [0, 0, 0, 1, 1, 1], - 'd': np.random.randn(6)}) - grouped = df.groupby(['a', 'b'])['d'] - result = grouped.sum() - _check_groupby(df, result, ['a', 'b'], 'd') - - def test_intercept_builtin_sum(self): - s = Series([1., 2., np.nan, 3.]) - grouped = s.groupby([0, 1, 2, 2]) - - result = grouped.agg(builtins.sum) - result2 = grouped.apply(builtins.sum) - expected = grouped.sum() - assert_series_equal(result, expected) - assert_series_equal(result2, expected) - - def test_rank_apply(self): - lev1 = 
tm.rands_array(10, 100) - lev2 = tm.rands_array(10, 130) - lab1 = np.random.randint(0, 100, size=500) - lab2 = np.random.randint(0, 130, size=500) - - df = DataFrame({'value': np.random.randn(500), - 'key1': lev1.take(lab1), - 'key2': lev2.take(lab2)}) - - result = df.groupby(['key1', 'key2']).value.rank() - - expected = [] - for key, piece in df.groupby(['key1', 'key2']): - expected.append(piece.value.rank()) - expected = concat(expected, axis=0) - expected = expected.reindex(result.index) - assert_series_equal(result, expected) - - result = df.groupby(['key1', 'key2']).value.rank(pct=True) - - expected = [] - for key, piece in df.groupby(['key1', 'key2']): - expected.append(piece.value.rank(pct=True)) - expected = concat(expected, axis=0) - expected = expected.reindex(result.index) - assert_series_equal(result, expected) - - @pytest.mark.parametrize("grps", [ - ['qux'], ['qux', 'quux']]) - @pytest.mark.parametrize("vals", [ - [2, 2, 8, 2, 6], - [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'), - pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'), - pd.Timestamp('2018-01-06')]]) - @pytest.mark.parametrize("ties_method,ascending,pct,exp", [ - ('average', True, False, [2., 2., 5., 2., 4.]), - ('average', True, True, [0.4, 0.4, 1.0, 0.4, 0.8]), - ('average', False, False, [4., 4., 1., 4., 2.]), - ('average', False, True, [.8, .8, .2, .8, .4]), - ('min', True, False, [1., 1., 5., 1., 4.]), - ('min', True, True, [0.2, 0.2, 1.0, 0.2, 0.8]), - ('min', False, False, [3., 3., 1., 3., 2.]), - ('min', False, True, [.6, .6, .2, .6, .4]), - ('max', True, False, [3., 3., 5., 3., 4.]), - ('max', True, True, [0.6, 0.6, 1.0, 0.6, 0.8]), - ('max', False, False, [5., 5., 1., 5., 2.]), - ('max', False, True, [1., 1., .2, 1., .4]), - ('first', True, False, [1., 2., 5., 3., 4.]), - ('first', True, True, [0.2, 0.4, 1.0, 0.6, 0.8]), - ('first', False, False, [3., 4., 1., 5., 2.]), - ('first', False, True, [.6, .8, .2, 1., .4]), - ('dense', True, False, [1., 1., 3., 1., 2.]), - ('dense', True, True, [0.2, 0.2, 0.6, 0.2, 0.4]), - ('dense', False, False, [3., 3., 1., 3., 2.]), - ('dense', False, True, [.6, .6, .2, .6, .4]), - ]) - def test_rank_args(self, grps, vals, ties_method, ascending, pct, exp): - key = np.repeat(grps, len(vals)) - vals = vals * len(grps) - df = DataFrame({'key': key, 'val': vals}) - result = df.groupby('key').rank(method=ties_method, - ascending=ascending, pct=pct) - - exp_df = DataFrame(exp * len(grps), columns=['val']) - assert_frame_equal(result, exp_df) - - @pytest.mark.parametrize("grps", [ - ['qux'], ['qux', 'quux']]) - @pytest.mark.parametrize("vals", [ - [-np.inf, -np.inf, np.nan, 1., np.nan, np.inf, np.inf], - ]) - @pytest.mark.parametrize("ties_method,ascending,na_option,exp", [ - ('average', True, 'keep', [1.5, 1.5, np.nan, 3, np.nan, 4.5, 4.5]), - ('average', True, 'top', [3.5, 3.5, 1.5, 5., 1.5, 6.5, 6.5]), - ('average', True, 'bottom', [1.5, 1.5, 6.5, 3., 6.5, 4.5, 4.5]), - ('average', False, 'keep', [4.5, 4.5, np.nan, 3, np.nan, 1.5, 1.5]), - ('average', False, 'top', [6.5, 6.5, 1.5, 5., 1.5, 3.5, 3.5]), - ('average', False, 'bottom', [4.5, 4.5, 6.5, 3., 6.5, 1.5, 1.5]), - ('min', True, 'keep', [1., 1., np.nan, 3., np.nan, 4., 4.]), - ('min', True, 'top', [3., 3., 1., 5., 1., 6., 6.]), - ('min', True, 'bottom', [1., 1., 6., 3., 6., 4., 4.]), - ('min', False, 'keep', [4., 4., np.nan, 3., np.nan, 1., 1.]), - ('min', False, 'top', [6., 6., 1., 5., 1., 3., 3.]), - ('min', False, 'bottom', [4., 4., 6., 3., 6., 1., 1.]), - ('max', True, 'keep', [2., 2., np.nan, 3., 
np.nan, 5., 5.]), - ('max', True, 'top', [4., 4., 2., 5., 2., 7., 7.]), - ('max', True, 'bottom', [2., 2., 7., 3., 7., 5., 5.]), - ('max', False, 'keep', [5., 5., np.nan, 3., np.nan, 2., 2.]), - ('max', False, 'top', [7., 7., 2., 5., 2., 4., 4.]), - ('max', False, 'bottom', [5., 5., 7., 3., 7., 2., 2.]), - ('first', True, 'keep', [1., 2., np.nan, 3., np.nan, 4., 5.]), - ('first', True, 'top', [3., 4., 1., 5., 2., 6., 7.]), - ('first', True, 'bottom', [1., 2., 6., 3., 7., 4., 5.]), - ('first', False, 'keep', [4., 5., np.nan, 3., np.nan, 1., 2.]), - ('first', False, 'top', [6., 7., 1., 5., 2., 3., 4.]), - ('first', False, 'bottom', [4., 5., 6., 3., 7., 1., 2.]), - ('dense', True, 'keep', [1., 1., np.nan, 2., np.nan, 3., 3.]), - ('dense', True, 'top', [2., 2., 1., 3., 1., 4., 4.]), - ('dense', True, 'bottom', [1., 1., 4., 2., 4., 3., 3.]), - ('dense', False, 'keep', [3., 3., np.nan, 2., np.nan, 1., 1.]), - ('dense', False, 'top', [4., 4., 1., 3., 1., 2., 2.]), - ('dense', False, 'bottom', [3., 3., 4., 2., 4., 1., 1.]) - ]) - def test_infs_n_nans(self, grps, vals, ties_method, ascending, na_option, - exp): - # GH 20561 - key = np.repeat(grps, len(vals)) - vals = vals * len(grps) - df = DataFrame({'key': key, 'val': vals}) - result = df.groupby('key').rank(method=ties_method, - ascending=ascending, - na_option=na_option) - exp_df = DataFrame(exp * len(grps), columns=['val']) - assert_frame_equal(result, exp_df) - - @pytest.mark.parametrize("grps", [ - ['qux'], ['qux', 'quux']]) - @pytest.mark.parametrize("vals", [ - [2, 2, np.nan, 8, 2, 6, np.nan, np.nan], # floats - [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'), np.nan, - pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'), - pd.Timestamp('2018-01-06'), np.nan, np.nan] - ]) - @pytest.mark.parametrize("ties_method,ascending,na_option,pct,exp", [ - ('average', True, 'keep', False, - [2., 2., np.nan, 5., 2., 4., np.nan, np.nan]), - ('average', True, 'keep', True, - [0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan]), - ('average', False, 'keep', False, - [4., 4., np.nan, 1., 4., 2., np.nan, np.nan]), - ('average', False, 'keep', True, - [.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan]), - ('min', True, 'keep', False, - [1., 1., np.nan, 5., 1., 4., np.nan, np.nan]), - ('min', True, 'keep', True, - [0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan]), - ('min', False, 'keep', False, - [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]), - ('min', False, 'keep', True, - [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]), - ('max', True, 'keep', False, - [3., 3., np.nan, 5., 3., 4., np.nan, np.nan]), - ('max', True, 'keep', True, - [0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]), - ('max', False, 'keep', False, - [5., 5., np.nan, 1., 5., 2., np.nan, np.nan]), - ('max', False, 'keep', True, - [1., 1., np.nan, 0.2, 1., 0.4, np.nan, np.nan]), - ('first', True, 'keep', False, - [1., 2., np.nan, 5., 3., 4., np.nan, np.nan]), - ('first', True, 'keep', True, - [0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]), - ('first', False, 'keep', False, - [3., 4., np.nan, 1., 5., 2., np.nan, np.nan]), - ('first', False, 'keep', True, - [.6, 0.8, np.nan, 0.2, 1., 0.4, np.nan, np.nan]), - ('dense', True, 'keep', False, - [1., 1., np.nan, 3., 1., 2., np.nan, np.nan]), - ('dense', True, 'keep', True, - [0.2, 0.2, np.nan, 0.6, 0.2, 0.4, np.nan, np.nan]), - ('dense', False, 'keep', False, - [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]), - ('dense', False, 'keep', True, - [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]), - ('average', True, 'no_na', False, [2., 2., 
7., 5., 2., 4., 7., 7.]), - ('average', True, 'no_na', True, - [0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875]), - ('average', False, 'no_na', False, [4., 4., 7., 1., 4., 2., 7., 7.]), - ('average', False, 'no_na', True, - [0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875]), - ('min', True, 'no_na', False, [1., 1., 6., 5., 1., 4., 6., 6.]), - ('min', True, 'no_na', True, - [0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75]), - ('min', False, 'no_na', False, [3., 3., 6., 1., 3., 2., 6., 6.]), - ('min', False, 'no_na', True, - [0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75]), - ('max', True, 'no_na', False, [3., 3., 8., 5., 3., 4., 8., 8.]), - ('max', True, 'no_na', True, - [0.375, 0.375, 1., 0.625, 0.375, 0.5, 1., 1.]), - ('max', False, 'no_na', False, [5., 5., 8., 1., 5., 2., 8., 8.]), - ('max', False, 'no_na', True, - [0.625, 0.625, 1., 0.125, 0.625, 0.25, 1., 1.]), - ('first', True, 'no_na', False, [1., 2., 6., 5., 3., 4., 7., 8.]), - ('first', True, 'no_na', True, - [0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.]), - ('first', False, 'no_na', False, [3., 4., 6., 1., 5., 2., 7., 8.]), - ('first', False, 'no_na', True, - [0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.]), - ('dense', True, 'no_na', False, [1., 1., 4., 3., 1., 2., 4., 4.]), - ('dense', True, 'no_na', True, - [0.125, 0.125, 0.5, 0.375, 0.125, 0.25, 0.5, 0.5]), - ('dense', False, 'no_na', False, [3., 3., 4., 1., 3., 2., 4., 4.]), - ('dense', False, 'no_na', True, - [0.375, 0.375, 0.5, 0.125, 0.375, 0.25, 0.5, 0.5]) - ]) - def test_rank_args_missing(self, grps, vals, ties_method, ascending, - na_option, pct, exp): - key = np.repeat(grps, len(vals)) - vals = vals * len(grps) - df = DataFrame({'key': key, 'val': vals}) - result = df.groupby('key').rank(method=ties_method, - ascending=ascending, - na_option=na_option, pct=pct) - - exp_df = DataFrame(exp * len(grps), columns=['val']) - assert_frame_equal(result, exp_df) - - @pytest.mark.parametrize("pct,exp", [ - (False, [3., 3., 3., 3., 3.]), - (True, [.6, .6, .6, .6, .6])]) - def test_rank_resets_each_group(self, pct, exp): - df = DataFrame( - {'key': ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'], - 'val': [1] * 10} - ) - result = df.groupby('key').rank(pct=pct) - exp_df = DataFrame(exp * 2, columns=['val']) - assert_frame_equal(result, exp_df) - - def test_rank_avg_even_vals(self): - df = DataFrame({'key': ['a'] * 4, 'val': [1] * 4}) - result = df.groupby('key').rank() - exp_df = DataFrame([2.5, 2.5, 2.5, 2.5], columns=['val']) - assert_frame_equal(result, exp_df) - - @pytest.mark.parametrize("ties_method", [ - 'average', 'min', 'max', 'first', 'dense']) - @pytest.mark.parametrize("ascending", [True, False]) - @pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) - @pytest.mark.parametrize("pct", [True, False]) - @pytest.mark.parametrize("vals", [ - ['bar', 'bar', 'foo', 'bar', 'baz'], - ['bar', np.nan, 'foo', np.nan, 'baz'] - ]) - def test_rank_object_raises(self, ties_method, ascending, na_option, - pct, vals): - df = DataFrame({'key': ['foo'] * 5, 'val': vals}) - with tm.assert_raises_regex(TypeError, "not callable"): - df.groupby('key').rank(method=ties_method, - ascending=ascending, - na_option=na_option, pct=pct) - - @pytest.mark.parametrize("agg_func", ['any', 'all']) - @pytest.mark.parametrize("skipna", [True, False]) - @pytest.mark.parametrize("vals", [ - ['foo', 'bar', 'baz'], ['foo', '', ''], ['', '', ''], - [1, 2, 3], [1, 0, 0], [0, 0, 0], - [1., 2., 3.], [1., 0., 0.], [0., 0., 0.], - [True, True, True], [True, False, False], [False, 
False, False], - [np.nan, np.nan, np.nan] - ]) - def test_groupby_bool_aggs(self, agg_func, skipna, vals): - df = DataFrame({'key': ['a'] * 3 + ['b'] * 3, 'val': vals * 2}) - - # Figure out expectation using Python builtin - exp = getattr(compat.builtins, agg_func)(vals) - - # edge case for missing data with skipna and 'any' - if skipna and all(isna(vals)) and agg_func == 'any': - exp = False - - exp_df = DataFrame([exp] * 2, columns=['val'], index=pd.Index( - ['a', 'b'], name='key')) - result = getattr(df.groupby('key'), agg_func)(skipna=skipna) - assert_frame_equal(result, exp_df) - - def test_dont_clobber_name_column(self): - df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'], - 'name': ['foo', 'bar', 'baz'] * 2}) - - result = df.groupby('key').apply(lambda x: x) - assert_frame_equal(result, df) - - def test_skip_group_keys(self): - from pandas import concat - - tsf = tm.makeTimeDataFrame() - - grouped = tsf.groupby(lambda x: x.month, group_keys=False) - result = grouped.apply(lambda x: x.sort_values(by='A')[:3]) - - pieces = [] - for key, group in grouped: - pieces.append(group.sort_values(by='A')[:3]) - - expected = concat(pieces) - assert_frame_equal(result, expected) - - grouped = tsf['A'].groupby(lambda x: x.month, group_keys=False) - result = grouped.apply(lambda x: x.sort_values()[:3]) - - pieces = [] - for key, group in grouped: - pieces.append(group.sort_values()[:3]) - - expected = concat(pieces) - assert_series_equal(result, expected) - - def test_no_nonsense_name(self): - # GH #995 - s = self.frame['C'].copy() - s.name = None - - result = s.groupby(self.frame['A']).agg(np.sum) - assert result.name is None - - def test_multifunc_sum_bug(self): - # GH #1065 - x = DataFrame(np.arange(9).reshape(3, 3)) - x['test'] = 0 - x['fl'] = [1.3, 1.5, 1.6] - - grouped = x.groupby('test') - result = grouped.agg({'fl': 'sum', 2: 'size'}) - assert result['fl'].dtype == np.float64 - - def test_handle_dict_return_value(self): - def f(group): - return {'max': group.max(), 'min': group.min()} - - def g(group): - return Series({'max': group.max(), 'min': group.min()}) - - result = self.df.groupby('A')['C'].apply(f) - expected = self.df.groupby('A')['C'].apply(g) - - assert isinstance(result, Series) - assert_series_equal(result, expected) - - def test_set_group_name(self): - def f(group): - assert group.name is not None - return group - - def freduce(group): - assert group.name is not None - return group.sum() - - def foo(x): - return freduce(x) - - def _check_all(grouped): - # make sure all these work - grouped.apply(f) - grouped.aggregate(freduce) - grouped.aggregate({'C': freduce, 'D': freduce}) - grouped.transform(f) - - grouped['C'].apply(f) - grouped['C'].aggregate(freduce) - grouped['C'].aggregate([freduce, foo]) - grouped['C'].transform(f) + expected = f(df.groupby(tups)[field]) + for k, v in compat.iteritems(expected): + assert (result[k] == v) - _check_all(self.df.groupby('A')) - _check_all(self.df.groupby(['A', 'B'])) - - def test_group_name_available_in_inference_pass(self): - # gh-15062 - df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)}) - - names = [] - - def f(group): - names.append(group.name) - return group.copy() - - df.groupby('a', sort=False, group_keys=False).apply(f) - # we expect 2 zeros because we call ``f`` once to see if a faster route - # can be used. 
- expected_names = [0, 0, 1, 2] - assert names == expected_names - - def test_no_dummy_key_names(self): - # see gh-1291 - result = self.df.groupby(self.df['A'].values).sum() - assert result.index.name is None - - result = self.df.groupby([self.df['A'].values, self.df['B'].values - ]).sum() - assert result.index.names == (None, None) - - def test_groupby_sort_multiindex_series(self): - # series multiindex groupby sort argument was not being passed through - # _compress_group_index - # GH 9444 - index = MultiIndex(levels=[[1, 2], [1, 2]], - labels=[[0, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 0]], - names=['a', 'b']) - mseries = Series([0, 1, 2, 3, 4, 5], index=index) - index = MultiIndex(levels=[[1, 2], [1, 2]], - labels=[[0, 0, 1], [1, 0, 0]], names=['a', 'b']) - mseries_result = Series([0, 2, 4], index=index) - - result = mseries.groupby(level=['a', 'b'], sort=False).first() - assert_series_equal(result, mseries_result) - result = mseries.groupby(level=['a', 'b'], sort=True).first() - assert_series_equal(result, mseries_result.sort_index()) - - def test_groupby_reindex_inside_function(self): - - periods = 1000 - ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods) - df = DataFrame({'high': np.arange( - periods), 'low': np.arange(periods)}, index=ind) - - def agg_before(hour, func, fix=False): - """ - Run an aggregate func on the subset of data. - """ - - def _func(data): - d = data.loc[data.index.map( - lambda x: x.hour < 11)].dropna() - if fix: - data[data.index[0]] - if len(d) == 0: - return None - return func(d) - - return _func - - def afunc(data): - d = data.select(lambda x: x.hour < 11).dropna() - return np.max(d) - - grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) - closure_bad = grouped.agg({'high': agg_before(11, np.max)}) - closure_good = grouped.agg({'high': agg_before(11, np.max, True)}) - - assert_frame_equal(closure_bad, closure_good) - - def test_cython_median(self): - df = DataFrame(np.random.randn(1000)) - df.values[::2] = np.nan - - labels = np.random.randint(0, 50, size=1000).astype(float) - labels[::17] = np.nan - - result = df.groupby(labels).median() - exp = df.groupby(labels).agg(nanops.nanmedian) - assert_frame_equal(result, exp) - - df = DataFrame(np.random.randn(1000, 5)) - rs = df.groupby(labels).agg(np.median) - xp = df.groupby(labels).median() - assert_frame_equal(rs, xp) - - def test_median_empty_bins(self): - df = pd.DataFrame(np.random.randint(0, 44, 500)) - - grps = range(0, 55, 5) - bins = pd.cut(df[0], grps) - - result = df.groupby(bins).median() - expected = df.groupby(bins).agg(lambda x: x.median()) - assert_frame_equal(result, expected) - - @pytest.mark.parametrize("dtype", [ - 'int8', 'int16', 'int32', 'int64', 'float32', 'float64']) - @pytest.mark.parametrize("method,data", [ - ('first', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}), - ('last', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}), - ('min', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}), - ('max', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}), - ('nth', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], - 'args': [1]}), - ('count', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], - 'out_type': 'int64'}) - ]) - def test_groupby_non_arithmetic_agg_types(self, dtype, method, data): - # GH9311, GH6620 - df = pd.DataFrame( - [{'a': 1, 'b': 1}, - {'a': 1, 'b': 2}, - {'a': 2, 'b': 3}, - {'a': 2, 'b': 4}]) - - df['b'] = df.b.astype(dtype) - - if 'args' not in data: - data['args'] = [] - - if 'out_type' in data: - out_type = data['out_type'] - else: - out_type = dtype - - exp = 
data['df'] - df_out = pd.DataFrame(exp) - - df_out['b'] = df_out.b.astype(out_type) - df_out.set_index('a', inplace=True) - - grpd = df.groupby('a') - t = getattr(grpd, method)(*data['args']) - assert_frame_equal(t, df_out) - - def test_groupby_non_arithmetic_agg_intlike_precision(self): - # GH9311, GH6620 - c = 24650000000000000 - - inputs = ((Timestamp('2011-01-15 12:50:28.502376'), - Timestamp('2011-01-20 12:50:28.593448')), (1 + c, 2 + c)) - - for i in inputs: - df = pd.DataFrame([{'a': 1, 'b': i[0]}, {'a': 1, 'b': i[1]}]) - - grp_exp = {'first': {'expected': i[0]}, - 'last': {'expected': i[1]}, - 'min': {'expected': i[0]}, - 'max': {'expected': i[1]}, - 'nth': {'expected': i[1], - 'args': [1]}, - 'count': {'expected': 2}} - - for method, data in compat.iteritems(grp_exp): - if 'args' not in data: - data['args'] = [] - - grpd = df.groupby('a') - res = getattr(grpd, method)(*data['args']) - assert res.iloc[0].b == data['expected'] - - def test_groupby_multiindex_missing_pair(self): - # GH9049 - df = DataFrame({'group1': ['a', 'a', 'a', 'b'], - 'group2': ['c', 'c', 'd', 'c'], - 'value': [1, 1, 1, 5]}) - df = df.set_index(['group1', 'group2']) - df_grouped = df.groupby(level=['group1', 'group2'], sort=True) - - res = df_grouped.agg('sum') - idx = MultiIndex.from_tuples( - [('a', 'c'), ('a', 'd'), ('b', 'c')], names=['group1', 'group2']) - exp = DataFrame([[2], [1], [5]], index=idx, columns=['value']) - - tm.assert_frame_equal(res, exp) - - def test_groupby_multiindex_not_lexsorted(self): - # GH 11640 - - # define the lexsorted version - lexsorted_mi = MultiIndex.from_tuples( - [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c']) - lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) - assert lexsorted_df.columns.is_lexsorted() - - # define the non-lexsorted version - not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'], - data=[[1, 'b1', 'c1', 3], - [1, 'b2', 'c2', 4]]) - not_lexsorted_df = not_lexsorted_df.pivot_table( - index='a', columns=['b', 'c'], values='d') - not_lexsorted_df = not_lexsorted_df.reset_index() - assert not not_lexsorted_df.columns.is_lexsorted() - - # compare the results - tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) - - expected = lexsorted_df.groupby('a').mean() - with tm.assert_produces_warning(PerformanceWarning): - result = not_lexsorted_df.groupby('a').mean() - tm.assert_frame_equal(expected, result) - - # a transforming function should work regardless of sort - # GH 14776 - df = DataFrame({'x': ['a', 'a', 'b', 'a'], - 'y': [1, 1, 2, 2], - 'z': [1, 2, 3, 4]}).set_index(['x', 'y']) - assert not df.index.is_lexsorted() - - for level in [0, 1, [0, 1]]: - for sort in [False, True]: - result = df.groupby(level=level, sort=sort).apply( - DataFrame.drop_duplicates) - expected = df - tm.assert_frame_equal(expected, result) - - result = df.sort_index().groupby(level=level, sort=sort).apply( - DataFrame.drop_duplicates) - expected = df.sort_index() - tm.assert_frame_equal(expected, result) - - def test_gb_apply_list_of_unequal_len_arrays(self): - - # GH1738 - df = DataFrame({'group1': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a', - 'b', 'b', 'b'], - 'group2': ['c', 'c', 'd', 'd', 'd', 'e', 'c', 'c', 'd', - 'd', 'd', 'e'], - 'weight': [1.1, 2, 3, 4, 5, 6, 2, 4, 6, 8, 1, 2], - 'value': [7.1, 8, 9, 10, 11, 12, 8, 7, 6, 5, 4, 3]}) - df = df.set_index(['group1', 'group2']) - df_grouped = df.groupby(level=['group1', 'group2'], sort=True) - - def noddy(value, weight): - out = np.array(value * weight).repeat(3) - return out - - # the kernel function 
returns arrays of unequal length - # pandas sniffs the first one, sees it's an array and not - # a list, and assumed the rest are of equal length - # and so tries a vstack - - # don't die - df_grouped.apply(lambda x: noddy(x.value, x.weight)) - - def test_fill_constistency(self): - - # GH9221 - # pass thru keyword arguments to the generated wrapper - # are set if the passed kw is None (only) - df = DataFrame(index=pd.MultiIndex.from_product( - [['value1', 'value2'], date_range('2014-01-01', '2014-01-06')]), - columns=Index( - ['1', '2'], name='id')) - df['1'] = [np.nan, 1, np.nan, np.nan, 11, np.nan, np.nan, 2, np.nan, - np.nan, 22, np.nan] - df['2'] = [np.nan, 3, np.nan, np.nan, 33, np.nan, np.nan, 4, np.nan, - np.nan, 44, np.nan] - - expected = df.groupby(level=0, axis=0).fillna(method='ffill') - result = df.T.groupby(level=0, axis=1).fillna(method='ffill').T - assert_frame_equal(result, expected) - - def test_index_label_overlaps_location(self): - # checking we don't have any label/location confusion in the - # the wake of GH5375 - df = DataFrame(list('ABCDE'), index=[2, 0, 2, 1, 1]) - g = df.groupby(list('ababb')) - actual = g.filter(lambda x: len(x) > 2) - expected = df.iloc[[1, 3, 4]] - assert_frame_equal(actual, expected) - - ser = df[0] - g = ser.groupby(list('ababb')) - actual = g.filter(lambda x: len(x) > 2) - expected = ser.take([1, 3, 4]) - assert_series_equal(actual, expected) - - # ... and again, with a generic Index of floats - df.index = df.index.astype(float) - g = df.groupby(list('ababb')) - actual = g.filter(lambda x: len(x) > 2) - expected = df.iloc[[1, 3, 4]] - assert_frame_equal(actual, expected) - - ser = df[0] - g = ser.groupby(list('ababb')) - actual = g.filter(lambda x: len(x) > 2) - expected = ser.take([1, 3, 4]) - assert_series_equal(actual, expected) - - def test_groupby_cumprod(self): - # GH 4095 - df = pd.DataFrame({'key': ['b'] * 10, 'value': 2}) - - actual = df.groupby('key')['value'].cumprod() - expected = df.groupby('key')['value'].apply(lambda x: x.cumprod()) - expected.name = 'value' - tm.assert_series_equal(actual, expected) - - df = pd.DataFrame({'key': ['b'] * 100, 'value': 2}) - actual = df.groupby('key')['value'].cumprod() - # if overflows, groupby product casts to float - # while numpy passes back invalid values - df['value'] = df['value'].astype(float) - expected = df.groupby('key')['value'].apply(lambda x: x.cumprod()) - expected.name = 'value' - tm.assert_series_equal(actual, expected) - - def test_ops_general(self): - ops = [('mean', np.mean), - ('median', np.median), - ('std', np.std), - ('var', np.var), - ('sum', np.sum), - ('prod', np.prod), - ('min', np.min), - ('max', np.max), - ('first', lambda x: x.iloc[0]), - ('last', lambda x: x.iloc[-1]), - ('count', np.size), ] - try: - from scipy.stats import sem - except ImportError: - pass - else: - ops.append(('sem', sem)) - df = DataFrame(np.random.randn(1000)) - labels = np.random.randint(0, 50, size=1000).astype(float) - - for op, targop in ops: - result = getattr(df.groupby(labels), op)().astype(float) - expected = df.groupby(labels).agg(targop) - try: - tm.assert_frame_equal(result, expected) - except BaseException as exc: - exc.args += ('operation: %s' % op, ) - raise - - def test_max_nan_bug(self): - raw = """,Date,app,File -2013-04-23,2013-04-23 00:00:00,,log080001.log -2013-05-06,2013-05-06 00:00:00,,log.log -2013-05-07,2013-05-07 00:00:00,OE,xlsx""" - - df = pd.read_csv(StringIO(raw), parse_dates=[0]) - gb = df.groupby('Date') - r = gb[['File']].max() - e = gb['File'].max().to_frame() 
- tm.assert_frame_equal(r, e) - assert not r['File'].isna().any() - - def test_nlargest(self): - a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) - b = Series(list('a' * 5 + 'b' * 5)) - gb = a.groupby(b) - r = gb.nlargest(3) - e = Series([ - 7, 5, 3, 10, 9, 6 - ], index=MultiIndex.from_arrays([list('aaabbb'), [3, 2, 1, 9, 5, 8]])) - tm.assert_series_equal(r, e) - - a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) - gb = a.groupby(b) - e = Series([ - 3, 2, 1, 3, 3, 2 - ], index=MultiIndex.from_arrays([list('aaabbb'), [2, 3, 1, 6, 5, 7]])) - assert_series_equal(gb.nlargest(3, keep='last'), e) - - def test_nsmallest(self): - a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) - b = Series(list('a' * 5 + 'b' * 5)) - gb = a.groupby(b) - r = gb.nsmallest(3) - e = Series([ - 1, 2, 3, 0, 4, 6 - ], index=MultiIndex.from_arrays([list('aaabbb'), [0, 4, 1, 6, 7, 8]])) - tm.assert_series_equal(r, e) - - a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) - gb = a.groupby(b) - e = Series([ - 0, 1, 1, 0, 1, 2 - ], index=MultiIndex.from_arrays([list('aaabbb'), [4, 1, 0, 9, 8, 7]])) - assert_series_equal(gb.nsmallest(3, keep='last'), e) - - def test_transform_doesnt_clobber_ints(self): - # GH 7972 - n = 6 - x = np.arange(n) - df = DataFrame({'a': x // 2, 'b': 2.0 * x, 'c': 3.0 * x}) - df2 = DataFrame({'a': x // 2 * 1.0, 'b': 2.0 * x, 'c': 3.0 * x}) - - gb = df.groupby('a') - result = gb.transform('mean') - - gb2 = df2.groupby('a') - expected = gb2.transform('mean') - tm.assert_frame_equal(result, expected) - - def test_groupby_apply_all_none(self): - # Tests to make sure no errors if apply function returns all None - # values. Issue 9684. - test_df = DataFrame({'groups': [0, 0, 1, 1], - 'random_vars': [8, 7, 4, 5]}) - - def test_func(x): - pass - - result = test_df.groupby('groups').apply(test_func) - expected = DataFrame() - tm.assert_frame_equal(result, expected) - - def test_groupby_apply_none_first(self): - # GH 12824. Tests if apply returns None first. 
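Both apply tests here pin down the same contract: groups for which the applied
function returns None are simply dropped from the result. A minimal,
self-contained sketch of that behaviour (the helper name is illustrative):

    import pandas as pd

    df = pd.DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]})

    def first_and_last(x):
        # returning None for a too-small group drops it from the output
        if x.shape[0] < 2:
            return None
        return x.iloc[[0, -1]]

    # only the rows from group 1 survive; group 2 is absent entirely
    print(df.groupby('groups').apply(first_and_last))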
- test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]}) - test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]}) - - def test_func(x): - if x.shape[0] < 2: + _check_groupby(df, result, ['a', 'b'], 'd') + + +def test_dont_clobber_name_column(): + df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'], + 'name': ['foo', 'bar', 'baz'] * 2}) + + result = df.groupby('key').apply(lambda x: x) + assert_frame_equal(result, df) + + +def test_skip_group_keys(): + + tsf = tm.makeTimeDataFrame() + + grouped = tsf.groupby(lambda x: x.month, group_keys=False) + result = grouped.apply(lambda x: x.sort_values(by='A')[:3]) + + pieces = [] + for key, group in grouped: + pieces.append(group.sort_values(by='A')[:3]) + + expected = pd.concat(pieces) + assert_frame_equal(result, expected) + + grouped = tsf['A'].groupby(lambda x: x.month, group_keys=False) + result = grouped.apply(lambda x: x.sort_values()[:3]) + + pieces = [] + for key, group in grouped: + pieces.append(group.sort_values()[:3]) + + expected = pd.concat(pieces) + assert_series_equal(result, expected) + + +def test_no_nonsense_name(frame): + # GH #995 + s = frame['C'].copy() + s.name = None + + result = s.groupby(frame['A']).agg(np.sum) + assert result.name is None + + +def test_multifunc_sum_bug(): + # GH #1065 + x = DataFrame(np.arange(9).reshape(3, 3)) + x['test'] = 0 + x['fl'] = [1.3, 1.5, 1.6] + + grouped = x.groupby('test') + result = grouped.agg({'fl': 'sum', 2: 'size'}) + assert result['fl'].dtype == np.float64 + + +def test_handle_dict_return_value(df): + def f(group): + return {'max': group.max(), 'min': group.min()} + + def g(group): + return Series({'max': group.max(), 'min': group.min()}) + + result = df.groupby('A')['C'].apply(f) + expected = df.groupby('A')['C'].apply(g) + + assert isinstance(result, Series) + assert_series_equal(result, expected) + + +@pytest.mark.parametrize('grouper', ['A', ['A', 'B']]) +def test_set_group_name(df, grouper): + def f(group): + assert group.name is not None + return group + + def freduce(group): + assert group.name is not None + return group.sum() + + def foo(x): + return freduce(x) + + grouped = df.groupby(grouper) + + # make sure all these work + grouped.apply(f) + grouped.aggregate(freduce) + grouped.aggregate({'C': freduce, 'D': freduce}) + grouped.transform(f) + + grouped['C'].apply(f) + grouped['C'].aggregate(freduce) + grouped['C'].aggregate([freduce, foo]) + grouped['C'].transform(f) + + +def test_group_name_available_in_inference_pass(): + # gh-15062 + df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)}) + + names = [] + + def f(group): + names.append(group.name) + return group.copy() + + df.groupby('a', sort=False, group_keys=False).apply(f) + # we expect 2 zeros because we call ``f`` once to see if a faster route + # can be used. 
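A standalone sketch of the double call described above: the function passed to
``apply`` runs on the first group an extra time while groupby probes for a
faster route, so any side effects there fire twice:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)})
    seen = []

    def f(group):
        seen.append(group.name)  # side effect records each invocation
        return group.copy()

    df.groupby('a', sort=False, group_keys=False).apply(f)
    print(seen)  # [0, 0, 1, 2] -- first group evaluated twice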
+ expected_names = [0, 0, 1, 2] + assert names == expected_names + + +def test_no_dummy_key_names(df): + # see gh-1291 + result = df.groupby(df['A'].values).sum() + assert result.index.name is None + + result = df.groupby([df['A'].values, df['B'].values]).sum() + assert result.index.names == (None, None) + + +def test_groupby_sort_multiindex_series(): + # series multiindex groupby sort argument was not being passed through + # _compress_group_index + # GH 9444 + index = MultiIndex(levels=[[1, 2], [1, 2]], + labels=[[0, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 0]], + names=['a', 'b']) + mseries = Series([0, 1, 2, 3, 4, 5], index=index) + index = MultiIndex(levels=[[1, 2], [1, 2]], + labels=[[0, 0, 1], [1, 0, 0]], names=['a', 'b']) + mseries_result = Series([0, 2, 4], index=index) + + result = mseries.groupby(level=['a', 'b'], sort=False).first() + assert_series_equal(result, mseries_result) + result = mseries.groupby(level=['a', 'b'], sort=True).first() + assert_series_equal(result, mseries_result.sort_index()) + + +def test_groupby_reindex_inside_function(): + + periods = 1000 + ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods) + df = DataFrame({'high': np.arange( + periods), 'low': np.arange(periods)}, index=ind) + + def agg_before(hour, func, fix=False): + """ + Run an aggregate func on the subset of data. + """ + + def _func(data): + d = data.loc[data.index.map( + lambda x: x.hour < 11)].dropna() + if fix: + data[data.index[0]] + if len(d) == 0: return None - return x.iloc[[0, -1]] - - result1 = test_df1.groupby('groups').apply(test_func) - result2 = test_df2.groupby('groups').apply(test_func) - index1 = MultiIndex.from_arrays([[1, 1], [0, 2]], - names=['groups', None]) - index2 = MultiIndex.from_arrays([[2, 2], [1, 3]], - names=['groups', None]) - expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]}, - index=index1) - expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]}, - index=index2) - tm.assert_frame_equal(result1, expected1) - tm.assert_frame_equal(result2, expected2) - - def test_groupby_preserves_sort(self): - # Test to ensure that groupby always preserves sort order of original - # object. Issue #8588 and #9651 - - df = DataFrame( - {'int_groups': [3, 1, 0, 1, 0, 3, 3, 3], - 'string_groups': ['z', 'a', 'z', 'a', 'a', 'g', 'g', 'g'], - 'ints': [8, 7, 4, 5, 2, 9, 1, 1], - 'floats': [2.3, 5.3, 6.2, -2.4, 2.2, 1.1, 1.1, 5], - 'strings': ['z', 'd', 'a', 'e', 'word', 'word2', '42', '47']}) - - # Try sorting on different types and with different group types - for sort_column in ['ints', 'floats', 'strings', ['ints', 'floats'], - ['ints', 'strings']]: - for group_column in ['int_groups', 'string_groups', - ['int_groups', 'string_groups']]: - - df = df.sort_values(by=sort_column) - - g = df.groupby(group_column) - - def test_sort(x): - assert_frame_equal(x, x.sort_values(by=sort_column)) - - g.apply(test_sort) - - def test_numpy_compat(self): - # see gh-12811 - df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]}) - g = df.groupby('A') - - msg = "numpy operations are not valid with groupby" - - for func in ('mean', 'var', 'std', 'cumprod', 'cumsum'): - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(g, func), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(g, func), foo=1) - - def test_group_shift_with_null_key(self): - # This test is designed to replicate the segfault in issue #13813. - n_rows = 1200 - - # Generate a moderately large dataframe with occasional missing - # values in column `B`, and then group by [`A`, `B`]. 
This should - # force `-1` in `labels` array of `g.grouper.group_info` exactly - # at those places, where the group-by key is partially missing. - df = DataFrame([(i % 12, i % 3 if i % 3 else np.nan, i) - for i in range(n_rows)], dtype=float, - columns=["A", "B", "Z"], index=None) - g = df.groupby(["A", "B"]) - - expected = DataFrame([(i + 12 if i % 3 and i < n_rows - 12 - else np.nan) - for i in range(n_rows)], dtype=float, - columns=["Z"], index=None) - result = g.shift(-1) - - assert_frame_equal(result, expected) - - def test_pivot_table_values_key_error(self): - # This test is designed to replicate the error in issue #14938 - df = pd.DataFrame({'eventDate': - pd.date_range(pd.datetime.today(), - periods=20, freq='M').tolist(), - 'thename': range(0, 20)}) - - df['year'] = df.set_index('eventDate').index.year - df['month'] = df.set_index('eventDate').index.month - - with pytest.raises(KeyError): - df.reset_index().pivot_table(index='year', columns='month', - values='badname', aggfunc='count') - - def test_cummin_cummax(self): - # GH 15048 - num_types = [np.int32, np.int64, np.float32, np.float64] - num_mins = [np.iinfo(np.int32).min, np.iinfo(np.int64).min, - np.finfo(np.float32).min, np.finfo(np.float64).min] - num_max = [np.iinfo(np.int32).max, np.iinfo(np.int64).max, - np.finfo(np.float32).max, np.finfo(np.float64).max] - base_df = pd.DataFrame({'A': [1, 1, 1, 1, 2, 2, 2, 2], - 'B': [3, 4, 3, 2, 2, 3, 2, 1]}) - expected_mins = [3, 3, 3, 2, 2, 2, 2, 1] - expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] - - for dtype, min_val, max_val in zip(num_types, num_mins, num_max): - df = base_df.astype(dtype) - - # cummin - expected = pd.DataFrame({'B': expected_mins}).astype(dtype) - result = df.groupby('A').cummin() - tm.assert_frame_equal(result, expected) - result = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame() - tm.assert_frame_equal(result, expected) - - # Test cummin w/ min value for dtype - df.loc[[2, 6], 'B'] = min_val - expected.loc[[2, 3, 6, 7], 'B'] = min_val - result = df.groupby('A').cummin() - tm.assert_frame_equal(result, expected) - expected = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame() - tm.assert_frame_equal(result, expected) - - # cummax - expected = pd.DataFrame({'B': expected_maxs}).astype(dtype) - result = df.groupby('A').cummax() - tm.assert_frame_equal(result, expected) - result = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame() - tm.assert_frame_equal(result, expected) - - # Test cummax w/ max value for dtype - df.loc[[2, 6], 'B'] = max_val - expected.loc[[2, 3, 6, 7], 'B'] = max_val - result = df.groupby('A').cummax() - tm.assert_frame_equal(result, expected) - expected = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame() - tm.assert_frame_equal(result, expected) - - # Test nan in some values - base_df.loc[[0, 2, 4, 6], 'B'] = np.nan - expected = pd.DataFrame({'B': [np.nan, 4, np.nan, 2, - np.nan, 3, np.nan, 1]}) - result = base_df.groupby('A').cummin() - tm.assert_frame_equal(result, expected) - expected = (base_df.groupby('A') - .B - .apply(lambda x: x.cummin()) - .to_frame()) - tm.assert_frame_equal(result, expected) - - expected = pd.DataFrame({'B': [np.nan, 4, np.nan, 4, - np.nan, 3, np.nan, 3]}) - result = base_df.groupby('A').cummax() - tm.assert_frame_equal(result, expected) - expected = (base_df.groupby('A') - .B - .apply(lambda x: x.cummax()) - .to_frame()) - tm.assert_frame_equal(result, expected) - - # Test nan in entire column - base_df['B'] = np.nan - expected = pd.DataFrame({'B': [np.nan] * 8}) - result = 
base_df.groupby('A').cummin() - tm.assert_frame_equal(expected, result) - result = base_df.groupby('A').B.apply(lambda x: x.cummin()).to_frame() - tm.assert_frame_equal(expected, result) - result = base_df.groupby('A').cummax() - tm.assert_frame_equal(expected, result) - result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame() - tm.assert_frame_equal(expected, result) - - # GH 15561 - df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(['2001']))) - expected = pd.Series(pd.to_datetime('2001'), index=[0], name='b') - for method in ['cummax', 'cummin']: - result = getattr(df.groupby('a')['b'], method)() - tm.assert_series_equal(expected, result) - - # GH 15635 - df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1])) - result = df.groupby('a').b.cummax() - expected = pd.Series([2, 1, 2], name='b') - tm.assert_series_equal(result, expected) - - df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) - result = df.groupby('a').b.cummin() - expected = pd.Series([1, 2, 1], name='b') - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('in_vals, out_vals', [ - - # Basics: strictly increasing (T), strictly decreasing (F), - # abs val increasing (F), non-strictly increasing (T) - ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1], - [True, False, False, True]), - - # Test with inf vals - ([1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf], - [True, False, True, False]), - - # Test with nan vals; should always be False - ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], - [False, False, False, False]), - ]) - def test_is_monotonic_increasing(self, in_vals, out_vals): - # GH 17015 - source_dict = { - 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'], - 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'], - 'C': in_vals} - df = pd.DataFrame(source_dict) - result = df.groupby('B').C.is_monotonic_increasing - index = Index(list('abcd'), name='B') - expected = pd.Series(index=index, data=out_vals, name='C') - tm.assert_series_equal(result, expected) - - # Also check result equal to manually taking x.is_monotonic_increasing. - expected = ( - df.groupby(['B']).C.apply(lambda x: x.is_monotonic_increasing)) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('in_vals, out_vals', [ - # Basics: strictly decreasing (T), strictly increasing (F), - # abs val decreasing (F), non-strictly increasing (T) - ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1], - [True, False, False, True]), - - # Test with inf vals - ([np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf], - [True, True, False, True]), - - # Test with nan vals; should always be False - ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], - [False, False, False, False]), - ]) - def test_is_monotonic_decreasing(self, in_vals, out_vals): - # GH 17015 - source_dict = { - 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'], - 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'], - 'C': in_vals} - - df = pd.DataFrame(source_dict) - result = df.groupby('B').C.is_monotonic_decreasing - index = Index(list('abcd'), name='B') - expected = pd.Series(index=index, data=out_vals, name='C') - tm.assert_series_equal(result, expected) - - def test_apply_numeric_coercion_when_datetime(self): - # In the past, group-by/apply operations have been over-eager - # in converting dtypes to numeric, in the presence of datetime - # columns. Various GH issues were filed, the reproductions - # for which are here. 
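The coercion being guarded against is easy to reproduce in isolation; a
minimal sketch of the GH 15670 shape that follows, where 'Str' should keep
object dtype even though a datetime column sits beside it:

    import pandas as pd

    df = pd.DataFrame({'Number': [1, 2],
                       'Date': ['2017-03-02'] * 2,
                       'Str': ['foo', 'inf']})
    df.Date = pd.to_datetime(df.Date)

    result = df.groupby('Number').apply(lambda x: x.iloc[0])
    print(result['Str'].dtype)  # object dtype preserved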
- - # GH 15670 - df = pd.DataFrame({'Number': [1, 2], - 'Date': ["2017-03-02"] * 2, - 'Str': ["foo", "inf"]}) - expected = df.groupby(['Number']).apply(lambda x: x.iloc[0]) - df.Date = pd.to_datetime(df.Date) - result = df.groupby(['Number']).apply(lambda x: x.iloc[0]) - tm.assert_series_equal(result['Str'], expected['Str']) - - # GH 15421 - df = pd.DataFrame({'A': [10, 20, 30], - 'B': ['foo', '3', '4'], - 'T': [pd.Timestamp("12:31:22")] * 3}) - - def get_B(g): - return g.iloc[0][['B']] - result = df.groupby('A').apply(get_B)['B'] - expected = df.B - expected.index = df.A - tm.assert_series_equal(result, expected) - - # GH 14423 - def predictions(tool): - out = pd.Series(index=['p1', 'p2', 'useTime'], dtype=object) - if 'step1' in list(tool.State): - out['p1'] = str(tool[tool.State == 'step1'].Machine.values[0]) - if 'step2' in list(tool.State): - out['p2'] = str(tool[tool.State == 'step2'].Machine.values[0]) - out['useTime'] = str( - tool[tool.State == 'step2'].oTime.values[0]) - return out - df1 = pd.DataFrame({'Key': ['B', 'B', 'A', 'A'], - 'State': ['step1', 'step2', 'step1', 'step2'], - 'oTime': ['', '2016-09-19 05:24:33', - '', '2016-09-19 23:59:04'], - 'Machine': ['23', '36L', '36R', '36R']}) - df2 = df1.copy() - df2.oTime = pd.to_datetime(df2.oTime) - expected = df1.groupby('Key').apply(predictions).p1 - result = df2.groupby('Key').apply(predictions).p1 - tm.assert_series_equal(expected, result) - - def test_pipe(self): - # Test the pipe method of DataFrameGroupBy. - # Issue #17871 - - random_state = np.random.RandomState(1234567890) - - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': random_state.randn(8), - 'C': random_state.randn(8)}) - - def f(dfgb): - return dfgb.B.max() - dfgb.C.min().min() - - def square(srs): - return srs ** 2 - - # Note that the transformations are - # GroupBy -> Series - # Series -> Series - # This then chains the GroupBy.pipe and the - # NDFrame.pipe methods - result = df.groupby('A').pipe(f).pipe(square) - - index = Index([u'bar', u'foo'], dtype='object', name=u'A') - expected = pd.Series([8.99110003361, 8.17516964785], name='B', - index=index) - - assert_series_equal(expected, result) - - def test_pipe_args(self): - # Test passing args to the pipe method of DataFrameGroupBy. 
- # Issue #17871 - - df = pd.DataFrame({'group': ['A', 'A', 'B', 'B', 'C'], - 'x': [1.0, 2.0, 3.0, 2.0, 5.0], - 'y': [10.0, 100.0, 1000.0, -100.0, -1000.0]}) - - def f(dfgb, arg1): - return (dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False) - .groupby(dfgb.grouper)) - - def g(dfgb, arg2): - return dfgb.sum() / dfgb.sum().sum() + arg2 - - def h(df, arg3): - return df.x + df.y - arg3 - - result = (df - .groupby('group') - .pipe(f, 0) - .pipe(g, 10) - .pipe(h, 100)) - - # Assert the results here - index = pd.Index(['A', 'B', 'C'], name='group') - expected = pd.Series([-79.5160891089, -78.4839108911, -80], - index=index) - - assert_series_equal(expected, result) - - # test SeriesGroupby.pipe - ser = pd.Series([1, 1, 2, 2, 3, 3]) - result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count()) - - expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3])) - - assert_series_equal(result, expected) - - def test_empty_dataframe_groupby(self): - # GH8093 - df = DataFrame(columns=['A', 'B', 'C']) - - result = df.groupby('A').sum() - expected = DataFrame(columns=['B', 'C'], dtype=np.float64) - expected.index.name = 'A' - - assert_frame_equal(result, expected) - - def test_tuple_warns(self): - # https://github.com/pandas-dev/pandas/issues/18314 - df = pd.DataFrame({('a', 'b'): [1, 1, 2, 2], 'a': [1, 1, 1, 2], - 'b': [1, 2, 2, 2], 'c': [1, 1, 1, 1]}) - with tm.assert_produces_warning(FutureWarning) as w: - df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean() - - assert "Interpreting tuple 'by' as a list" in str(w[0].message) + return func(d) + + return _func + + def afunc(data): + d = data.select(lambda x: x.hour < 11).dropna() + return np.max(d) + + grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) + closure_bad = grouped.agg({'high': agg_before(11, np.max)}) + closure_good = grouped.agg({'high': agg_before(11, np.max, True)}) + + assert_frame_equal(closure_bad, closure_good) + + +def test_groupby_multiindex_missing_pair(): + # GH9049 + df = DataFrame({'group1': ['a', 'a', 'a', 'b'], + 'group2': ['c', 'c', 'd', 'c'], + 'value': [1, 1, 1, 5]}) + df = df.set_index(['group1', 'group2']) + df_grouped = df.groupby(level=['group1', 'group2'], sort=True) + + res = df_grouped.agg('sum') + idx = MultiIndex.from_tuples( + [('a', 'c'), ('a', 'd'), ('b', 'c')], names=['group1', 'group2']) + exp = DataFrame([[2], [1], [5]], index=idx, columns=['value']) + + tm.assert_frame_equal(res, exp) + + +def test_groupby_multiindex_not_lexsorted(): + # GH 11640 + + # define the lexsorted version + lexsorted_mi = MultiIndex.from_tuples( + [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c']) + lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) + assert lexsorted_df.columns.is_lexsorted() + + # define the non-lexsorted version + not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'], + data=[[1, 'b1', 'c1', 3], + [1, 'b2', 'c2', 4]]) + not_lexsorted_df = not_lexsorted_df.pivot_table( + index='a', columns=['b', 'c'], values='d') + not_lexsorted_df = not_lexsorted_df.reset_index() + assert not not_lexsorted_df.columns.is_lexsorted() + + # compare the results + tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) + + expected = lexsorted_df.groupby('a').mean() + with tm.assert_produces_warning(PerformanceWarning): + result = not_lexsorted_df.groupby('a').mean() + tm.assert_frame_equal(expected, result) + + # a transforming function should work regardless of sort + # GH 14776 + df = DataFrame({'x': ['a', 'a', 'b', 'a'], + 'y': [1, 1, 2, 2], + 'z': [1, 2, 3, 4]}).set_index(['x', 
'y'])
+    assert not df.index.is_lexsorted()
+
+    for level in [0, 1, [0, 1]]:
+        for sort in [False, True]:
+            result = df.groupby(level=level, sort=sort).apply(
+                DataFrame.drop_duplicates)
+            expected = df
+            tm.assert_frame_equal(expected, result)
+
+            result = df.sort_index().groupby(level=level, sort=sort).apply(
+                DataFrame.drop_duplicates)
+            expected = df.sort_index()
+            tm.assert_frame_equal(expected, result)
+
+
+def test_index_label_overlaps_location():
+    # checking we don't have any label/location confusion in
+    # the wake of GH5375
+    df = DataFrame(list('ABCDE'), index=[2, 0, 2, 1, 1])
+    g = df.groupby(list('ababb'))
+    actual = g.filter(lambda x: len(x) > 2)
+    expected = df.iloc[[1, 3, 4]]
+    assert_frame_equal(actual, expected)
+
+    ser = df[0]
+    g = ser.groupby(list('ababb'))
+    actual = g.filter(lambda x: len(x) > 2)
+    expected = ser.take([1, 3, 4])
+    assert_series_equal(actual, expected)
+
+    # ... and again, with a generic Index of floats
+    df.index = df.index.astype(float)
+    g = df.groupby(list('ababb'))
+    actual = g.filter(lambda x: len(x) > 2)
+    expected = df.iloc[[1, 3, 4]]
+    assert_frame_equal(actual, expected)
+
+    ser = df[0]
+    g = ser.groupby(list('ababb'))
+    actual = g.filter(lambda x: len(x) > 2)
+    expected = ser.take([1, 3, 4])
+    assert_series_equal(actual, expected)
+
+
+def test_transform_doesnt_clobber_ints():
+    # GH 7972
+    n = 6
+    x = np.arange(n)
+    df = DataFrame({'a': x // 2, 'b': 2.0 * x, 'c': 3.0 * x})
+    df2 = DataFrame({'a': x // 2 * 1.0, 'b': 2.0 * x, 'c': 3.0 * x})
+
+    gb = df.groupby('a')
+    result = gb.transform('mean')
+
+    gb2 = df2.groupby('a')
+    expected = gb2.transform('mean')
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize('sort_column', ['ints', 'floats', 'strings',
+                                         ['ints', 'floats'],
+                                         ['ints', 'strings']])
+@pytest.mark.parametrize('group_column', ['int_groups', 'string_groups',
+                                          ['int_groups', 'string_groups']])
+def test_groupby_preserves_sort(sort_column, group_column):
+    # Test to ensure that groupby always preserves sort order of original
+    # object. Issue #8588 and #9651
+
+    df = DataFrame(
+        {'int_groups': [3, 1, 0, 1, 0, 3, 3, 3],
+         'string_groups': ['z', 'a', 'z', 'a', 'a', 'g', 'g', 'g'],
+         'ints': [8, 7, 4, 5, 2, 9, 1, 1],
+         'floats': [2.3, 5.3, 6.2, -2.4, 2.2, 1.1, 1.1, 5],
+         'strings': ['z', 'd', 'a', 'e', 'word', 'word2', '42', '47']})
+
+    # Try sorting on different types and with different group types
+
+    df = df.sort_values(by=sort_column)
+    g = df.groupby(group_column)
+
+    def test_sort(x):
+        assert_frame_equal(x, x.sort_values(by=sort_column))
+
+    g.apply(test_sort)
+
+
+def test_group_shift_with_null_key():
+    # This test is designed to replicate the segfault in issue #13813.
+    n_rows = 1200
+
+    # Generate a moderately large dataframe with occasional missing
+    # values in column `B`, and then group by [`A`, `B`]. This should
+    # force `-1` in `labels` array of `g.grouper.group_info` exactly
+    # at those places where the group-by key is partially missing.
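A small sketch of the same shape, easier to eyeball than the 1200-row frame
below: rows whose key is partially missing belong to no group and come back
as NaN from the shift:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': [1, 1, 1, 2],
                       'B': [1.0, np.nan, 1.0, 2.0],
                       'Z': [10, 20, 30, 40]})

    # row 1 has a NaN key, so it is excluded from every group
    print(df.groupby(['A', 'B']).Z.shift(-1))  # [30.0, NaN, NaN, NaN]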
+ df = DataFrame([(i % 12, i % 3 if i % 3 else np.nan, i) + for i in range(n_rows)], dtype=float, + columns=["A", "B", "Z"], index=None) + g = df.groupby(["A", "B"]) + + expected = DataFrame([(i + 12 if i % 3 and i < n_rows - 12 + else np.nan) + for i in range(n_rows)], dtype=float, + columns=["Z"], index=None) + result = g.shift(-1) + + assert_frame_equal(result, expected) + - with tm.assert_produces_warning(None): - df.groupby(('a', 'b')).c.mean() +def test_pivot_table_values_key_error(): + # This test is designed to replicate the error in issue #14938 + df = pd.DataFrame({'eventDate': + pd.date_range(pd.datetime.today(), + periods=20, freq='M').tolist(), + 'thename': range(0, 20)}) - def test_tuple_warns_unhashable(self): - # https://github.com/pandas-dev/pandas/issues/18314 - business_dates = date_range(start='4/1/2014', end='6/30/2014', - freq='B') - df = DataFrame(1, index=business_dates, columns=['a', 'b']) + df['year'] = df.set_index('eventDate').index.year + df['month'] = df.set_index('eventDate').index.month + + with pytest.raises(KeyError): + df.reset_index().pivot_table(index='year', columns='month', + values='badname', aggfunc='count') - with tm.assert_produces_warning(FutureWarning) as w: - df.groupby((df.index.year, df.index.month)).nth([0, 3, -1]) - assert "Interpreting tuple 'by' as a list" in str(w[0].message) +def test_empty_dataframe_groupby(): + # GH8093 + df = DataFrame(columns=['A', 'B', 'C']) - def test_tuple_correct_keyerror(self): - # https://github.com/pandas-dev/pandas/issues/18798 - df = pd.DataFrame(1, index=range(3), - columns=pd.MultiIndex.from_product([[1, 2], - [3, 4]])) - with tm.assert_raises_regex(KeyError, "(7, 8)"): - df.groupby((7, 8)).mean() + result = df.groupby('A').sum() + expected = DataFrame(columns=['B', 'C'], dtype=np.float64) + expected.index.name = 'A' + assert_frame_equal(result, expected) -def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): - tups = lmap(tuple, df[keys].values) - tups = com._asarray_tuplesafe(tups) - expected = f(df.groupby(tups)[field]) - for k, v in compat.iteritems(expected): - assert (result[k] == v) + +def test_tuple_warns(): + # https://github.com/pandas-dev/pandas/issues/18314 + df = pd.DataFrame({('a', 'b'): [1, 1, 2, 2], 'a': [1, 1, 1, 2], + 'b': [1, 2, 2, 2], 'c': [1, 1, 1, 1]}) + with tm.assert_produces_warning(FutureWarning) as w: + df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean() + + assert "Interpreting tuple 'by' as a list" in str(w[0].message) + + with tm.assert_produces_warning(None): + df.groupby(('a', 'b')).c.mean() + + +def test_tuple_warns_unhashable(): + # https://github.com/pandas-dev/pandas/issues/18314 + business_dates = date_range(start='4/1/2014', end='6/30/2014', + freq='B') + df = DataFrame(1, index=business_dates, columns=['a', 'b']) + + with tm.assert_produces_warning(FutureWarning) as w: + df.groupby((df.index.year, df.index.month)).nth([0, 3, -1]) + + assert "Interpreting tuple 'by' as a list" in str(w[0].message) + + +def test_tuple_correct_keyerror(): + # https://github.com/pandas-dev/pandas/issues/18798 + df = pd.DataFrame(1, index=range(3), + columns=pd.MultiIndex.from_product([[1, 2], + [3, 4]])) + with tm.assert_raises_regex(KeyError, "(7, 8)"): + df.groupby((7, 8)).mean() diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 57becd342d370..743237f5b386c 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -9,6 +9,7 @@ Index, MultiIndex, DataFrame, Series, CategoricalIndex) from 
pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal) +from pandas.core.groupby.groupby import Grouping from pandas.compat import lrange, long from pandas import compat @@ -16,13 +17,12 @@ import pandas.util.testing as tm import pandas as pd -from .common import MixIn # selection # -------------------------------- -class TestSelection(MixIn): +class TestSelection(): def test_select_bad_cols(self): df = DataFrame([[1, 2]], columns=['A', 'B']) @@ -48,14 +48,14 @@ def test_groupby_duplicated_column_errormsg(self): assert c.columns.nlevels == 1 assert c.columns.size == 3 - def test_column_select_via_attr(self): - result = self.df.groupby('A').C.sum() - expected = self.df.groupby('A')['C'].sum() + def test_column_select_via_attr(self, df): + result = df.groupby('A').C.sum() + expected = df.groupby('A')['C'].sum() assert_series_equal(result, expected) - self.df['mean'] = 1.5 - result = self.df.groupby('A').mean() - expected = self.df.groupby('A').agg(np.mean) + df['mean'] = 1.5 + result = df.groupby('A').mean() + expected = df.groupby('A').agg(np.mean) assert_frame_equal(result, expected) def test_getitem_list_of_columns(self): @@ -96,7 +96,7 @@ def test_getitem_numeric_column_names(self): # grouping # -------------------------------- -class TestGrouping(MixIn): +class TestGrouping(): def test_grouper_index_types(self): # related GH5375 @@ -291,17 +291,17 @@ def test_grouper_getting_correct_binner(self): names=['one', 'two'])) assert_frame_equal(result, expected) - def test_grouper_iter(self): - assert sorted(self.df.groupby('A').grouper) == ['bar', 'foo'] + def test_grouper_iter(self, df): + assert sorted(df.groupby('A').grouper) == ['bar', 'foo'] - def test_empty_groups(self): + def test_empty_groups(self, df): # see gh-1048 - pytest.raises(ValueError, self.df.groupby, []) + pytest.raises(ValueError, df.groupby, []) - def test_groupby_grouper(self): - grouped = self.df.groupby('A') + def test_groupby_grouper(self, df): + grouped = df.groupby('A') - result = self.df.groupby(grouped.grouper).mean() + result = df.groupby(grouped.grouper).mean() expected = grouped.mean() tm.assert_frame_equal(result, expected) @@ -339,10 +339,9 @@ def test_groupby_grouper_f_sanity_checked(self): pytest.raises(AssertionError, ts.groupby, lambda key: key[0:6]) - def test_grouping_error_on_multidim_input(self): - from pandas.core.groupby.groupby import Grouping + def test_grouping_error_on_multidim_input(self, df): pytest.raises(ValueError, - Grouping, self.df.index, self.df[['A', 'A']]) + Grouping, df.index, df[['A', 'A']]) def test_multiindex_passthru(self): @@ -354,26 +353,25 @@ def test_multiindex_passthru(self): result = df.groupby(axis=1, level=[0, 1]).first() assert_frame_equal(result, df) - def test_multiindex_negative_level(self): + def test_multiindex_negative_level(self, mframe): # GH 13901 - result = self.mframe.groupby(level=-1).sum() - expected = self.mframe.groupby(level='second').sum() + result = mframe.groupby(level=-1).sum() + expected = mframe.groupby(level='second').sum() assert_frame_equal(result, expected) - result = self.mframe.groupby(level=-2).sum() - expected = self.mframe.groupby(level='first').sum() + result = mframe.groupby(level=-2).sum() + expected = mframe.groupby(level='first').sum() assert_frame_equal(result, expected) - result = self.mframe.groupby(level=[-2, -1]).sum() - expected = self.mframe + result = mframe.groupby(level=[-2, -1]).sum() + expected = mframe assert_frame_equal(result, expected) - result = 
self.mframe.groupby(level=[-1, 'first']).sum() - expected = self.mframe.groupby(level=['second', 'first']).sum() + result = mframe.groupby(level=[-1, 'first']).sum() + expected = mframe.groupby(level=['second', 'first']).sum() assert_frame_equal(result, expected) - def test_multifunc_select_col_integer_cols(self): - df = self.df + def test_multifunc_select_col_integer_cols(self, df): df.columns = np.arange(len(df.columns)) # it works! @@ -428,9 +426,9 @@ def test_groupby_multiindex_tuple(self): tm.assert_dict_equal(expected, result) @pytest.mark.parametrize('sort', [True, False]) - def test_groupby_level(self, sort): + def test_groupby_level(self, sort, mframe, df): # GH 17537 - frame = self.mframe + frame = mframe deleveled = frame.reset_index() result0 = frame.groupby(level=0, sort=sort).sum() @@ -464,7 +462,7 @@ def test_groupby_level(self, sort): assert_frame_equal(result1, expected1.T) # raise exception for non-MultiIndex - pytest.raises(ValueError, self.df.groupby, level=1) + pytest.raises(ValueError, df.groupby, level=1) def test_groupby_level_index_names(self): # GH4014 this used to raise ValueError since 'exp'>1 (in py2) @@ -496,9 +494,9 @@ def test_groupby_level_with_nas(self, sort): expected = Series([6., 18.], index=[0.0, 1.0]) assert_series_equal(result, expected) - def test_groupby_args(self): + def test_groupby_args(self, mframe): # PR8618 and issue 8015 - frame = self.mframe + frame = mframe def j(): frame.groupby() @@ -516,14 +514,14 @@ def k(): [True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]], [False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]] ]) - def test_level_preserve_order(self, sort, labels): + def test_level_preserve_order(self, sort, labels, mframe): # GH 17537 - grouped = self.mframe.groupby(level=0, sort=sort) + grouped = mframe.groupby(level=0, sort=sort) exp_labels = np.array(labels, np.intp) assert_almost_equal(grouped.grouper.labels[0], exp_labels) - def test_grouping_labels(self): - grouped = self.mframe.groupby(self.mframe.index.get_level_values(0)) + def test_grouping_labels(self, mframe): + grouped = mframe.groupby(mframe.index.get_level_values(0)) exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp) assert_almost_equal(grouped.grouper.labels[0], exp_labels) @@ -531,7 +529,7 @@ def test_grouping_labels(self): # get_group # -------------------------------- -class TestGetGroup(MixIn): +class TestGetGroup(): def test_get_group(self): with catch_warnings(record=True): @@ -638,29 +636,28 @@ def test_gb_key_len_equal_axis_len(self): # groups & iteration # -------------------------------- -class TestIteration(MixIn): +class TestIteration(): - def test_groups(self): - grouped = self.df.groupby(['A']) + def test_groups(self, df): + grouped = df.groupby(['A']) groups = grouped.groups assert groups is grouped.groups # caching works for k, v in compat.iteritems(grouped.groups): - assert (self.df.loc[v]['A'] == k).all() + assert (df.loc[v]['A'] == k).all() - grouped = self.df.groupby(['A', 'B']) + grouped = df.groupby(['A', 'B']) groups = grouped.groups assert groups is grouped.groups # caching works for k, v in compat.iteritems(grouped.groups): - assert (self.df.loc[v]['A'] == k[0]).all() - assert (self.df.loc[v]['B'] == k[1]).all() + assert (df.loc[v]['A'] == k[0]).all() + assert (df.loc[v]['B'] == k[1]).all() - def test_grouping_is_iterable(self): + def test_grouping_is_iterable(self, tsframe): # this code path isn't used anywhere else # not sure it's useful - grouped = self.tsframe.groupby([lambda x: x.weekday(), lambda x: x.year - ]) + grouped = 
tsframe.groupby([lambda x: x.weekday(), lambda x: x.year]) # test it works for g in grouped.grouper.groupings[0]: @@ -682,7 +679,7 @@ def test_multi_iter(self): assert e2 == two assert_series_equal(three, e3) - def test_multi_iter_frame(self): + def test_multi_iter_frame(self, three_group): k1 = np.array(['b', 'b', 'b', 'a', 'a', 'a']) k2 = np.array(['1', '2', '1', '2', '1', '2']) df = DataFrame({'v1': np.random.randn(6), @@ -715,7 +712,7 @@ def test_multi_iter_frame(self): assert len(groups) == 2 # axis = 1 - three_levels = self.three_group.groupby(['A', 'B', 'C']).mean() + three_levels = three_group.groupby(['A', 'B', 'C']).mean() grouped = three_levels.T.groupby(axis=1, level=(1, 2)) for key, group in grouped: pass @@ -733,13 +730,13 @@ def test_multi_iter_panel(self): expected = wp.reindex(major=exp_axis) assert_panel_equal(group, expected) - def test_dictify(self): - dict(iter(self.df.groupby('A'))) - dict(iter(self.df.groupby(['A', 'B']))) - dict(iter(self.df['C'].groupby(self.df['A']))) - dict(iter(self.df['C'].groupby([self.df['A'], self.df['B']]))) - dict(iter(self.df.groupby('A')['C'])) - dict(iter(self.df.groupby(['A', 'B'])['C'])) + def test_dictify(self, df): + dict(iter(df.groupby('A'))) + dict(iter(df.groupby(['A', 'B']))) + dict(iter(df['C'].groupby(df['A']))) + dict(iter(df['C'].groupby([df['A'], df['B']]))) + dict(iter(df.groupby('A')['C'])) + dict(iter(df.groupby(['A', 'B'])['C'])) def test_groupby_with_small_elem(self): # GH 8542 diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index ccde545b5b8e9..a32ba9ad76f14 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -7,314 +7,316 @@ assert_produces_warning, assert_series_equal) -from .common import MixIn - - -class TestNth(MixIn): - - def test_first_last_nth(self): - # tests for first / last / nth - grouped = self.df.groupby('A') - first = grouped.first() - expected = self.df.loc[[1, 0], ['B', 'C', 'D']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(first, expected) - - nth = grouped.nth(0) - assert_frame_equal(nth, expected) - - last = grouped.last() - expected = self.df.loc[[5, 7], ['B', 'C', 'D']] - expected.index = Index(['bar', 'foo'], name='A') - assert_frame_equal(last, expected) - - nth = grouped.nth(-1) - assert_frame_equal(nth, expected) - - nth = grouped.nth(1) - expected = self.df.loc[[2, 3], ['B', 'C', 'D']].copy() - expected.index = Index(['foo', 'bar'], name='A') - expected = expected.sort_index() - assert_frame_equal(nth, expected) - - # it works! 
- grouped['B'].first() - grouped['B'].last() - grouped['B'].nth(0) - - self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan - assert isna(grouped['B'].first()['foo']) - assert isna(grouped['B'].last()['foo']) - assert isna(grouped['B'].nth(0)['foo']) - - # v0.14.0 whatsnew - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - g = df.groupby('A') - result = g.first() - expected = df.iloc[[1, 2]].set_index('A') - assert_frame_equal(result, expected) - - expected = df.iloc[[1, 2]].set_index('A') - result = g.nth(0, dropna='any') - assert_frame_equal(result, expected) - - def test_first_last_nth_dtypes(self): - - df = self.df_mixed_floats.copy() - df['E'] = True - df['F'] = 1 - - # tests for first / last / nth - grouped = df.groupby('A') - first = grouped.first() - expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(first, expected) - - last = grouped.last() - expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(last, expected) - - nth = grouped.nth(1) - expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(nth, expected) - - # GH 2763, first/last shifting dtypes - idx = lrange(10) - idx.append(9) - s = Series(data=lrange(11), index=idx, name='IntCol') - assert s.dtype == 'int64' - f = s.groupby(level=0).first() - assert f.dtype == 'int64' - - def test_nth(self): - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - g = df.groupby('A') - - assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A')) - assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A')) - assert_frame_equal(g.nth(2), df.loc[[]].set_index('A')) - assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A')) - assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A')) - assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A')) - assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]]) - assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]]) - assert_frame_equal(g[['B']].nth(0), - df.loc[[0, 2], ['A', 'B']].set_index('A')) - - exp = df.set_index('A') - assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]]) - assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]]) - - exp['B'] = np.nan - assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]]) - assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]]) - - # out of bounds, regression from 0.13.1 - # GH 6621 - df = DataFrame({'color': {0: 'green', - 1: 'green', - 2: 'red', - 3: 'red', - 4: 'red'}, - 'food': {0: 'ham', - 1: 'eggs', - 2: 'eggs', - 3: 'ham', - 4: 'pork'}, - 'two': {0: 1.5456590000000001, - 1: -0.070345000000000005, - 2: -2.4004539999999999, - 3: 0.46206000000000003, - 4: 0.52350799999999997}, - 'one': {0: 0.56573799999999996, - 1: -0.9742360000000001, - 2: 1.033801, - 3: -0.78543499999999999, - 4: 0.70422799999999997}}).set_index(['color', - 'food']) - - result = df.groupby(level=0, as_index=False).nth(2) - expected = df.iloc[[-1]] - assert_frame_equal(result, expected) - - result = df.groupby(level=0, as_index=False).nth(3) - expected = df.loc[[]] - assert_frame_equal(result, expected) - - # GH 7559 - # from the vbench - df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64') - s = df[1] - g = df[0] - expected = s.groupby(g).first() - expected2 = s.groupby(g).apply(lambda x: 
x.iloc[0]) - assert_series_equal(expected2, expected, check_names=False) - assert expected.name == 1 - assert expected2.name == 1 - - # validate first - v = s[g == 1].iloc[0] - assert expected.iloc[0] == v - assert expected2.iloc[0] == v - - # this is NOT the same as .first (as sorted is default!) - # as it keeps the order in the series (and not the group order) - # related GH 7287 - expected = s.groupby(g, sort=False).first() - result = s.groupby(g, sort=False).nth(0, dropna='all') - assert_series_equal(result, expected) - - # doc example - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - g = df.groupby('A') - # PR 17493, related to issue 11038 - # test Series.nth with True for dropna produces FutureWarning - with assert_produces_warning(FutureWarning): - result = g.B.nth(0, dropna=True) - expected = g.B.first() - assert_series_equal(result, expected) - - # test multiple nth values - df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], - columns=['A', 'B']) - g = df.groupby('A') - - assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A')) - assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A')) - assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A')) - assert_frame_equal( - g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A')) - assert_frame_equal( - g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) - assert_frame_equal( - g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) - assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A')) - assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A')) - - business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', - freq='B') - df = DataFrame(1, index=business_dates, columns=['a', 'b']) - # get the first, fourth and last two business days for each month - key = [df.index.year, df.index.month] - result = df.groupby(key, as_index=False).nth([0, 3, -2, -1]) - expected_dates = pd.to_datetime( - ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1', - '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5', - '2014/6/27', '2014/6/30']) - expected = DataFrame(1, columns=['a', 'b'], index=expected_dates) - assert_frame_equal(result, expected) - - def test_nth_multi_index(self): - # PR 9090, related to issue 8979 - # test nth on MultiIndex, should match .first() - grouped = self.three_group.groupby(['A', 'B']) - result = grouped.nth(0) - expected = grouped.first() - assert_frame_equal(result, expected) - - def test_nth_multi_index_as_expected(self): - # PR 9090, related to issue 8979 - # test nth on MultiIndex - three_group = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny']}) - grouped = three_group.groupby(['A', 'B']) - result = grouped.nth(0) - expected = DataFrame( - {'C': ['dull', 'dull', 'dull', 'dull']}, - index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'], - ['one', 'two', 'one', 'two']], - names=['A', 'B'])) - assert_frame_equal(result, expected) - - def test_groupby_head_tail(self): - df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) - g_as = df.groupby('A', as_index=True) - g_not_as = df.groupby('A', as_index=False) - - # as_index= False, much easier - assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1)) - assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1)) - - empty_not_as = 
DataFrame(columns=df.columns, - index=pd.Index([], dtype=df.index.dtype)) - empty_not_as['A'] = empty_not_as['A'].astype(df.A.dtype) - empty_not_as['B'] = empty_not_as['B'].astype(df.B.dtype) - assert_frame_equal(empty_not_as, g_not_as.head(0)) - assert_frame_equal(empty_not_as, g_not_as.tail(0)) - assert_frame_equal(empty_not_as, g_not_as.head(-1)) - assert_frame_equal(empty_not_as, g_not_as.tail(-1)) - - assert_frame_equal(df, g_not_as.head(7)) # contains all - assert_frame_equal(df, g_not_as.tail(7)) - - # as_index=True, (used to be different) - df_as = df - - assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1)) - assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1)) - - empty_as = DataFrame(index=df_as.index[:0], columns=df.columns) - empty_as['A'] = empty_not_as['A'].astype(df.A.dtype) - empty_as['B'] = empty_not_as['B'].astype(df.B.dtype) - assert_frame_equal(empty_as, g_as.head(0)) - assert_frame_equal(empty_as, g_as.tail(0)) - assert_frame_equal(empty_as, g_as.head(-1)) - assert_frame_equal(empty_as, g_as.tail(-1)) - - assert_frame_equal(df_as, g_as.head(7)) # contains all - assert_frame_equal(df_as, g_as.tail(7)) - - # test with selection - assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []]) - assert_frame_equal(g_as[['A']].head(1), df_as.loc[[0, 2], ['A']]) - assert_frame_equal(g_as[['B']].head(1), df_as.loc[[0, 2], ['B']]) - assert_frame_equal(g_as[['A', 'B']].head(1), df_as.loc[[0, 2]]) - - assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []]) - assert_frame_equal(g_not_as[['A']].head(1), df_as.loc[[0, 2], ['A']]) - assert_frame_equal(g_not_as[['B']].head(1), df_as.loc[[0, 2], ['B']]) - assert_frame_equal(g_not_as[['A', 'B']].head(1), df_as.loc[[0, 2]]) - - def test_group_selection_cache(self): - # GH 12839 nth, head, and tail should return same result consistently - df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) - expected = df.iloc[[0, 2]].set_index('A') - - g = df.groupby('A') - result1 = g.head(n=2) - result2 = g.nth(0) - assert_frame_equal(result1, df) - assert_frame_equal(result2, expected) - - g = df.groupby('A') - result1 = g.tail(n=2) - result2 = g.nth(0) - assert_frame_equal(result1, df) - assert_frame_equal(result2, expected) - - g = df.groupby('A') - result1 = g.nth(0) - result2 = g.head(n=2) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, df) - - g = df.groupby('A') - result1 = g.nth(0) - result2 = g.tail(n=2) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, df) + +def test_first_last_nth(df): + # tests for first / last / nth + grouped = df.groupby('A') + first = grouped.first() + expected = df.loc[[1, 0], ['B', 'C', 'D']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(first, expected) + + nth = grouped.nth(0) + assert_frame_equal(nth, expected) + + last = grouped.last() + expected = df.loc[[5, 7], ['B', 'C', 'D']] + expected.index = Index(['bar', 'foo'], name='A') + assert_frame_equal(last, expected) + + nth = grouped.nth(-1) + assert_frame_equal(nth, expected) + + nth = grouped.nth(1) + expected = df.loc[[2, 3], ['B', 'C', 'D']].copy() + expected.index = Index(['foo', 'bar'], name='A') + expected = expected.sort_index() + assert_frame_equal(nth, expected) + + # it works! 
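+    # (smoke tests: the SeriesGroupBy variants should simply not raise)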
+ grouped['B'].first() + grouped['B'].last() + grouped['B'].nth(0) + + df.loc[df['A'] == 'foo', 'B'] = np.nan + assert isna(grouped['B'].first()['foo']) + assert isna(grouped['B'].last()['foo']) + assert isna(grouped['B'].nth(0)['foo']) + + # v0.14.0 whatsnew + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + result = g.first() + expected = df.iloc[[1, 2]].set_index('A') + assert_frame_equal(result, expected) + + expected = df.iloc[[1, 2]].set_index('A') + result = g.nth(0, dropna='any') + assert_frame_equal(result, expected) + + +def test_first_last_nth_dtypes(df_mixed_floats): + + df = df_mixed_floats.copy() + df['E'] = True + df['F'] = 1 + + # tests for first / last / nth + grouped = df.groupby('A') + first = grouped.first() + expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(first, expected) + + last = grouped.last() + expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(last, expected) + + nth = grouped.nth(1) + expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(nth, expected) + + # GH 2763, first/last shifting dtypes + idx = lrange(10) + idx.append(9) + s = Series(data=lrange(11), index=idx, name='IntCol') + assert s.dtype == 'int64' + f = s.groupby(level=0).first() + assert f.dtype == 'int64' + + +def test_nth(): + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + + assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A')) + assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A')) + assert_frame_equal(g.nth(2), df.loc[[]].set_index('A')) + assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A')) + assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A')) + assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A')) + assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]]) + assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]]) + assert_frame_equal(g[['B']].nth(0), + df.loc[[0, 2], ['A', 'B']].set_index('A')) + + exp = df.set_index('A') + assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]]) + assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]]) + + exp['B'] = np.nan + assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]]) + assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]]) + + # out of bounds, regression from 0.13.1 + # GH 6621 + df = DataFrame({'color': {0: 'green', + 1: 'green', + 2: 'red', + 3: 'red', + 4: 'red'}, + 'food': {0: 'ham', + 1: 'eggs', + 2: 'eggs', + 3: 'ham', + 4: 'pork'}, + 'two': {0: 1.5456590000000001, + 1: -0.070345000000000005, + 2: -2.4004539999999999, + 3: 0.46206000000000003, + 4: 0.52350799999999997}, + 'one': {0: 0.56573799999999996, + 1: -0.9742360000000001, + 2: 1.033801, + 3: -0.78543499999999999, + 4: 0.70422799999999997}}).set_index(['color', + 'food']) + + result = df.groupby(level=0, as_index=False).nth(2) + expected = df.iloc[[-1]] + assert_frame_equal(result, expected) + + result = df.groupby(level=0, as_index=False).nth(3) + expected = df.loc[[]] + assert_frame_equal(result, expected) + + # GH 7559 + # from the vbench + df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64') + s = df[1] + g = df[0] + expected = s.groupby(g).first() + expected2 = s.groupby(g).apply(lambda x: 
x.iloc[0]) + assert_series_equal(expected2, expected, check_names=False) + assert expected.name == 1 + assert expected2.name == 1 + + # validate first + v = s[g == 1].iloc[0] + assert expected.iloc[0] == v + assert expected2.iloc[0] == v + + # this is NOT the same as .first (as sorted is default!) + # as it keeps the order in the series (and not the group order) + # related GH 7287 + expected = s.groupby(g, sort=False).first() + result = s.groupby(g, sort=False).nth(0, dropna='all') + assert_series_equal(result, expected) + + # doc example + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + # PR 17493, related to issue 11038 + # test Series.nth with True for dropna produces FutureWarning + with assert_produces_warning(FutureWarning): + result = g.B.nth(0, dropna=True) + expected = g.B.first() + assert_series_equal(result, expected) + + # test multiple nth values + df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], + columns=['A', 'B']) + g = df.groupby('A') + + assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A')) + assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A')) + assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) + assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A')) + assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A')) + + business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', + freq='B') + df = DataFrame(1, index=business_dates, columns=['a', 'b']) + # get the first, fourth and last two business days for each month + key = [df.index.year, df.index.month] + result = df.groupby(key, as_index=False).nth([0, 3, -2, -1]) + expected_dates = pd.to_datetime( + ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1', + '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5', + '2014/6/27', '2014/6/30']) + expected = DataFrame(1, columns=['a', 'b'], index=expected_dates) + assert_frame_equal(result, expected) + + +def test_nth_multi_index(three_group): + # PR 9090, related to issue 8979 + # test nth on MultiIndex, should match .first() + grouped = three_group.groupby(['A', 'B']) + result = grouped.nth(0) + expected = grouped.first() + assert_frame_equal(result, expected) + + +def test_nth_multi_index_as_expected(): + # PR 9090, related to issue 8979 + # test nth on MultiIndex + three_group = DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny']}) + grouped = three_group.groupby(['A', 'B']) + result = grouped.nth(0) + expected = DataFrame( + {'C': ['dull', 'dull', 'dull', 'dull']}, + index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'], + ['one', 'two', 'one', 'two']], + names=['A', 'B'])) + assert_frame_equal(result, expected) + + +def test_groupby_head_tail(): + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) + g_as = df.groupby('A', as_index=True) + g_not_as = df.groupby('A', as_index=False) + + # as_index= False, much easier + assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1)) + assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1)) + + empty_not_as = 
DataFrame(columns=df.columns, + index=pd.Index([], dtype=df.index.dtype)) + empty_not_as['A'] = empty_not_as['A'].astype(df.A.dtype) + empty_not_as['B'] = empty_not_as['B'].astype(df.B.dtype) + assert_frame_equal(empty_not_as, g_not_as.head(0)) + assert_frame_equal(empty_not_as, g_not_as.tail(0)) + assert_frame_equal(empty_not_as, g_not_as.head(-1)) + assert_frame_equal(empty_not_as, g_not_as.tail(-1)) + + assert_frame_equal(df, g_not_as.head(7)) # contains all + assert_frame_equal(df, g_not_as.tail(7)) + + # as_index=True, (used to be different) + df_as = df + + assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1)) + assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1)) + + empty_as = DataFrame(index=df_as.index[:0], columns=df.columns) + empty_as['A'] = empty_not_as['A'].astype(df.A.dtype) + empty_as['B'] = empty_not_as['B'].astype(df.B.dtype) + assert_frame_equal(empty_as, g_as.head(0)) + assert_frame_equal(empty_as, g_as.tail(0)) + assert_frame_equal(empty_as, g_as.head(-1)) + assert_frame_equal(empty_as, g_as.tail(-1)) + + assert_frame_equal(df_as, g_as.head(7)) # contains all + assert_frame_equal(df_as, g_as.tail(7)) + + # test with selection + assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []]) + assert_frame_equal(g_as[['A']].head(1), df_as.loc[[0, 2], ['A']]) + assert_frame_equal(g_as[['B']].head(1), df_as.loc[[0, 2], ['B']]) + assert_frame_equal(g_as[['A', 'B']].head(1), df_as.loc[[0, 2]]) + + assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []]) + assert_frame_equal(g_not_as[['A']].head(1), df_as.loc[[0, 2], ['A']]) + assert_frame_equal(g_not_as[['B']].head(1), df_as.loc[[0, 2], ['B']]) + assert_frame_equal(g_not_as[['A', 'B']].head(1), df_as.loc[[0, 2]]) + + +def test_group_selection_cache(): + # GH 12839 nth, head, and tail should return same result consistently + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) + expected = df.iloc[[0, 2]].set_index('A') + + g = df.groupby('A') + result1 = g.head(n=2) + result2 = g.nth(0) + assert_frame_equal(result1, df) + assert_frame_equal(result2, expected) + + g = df.groupby('A') + result1 = g.tail(n=2) + result2 = g.nth(0) + assert_frame_equal(result1, df) + assert_frame_equal(result2, expected) + + g = df.groupby('A') + result1 = g.nth(0) + result2 = g.head(n=2) + assert_frame_equal(result1, expected) + assert_frame_equal(result2, df) + + g = df.groupby('A') + result1 = g.nth(0) + result2 = g.tail(n=2) + assert_frame_equal(result1, expected) + assert_frame_equal(result2, df) def test_nth_empty(): diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py new file mode 100644 index 0000000000000..6ad8b4905abff --- /dev/null +++ b/pandas/tests/groupby/test_rank.py @@ -0,0 +1,254 @@ +import pytest +import numpy as np +import pandas as pd +from pandas import DataFrame, concat +from pandas.util import testing as tm + + +def test_rank_apply(): + lev1 = tm.rands_array(10, 100) + lev2 = tm.rands_array(10, 130) + lab1 = np.random.randint(0, 100, size=500) + lab2 = np.random.randint(0, 130, size=500) + + df = DataFrame({'value': np.random.randn(500), + 'key1': lev1.take(lab1), + 'key2': lev2.take(lab2)}) + + result = df.groupby(['key1', 'key2']).value.rank() + + expected = [] + for key, piece in df.groupby(['key1', 'key2']): + expected.append(piece.value.rank()) + expected = concat(expected, axis=0) + expected = expected.reindex(result.index) + tm.assert_series_equal(result, expected) + + result = df.groupby(['key1', 'key2']).value.rank(pct=True) + + expected = [] + for key, piece in 
df.groupby(['key1', 'key2']): + expected.append(piece.value.rank(pct=True)) + expected = concat(expected, axis=0) + expected = expected.reindex(result.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("grps", [ + ['qux'], ['qux', 'quux']]) +@pytest.mark.parametrize("vals", [ + [2, 2, 8, 2, 6], + [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'), + pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'), + pd.Timestamp('2018-01-06')]]) +@pytest.mark.parametrize("ties_method,ascending,pct,exp", [ + ('average', True, False, [2., 2., 5., 2., 4.]), + ('average', True, True, [0.4, 0.4, 1.0, 0.4, 0.8]), + ('average', False, False, [4., 4., 1., 4., 2.]), + ('average', False, True, [.8, .8, .2, .8, .4]), + ('min', True, False, [1., 1., 5., 1., 4.]), + ('min', True, True, [0.2, 0.2, 1.0, 0.2, 0.8]), + ('min', False, False, [3., 3., 1., 3., 2.]), + ('min', False, True, [.6, .6, .2, .6, .4]), + ('max', True, False, [3., 3., 5., 3., 4.]), + ('max', True, True, [0.6, 0.6, 1.0, 0.6, 0.8]), + ('max', False, False, [5., 5., 1., 5., 2.]), + ('max', False, True, [1., 1., .2, 1., .4]), + ('first', True, False, [1., 2., 5., 3., 4.]), + ('first', True, True, [0.2, 0.4, 1.0, 0.6, 0.8]), + ('first', False, False, [3., 4., 1., 5., 2.]), + ('first', False, True, [.6, .8, .2, 1., .4]), + ('dense', True, False, [1., 1., 3., 1., 2.]), + ('dense', True, True, [0.2, 0.2, 0.6, 0.2, 0.4]), + ('dense', False, False, [3., 3., 1., 3., 2.]), + ('dense', False, True, [.6, .6, .2, .6, .4]), +]) +def test_rank_args(grps, vals, ties_method, ascending, pct, exp): + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({'key': key, 'val': vals}) + result = df.groupby('key').rank(method=ties_method, + ascending=ascending, pct=pct) + + exp_df = DataFrame(exp * len(grps), columns=['val']) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("grps", [ + ['qux'], ['qux', 'quux']]) +@pytest.mark.parametrize("vals", [ + [-np.inf, -np.inf, np.nan, 1., np.nan, np.inf, np.inf], +]) +@pytest.mark.parametrize("ties_method,ascending,na_option,exp", [ + ('average', True, 'keep', [1.5, 1.5, np.nan, 3, np.nan, 4.5, 4.5]), + ('average', True, 'top', [3.5, 3.5, 1.5, 5., 1.5, 6.5, 6.5]), + ('average', True, 'bottom', [1.5, 1.5, 6.5, 3., 6.5, 4.5, 4.5]), + ('average', False, 'keep', [4.5, 4.5, np.nan, 3, np.nan, 1.5, 1.5]), + ('average', False, 'top', [6.5, 6.5, 1.5, 5., 1.5, 3.5, 3.5]), + ('average', False, 'bottom', [4.5, 4.5, 6.5, 3., 6.5, 1.5, 1.5]), + ('min', True, 'keep', [1., 1., np.nan, 3., np.nan, 4., 4.]), + ('min', True, 'top', [3., 3., 1., 5., 1., 6., 6.]), + ('min', True, 'bottom', [1., 1., 6., 3., 6., 4., 4.]), + ('min', False, 'keep', [4., 4., np.nan, 3., np.nan, 1., 1.]), + ('min', False, 'top', [6., 6., 1., 5., 1., 3., 3.]), + ('min', False, 'bottom', [4., 4., 6., 3., 6., 1., 1.]), + ('max', True, 'keep', [2., 2., np.nan, 3., np.nan, 5., 5.]), + ('max', True, 'top', [4., 4., 2., 5., 2., 7., 7.]), + ('max', True, 'bottom', [2., 2., 7., 3., 7., 5., 5.]), + ('max', False, 'keep', [5., 5., np.nan, 3., np.nan, 2., 2.]), + ('max', False, 'top', [7., 7., 2., 5., 2., 4., 4.]), + ('max', False, 'bottom', [5., 5., 7., 3., 7., 2., 2.]), + ('first', True, 'keep', [1., 2., np.nan, 3., np.nan, 4., 5.]), + ('first', True, 'top', [3., 4., 1., 5., 2., 6., 7.]), + ('first', True, 'bottom', [1., 2., 6., 3., 7., 4., 5.]), + ('first', False, 'keep', [4., 5., np.nan, 3., np.nan, 1., 2.]), + ('first', False, 'top', [6., 7., 1., 5., 2., 3., 4.]), + ('first', False, 'bottom', [4., 5., 
6., 3., 7., 1., 2.]), + ('dense', True, 'keep', [1., 1., np.nan, 2., np.nan, 3., 3.]), + ('dense', True, 'top', [2., 2., 1., 3., 1., 4., 4.]), + ('dense', True, 'bottom', [1., 1., 4., 2., 4., 3., 3.]), + ('dense', False, 'keep', [3., 3., np.nan, 2., np.nan, 1., 1.]), + ('dense', False, 'top', [4., 4., 1., 3., 1., 2., 2.]), + ('dense', False, 'bottom', [3., 3., 4., 2., 4., 1., 1.]) +]) +def test_infs_n_nans(grps, vals, ties_method, ascending, na_option, exp): + # GH 20561 + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({'key': key, 'val': vals}) + result = df.groupby('key').rank(method=ties_method, + ascending=ascending, + na_option=na_option) + exp_df = DataFrame(exp * len(grps), columns=['val']) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("grps", [ + ['qux'], ['qux', 'quux']]) +@pytest.mark.parametrize("vals", [ + [2, 2, np.nan, 8, 2, 6, np.nan, np.nan], # floats + [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'), np.nan, + pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'), + pd.Timestamp('2018-01-06'), np.nan, np.nan] +]) +@pytest.mark.parametrize("ties_method,ascending,na_option,pct,exp", [ + ('average', True, 'keep', False, + [2., 2., np.nan, 5., 2., 4., np.nan, np.nan]), + ('average', True, 'keep', True, + [0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan]), + ('average', False, 'keep', False, + [4., 4., np.nan, 1., 4., 2., np.nan, np.nan]), + ('average', False, 'keep', True, + [.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan]), + ('min', True, 'keep', False, + [1., 1., np.nan, 5., 1., 4., np.nan, np.nan]), + ('min', True, 'keep', True, + [0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan]), + ('min', False, 'keep', False, + [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]), + ('min', False, 'keep', True, + [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]), + ('max', True, 'keep', False, + [3., 3., np.nan, 5., 3., 4., np.nan, np.nan]), + ('max', True, 'keep', True, + [0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]), + ('max', False, 'keep', False, + [5., 5., np.nan, 1., 5., 2., np.nan, np.nan]), + ('max', False, 'keep', True, + [1., 1., np.nan, 0.2, 1., 0.4, np.nan, np.nan]), + ('first', True, 'keep', False, + [1., 2., np.nan, 5., 3., 4., np.nan, np.nan]), + ('first', True, 'keep', True, + [0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]), + ('first', False, 'keep', False, + [3., 4., np.nan, 1., 5., 2., np.nan, np.nan]), + ('first', False, 'keep', True, + [.6, 0.8, np.nan, 0.2, 1., 0.4, np.nan, np.nan]), + ('dense', True, 'keep', False, + [1., 1., np.nan, 3., 1., 2., np.nan, np.nan]), + ('dense', True, 'keep', True, + [0.2, 0.2, np.nan, 0.6, 0.2, 0.4, np.nan, np.nan]), + ('dense', False, 'keep', False, + [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]), + ('dense', False, 'keep', True, + [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]), + ('average', True, 'no_na', False, [2., 2., 7., 5., 2., 4., 7., 7.]), + ('average', True, 'no_na', True, + [0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875]), + ('average', False, 'no_na', False, [4., 4., 7., 1., 4., 2., 7., 7.]), + ('average', False, 'no_na', True, + [0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875]), + ('min', True, 'no_na', False, [1., 1., 6., 5., 1., 4., 6., 6.]), + ('min', True, 'no_na', True, + [0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75]), + ('min', False, 'no_na', False, [3., 3., 6., 1., 3., 2., 6., 6.]), + ('min', False, 'no_na', True, + [0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75]), + ('max', True, 'no_na', False, [3., 3., 8., 5., 3., 4., 8., 8.]), 
+ ('max', True, 'no_na', True, + [0.375, 0.375, 1., 0.625, 0.375, 0.5, 1., 1.]), + ('max', False, 'no_na', False, [5., 5., 8., 1., 5., 2., 8., 8.]), + ('max', False, 'no_na', True, + [0.625, 0.625, 1., 0.125, 0.625, 0.25, 1., 1.]), + ('first', True, 'no_na', False, [1., 2., 6., 5., 3., 4., 7., 8.]), + ('first', True, 'no_na', True, + [0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.]), + ('first', False, 'no_na', False, [3., 4., 6., 1., 5., 2., 7., 8.]), + ('first', False, 'no_na', True, + [0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.]), + ('dense', True, 'no_na', False, [1., 1., 4., 3., 1., 2., 4., 4.]), + ('dense', True, 'no_na', True, + [0.125, 0.125, 0.5, 0.375, 0.125, 0.25, 0.5, 0.5]), + ('dense', False, 'no_na', False, [3., 3., 4., 1., 3., 2., 4., 4.]), + ('dense', False, 'no_na', True, + [0.375, 0.375, 0.5, 0.125, 0.375, 0.25, 0.5, 0.5]) +]) +def test_rank_args_missing(grps, vals, ties_method, ascending, + na_option, pct, exp): + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({'key': key, 'val': vals}) + result = df.groupby('key').rank(method=ties_method, + ascending=ascending, + na_option=na_option, pct=pct) + + exp_df = DataFrame(exp * len(grps), columns=['val']) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("pct,exp", [ + (False, [3., 3., 3., 3., 3.]), + (True, [.6, .6, .6, .6, .6])]) +def test_rank_resets_each_group(pct, exp): + df = DataFrame( + {'key': ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'], + 'val': [1] * 10} + ) + result = df.groupby('key').rank(pct=pct) + exp_df = DataFrame(exp * 2, columns=['val']) + tm.assert_frame_equal(result, exp_df) + + +def test_rank_avg_even_vals(): + df = DataFrame({'key': ['a'] * 4, 'val': [1] * 4}) + result = df.groupby('key').rank() + exp_df = DataFrame([2.5, 2.5, 2.5, 2.5], columns=['val']) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("ties_method", [ + 'average', 'min', 'max', 'first', 'dense']) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) +@pytest.mark.parametrize("pct", [True, False]) +@pytest.mark.parametrize("vals", [ + ['bar', 'bar', 'foo', 'bar', 'baz'], + ['bar', np.nan, 'foo', np.nan, 'baz'] +]) +def test_rank_object_raises(ties_method, ascending, na_option, + pct, vals): + df = DataFrame({'key': ['foo'] * 5, 'val': vals}) + with tm.assert_raises_regex(TypeError, "not callable"): + df.groupby('key').rank(method=ties_method, + ascending=ascending, + na_option=na_option, pct=pct) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 390b99d0fab1c..626057c1ea760 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -10,728 +10,758 @@ _ensure_platform_int, is_timedelta64_dtype) from pandas.compat import StringIO from pandas._libs import groupby -from .common import MixIn, assert_fp_equal from pandas.util.testing import assert_frame_equal, assert_series_equal from pandas.core.groupby.groupby import DataError from pandas.core.config import option_context -class TestGroupBy(MixIn): - - def test_transform(self): - data = Series(np.arange(9) // 3, index=np.arange(9)) - - index = np.arange(9) - np.random.shuffle(index) - data = data.reindex(index) - - grouped = data.groupby(lambda x: x // 3) - - transformed = grouped.transform(lambda x: x * x.sum()) - assert transformed[7] == 12 - - # GH 8046 - # make sure that we preserve the input order - - df = DataFrame( - np.arange(6, 
dtype='int64').reshape( - 3, 2), columns=["a", "b"], index=[0, 2, 1]) - key = [0, 0, 1] - expected = df.sort_index().groupby(key).transform( - lambda x: x - x.mean()).groupby(key).mean() - result = df.groupby(key).transform(lambda x: x - x.mean()).groupby( - key).mean() - assert_frame_equal(result, expected) - - def demean(arr): - return arr - arr.mean() - - people = DataFrame(np.random.randn(5, 5), - columns=['a', 'b', 'c', 'd', 'e'], - index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis']) - key = ['one', 'two', 'one', 'two', 'one'] - result = people.groupby(key).transform(demean).groupby(key).mean() - expected = people.groupby(key).apply(demean).groupby(key).mean() - assert_frame_equal(result, expected) - - # GH 8430 - df = tm.makeTimeDataFrame() - g = df.groupby(pd.Grouper(freq='M')) - g.transform(lambda x: x - 1) - - # GH 9700 - df = DataFrame({'a': range(5, 10), 'b': range(5)}) - result = df.groupby('a').transform(max) - expected = DataFrame({'b': range(5)}) - tm.assert_frame_equal(result, expected) - - def test_transform_fast(self): - - df = DataFrame({'id': np.arange(100000) / 3, - 'val': np.random.randn(100000)}) - - grp = df.groupby('id')['val'] - - values = np.repeat(grp.mean().values, - _ensure_platform_int(grp.count().values)) - expected = pd.Series(values, index=df.index, name='val') - - result = grp.transform(np.mean) - assert_series_equal(result, expected) - - result = grp.transform('mean') - assert_series_equal(result, expected) - - # GH 12737 - df = pd.DataFrame({'grouping': [0, 1, 1, 3], 'f': [1.1, 2.1, 3.1, 4.5], - 'd': pd.date_range('2014-1-1', '2014-1-4'), - 'i': [1, 2, 3, 4]}, - columns=['grouping', 'f', 'i', 'd']) - result = df.groupby('grouping').transform('first') - - dates = [pd.Timestamp('2014-1-1'), pd.Timestamp('2014-1-2'), - pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-4')] - expected = pd.DataFrame({'f': [1.1, 2.1, 2.1, 4.5], - 'd': dates, - 'i': [1, 2, 2, 4]}, - columns=['f', 'i', 'd']) - assert_frame_equal(result, expected) - - # selection - result = df.groupby('grouping')[['f', 'i']].transform('first') - expected = expected[['f', 'i']] - assert_frame_equal(result, expected) - - # dup columns - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a']) - result = df.groupby('g').transform('first') - expected = df.drop('g', axis=1) - assert_frame_equal(result, expected) - - def test_transform_broadcast(self): - grouped = self.ts.groupby(lambda x: x.month) - result = grouped.transform(np.mean) - - tm.assert_index_equal(result.index, self.ts.index) - for _, gp in grouped: - assert_fp_equal(result.reindex(gp.index), gp.mean()) - - grouped = self.tsframe.groupby(lambda x: x.month) - result = grouped.transform(np.mean) - tm.assert_index_equal(result.index, self.tsframe.index) - for _, gp in grouped: - agged = gp.mean() - res = result.reindex(gp.index) - for col in self.tsframe: - assert_fp_equal(res[col], agged[col]) - - # group columns - grouped = self.tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, - axis=1) - result = grouped.transform(np.mean) - tm.assert_index_equal(result.index, self.tsframe.index) - tm.assert_index_equal(result.columns, self.tsframe.columns) - for _, gp in grouped: - agged = gp.mean(1) - res = result.reindex(columns=gp.columns) - for idx in gp.index: - assert_fp_equal(res.xs(idx), agged[idx]) - - def test_transform_axis(self): - - # make sure that we are setting the axes - # correctly when on axis=0 or 1 - # in the presence of a non-monotonic indexer - # GH12713 - - base = self.tsframe.iloc[0:5] - r = len(base.index) - c = 
len(base.columns) - tso = DataFrame(np.random.randn(r, c), - index=base.index, - columns=base.columns, - dtype='float64') - # monotonic - ts = tso - grouped = ts.groupby(lambda x: x.weekday()) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: x - x.mean()) - assert_frame_equal(result, expected) - - ts = ts.T - grouped = ts.groupby(lambda x: x.weekday(), axis=1) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - assert_frame_equal(result, expected) - - # non-monotonic - ts = tso.iloc[[1, 0] + list(range(2, len(base)))] - grouped = ts.groupby(lambda x: x.weekday()) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: x - x.mean()) - assert_frame_equal(result, expected) - - ts = ts.T - grouped = ts.groupby(lambda x: x.weekday(), axis=1) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - assert_frame_equal(result, expected) - - def test_transform_dtype(self): - # GH 9807 - # Check transform dtype output is preserved - df = DataFrame([[1, 3], [2, 3]]) - result = df.groupby(1).transform('mean') - expected = DataFrame([[1.5], [1.5]]) - assert_frame_equal(result, expected) - - def test_transform_bug(self): - # GH 5712 - # transforming on a datetime column - df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) - result = df.groupby('A')['B'].transform( - lambda x: x.rank(ascending=False)) - expected = Series(np.arange(5, 0, step=-1), name='B') - assert_series_equal(result, expected) - - def test_transform_numeric_to_boolean(self): - # GH 16875 - # inconsistency in transforming boolean values - expected = pd.Series([True, True], name='A') - - df = pd.DataFrame({'A': [1.1, 2.2], 'B': [1, 2]}) - result = df.groupby('B').A.transform(lambda x: True) - assert_series_equal(result, expected) - - df = pd.DataFrame({'A': [1, 2], 'B': [1, 2]}) - result = df.groupby('B').A.transform(lambda x: True) - assert_series_equal(result, expected) - - def test_transform_datetime_to_timedelta(self): - # GH 15429 - # transforming a datetime to timedelta - df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) - expected = pd.Series([ - Timestamp('20130101') - Timestamp('20130101')] * 5, name='A') - - # this does date math without changing result type in transform - base_time = df['A'][0] - result = df.groupby('A')['A'].transform( - lambda x: x.max() - x.min() + base_time) - base_time - assert_series_equal(result, expected) - - # this does date math and causes the transform to return timedelta - result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min()) - assert_series_equal(result, expected) - - def test_transform_datetime_to_numeric(self): - # GH 10972 - # convert dt to float - df = DataFrame({ - 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')}) - result = df.groupby('a').b.transform( - lambda x: x.dt.dayofweek - x.dt.dayofweek.mean()) - - expected = Series([-0.5, 0.5], name='b') - assert_series_equal(result, expected) - - # convert dt to int - df = DataFrame({ - 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')}) - result = df.groupby('a').b.transform( - lambda x: x.dt.dayofweek - x.dt.dayofweek.min()) - - expected = Series([0, 1], name='b') - assert_series_equal(result, expected) - - def test_transform_casting(self): - # 13046 - data = """ - idx A ID3 DATETIME - 0 B-028 b76cd912ff "2014-10-08 13:43:27" - 1 B-054 4a57ed0b02 "2014-10-08 14:26:19" - 2 B-076 1a682034f8 "2014-10-08 14:29:01" - 3 B-023 b76cd912ff "2014-10-08 
18:39:34" - 4 B-023 f88g8d7sds "2014-10-08 18:40:18" - 5 B-033 b76cd912ff "2014-10-08 18:44:30" - 6 B-032 b76cd912ff "2014-10-08 18:46:00" - 7 B-037 b76cd912ff "2014-10-08 18:52:15" - 8 B-046 db959faf02 "2014-10-08 18:59:59" - 9 B-053 b76cd912ff "2014-10-08 19:17:48" - 10 B-065 b76cd912ff "2014-10-08 19:21:38" - """ - df = pd.read_csv(StringIO(data), sep=r'\s+', - index_col=[0], parse_dates=['DATETIME']) - - result = df.groupby('ID3')['DATETIME'].transform(lambda x: x.diff()) - assert is_timedelta64_dtype(result.dtype) - - result = df[['ID3', 'DATETIME']].groupby('ID3').transform( - lambda x: x.diff()) - assert is_timedelta64_dtype(result.DATETIME.dtype) - - def test_transform_multiple(self): - grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month]) - - grouped.transform(lambda x: x * 2) - grouped.transform(np.mean) - - def test_dispatch_transform(self): - df = self.tsframe[::5].reindex(self.tsframe.index) - - grouped = df.groupby(lambda x: x.month) - - filled = grouped.fillna(method='pad') - fillit = lambda x: x.fillna(method='pad') - expected = df.groupby(lambda x: x.month).transform(fillit) - assert_frame_equal(filled, expected) - - def test_transform_select_columns(self): - f = lambda x: x.mean() - result = self.df.groupby('A')['C', 'D'].transform(f) - - selection = self.df[['C', 'D']] - expected = selection.groupby(self.df['A']).transform(f) - - assert_frame_equal(result, expected) - - def test_transform_exclude_nuisance(self): - - # this also tests orderings in transform between - # series/frame to make sure it's consistent - expected = {} - grouped = self.df.groupby('A') - expected['C'] = grouped['C'].transform(np.mean) - expected['D'] = grouped['D'].transform(np.mean) - expected = DataFrame(expected) - result = self.df.groupby('A').transform(np.mean) - - assert_frame_equal(result, expected) - - def test_transform_function_aliases(self): - result = self.df.groupby('A').transform('mean') - expected = self.df.groupby('A').transform(np.mean) - assert_frame_equal(result, expected) - - result = self.df.groupby('A')['C'].transform('mean') - expected = self.df.groupby('A')['C'].transform(np.mean) - assert_series_equal(result, expected) - - def test_series_fast_transform_date(self): - # GH 13191 - df = pd.DataFrame({'grouping': [np.nan, 1, 1, 3], - 'd': pd.date_range('2014-1-1', '2014-1-4')}) - result = df.groupby('grouping')['d'].transform('first') - dates = [pd.NaT, pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-2'), - pd.Timestamp('2014-1-4')] - expected = pd.Series(dates, name='d') - assert_series_equal(result, expected) - - def test_transform_length(self): - # GH 9697 - df = pd.DataFrame({'col1': [1, 1, 2, 2], 'col2': [1, 2, 3, np.nan]}) - expected = pd.Series([3.0] * 4) - - def nsum(x): - return np.nansum(x) - - results = [df.groupby('col1').transform(sum)['col2'], - df.groupby('col1')['col2'].transform(sum), - df.groupby('col1').transform(nsum)['col2'], - df.groupby('col1')['col2'].transform(nsum)] - for result in results: - assert_series_equal(result, expected, check_names=False) - - def test_transform_coercion(self): - - # 14457 - # when we are transforming be sure to not coerce - # via assignment - df = pd.DataFrame(dict(A=['a', 'a'], B=[0, 1])) - g = df.groupby('A') - - expected = g.transform(np.mean) - result = g.transform(lambda x: np.mean(x)) - assert_frame_equal(result, expected) - - def test_groupby_transform_with_int(self): - - # GH 3740, make sure that we might upcast on item-by-item transform - - # floats - df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=Series(1, 
dtype='float64'), - C=Series( - [1, 2, 3, 1, 2, 3], dtype='float64'), D='foo')) - with np.errstate(all='ignore'): - result = df.groupby('A').transform( - lambda x: (x - x.mean()) / x.std()) - expected = DataFrame(dict(B=np.nan, C=Series( - [-1, 0, 1, -1, 0, 1], dtype='float64'))) - assert_frame_equal(result, expected) - - # int case - df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, - C=[1, 2, 3, 1, 2, 3], D='foo')) - with np.errstate(all='ignore'): - result = df.groupby('A').transform( - lambda x: (x - x.mean()) / x.std()) - expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1])) - assert_frame_equal(result, expected) - - # int that needs float conversion - s = Series([2, 3, 4, 10, 5, -1]) - df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=s, D='foo')) - with np.errstate(all='ignore'): - result = df.groupby('A').transform( - lambda x: (x - x.mean()) / x.std()) - - s1 = s.iloc[0:3] - s1 = (s1 - s1.mean()) / s1.std() - s2 = s.iloc[3:6] - s2 = (s2 - s2.mean()) / s2.std() - expected = DataFrame(dict(B=np.nan, C=concat([s1, s2]))) - assert_frame_equal(result, expected) - - # int downcasting - result = df.groupby('A').transform(lambda x: x * 2 / 2) - expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1])) - assert_frame_equal(result, expected) - - def test_groupby_transform_with_nan_group(self): - # GH 9941 - df = pd.DataFrame({'a': range(10), - 'b': [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) - result = df.groupby(df.b)['a'].transform(max) - expected = pd.Series([1., 1., 2., 3., np.nan, 6., 6., 9., 9., 9.], - name='a') - assert_series_equal(result, expected) - - def test_transform_mixed_type(self): - index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] - ]) - df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], - 'c': np.tile(['a', 'b', 'c'], 2), - 'v': np.arange(1., 7.)}, index=index) - - def f(group): - group['g'] = group['d'] * 2 - return group[:1] - - grouped = df.groupby('c') - result = grouped.apply(f) - - assert result['d'].dtype == np.float64 - - # this is by definition a mutating operation! 
-        with option_context('mode.chained_assignment', None):
-            for key, group in grouped:
-                res = f(group)
-                assert_frame_equal(res, result.loc[key])
-
-    def test_cython_group_transform_algos(self):
-        # GH 4095
-        dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32,
-                  np.uint64, np.float32, np.float64]
-
-        ops = [(groupby.group_cumprod_float64, np.cumproduct, [np.float64]),
-               (groupby.group_cumsum, np.cumsum, dtypes)]
-
-        is_datetimelike = False
-        for pd_op, np_op, dtypes in ops:
-            for dtype in dtypes:
-                data = np.array([[1], [2], [3], [4]], dtype=dtype)
-                ans = np.zeros_like(data)
-                labels = np.array([0, 0, 0, 0], dtype=np.int64)
-                pd_op(ans, data, labels, is_datetimelike)
-                tm.assert_numpy_array_equal(np_op(data), ans[:, 0],
-                                            check_dtype=False)
-
-        # with nans
-        labels = np.array([0, 0, 0, 0, 0], dtype=np.int64)
-
-        data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64')
-        actual = np.zeros_like(data)
-        actual.fill(np.nan)
-        groupby.group_cumprod_float64(actual, data, labels, is_datetimelike)
-        expected = np.array([1, 2, 6, np.nan, 24], dtype='float64')
-        tm.assert_numpy_array_equal(actual[:, 0], expected)
-
-        actual = np.zeros_like(data)
-        actual.fill(np.nan)
-        groupby.group_cumsum(actual, data, labels, is_datetimelike)
-        expected = np.array([1, 3, 6, np.nan, 10], dtype='float64')
-        tm.assert_numpy_array_equal(actual[:, 0], expected)
-
-        # timedelta
-        is_datetimelike = True
-        data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None]
-        actual = np.zeros_like(data, dtype='int64')
-        groupby.group_cumsum(actual, data.view('int64'), labels,
-                             is_datetimelike)
-        expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64(
-            2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'),
-            np.timedelta64(5, 'ns')])
-        tm.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected)
-
-    @pytest.mark.parametrize(
-        "op, args, targop",
-        [('cumprod', (), lambda x: x.cumprod()),
-         ('cumsum', (), lambda x: x.cumsum()),
-         ('shift', (-1, ), lambda x: x.shift(-1)),
-         ('shift', (1, ), lambda x: x.shift())])
-    def test_cython_transform_series(self, op, args, targop):
-        # GH 4095
-        s = Series(np.random.randn(1000))
-        s_missing = s.copy()
-        s_missing.iloc[2:10] = np.nan
-        labels = np.random.randint(0, 50, size=1000).astype(float)
-
-        # series
-        for data in [s, s_missing]:
-            # print(data.head())
-            expected = data.groupby(labels).transform(targop)
-
-            tm.assert_series_equal(
+def assert_fp_equal(a, b):
+    assert (np.abs(a - b) < 1e-12).all()
+
+
+def test_transform():
+    data = Series(np.arange(9) // 3, index=np.arange(9))
+
+    index = np.arange(9)
+    np.random.shuffle(index)
+    data = data.reindex(index)
+
+    grouped = data.groupby(lambda x: x // 3)
+
+    transformed = grouped.transform(lambda x: x * x.sum())
+    assert transformed[7] == 12
+
+    # GH 8046
+    # make sure that we preserve the input order
+
+    df = DataFrame(
+        np.arange(6, dtype='int64').reshape(
+            3, 2), columns=["a", "b"], index=[0, 2, 1])
+    key = [0, 0, 1]
+    expected = df.sort_index().groupby(key).transform(
+        lambda x: x - x.mean()).groupby(key).mean()
+    result = df.groupby(key).transform(lambda x: x - x.mean()).groupby(
+        key).mean()
+    assert_frame_equal(result, expected)
+
+    def demean(arr):
+        return arr - arr.mean()
+
+    people = DataFrame(np.random.randn(5, 5),
+                       columns=['a', 'b', 'c', 'd', 'e'],
+                       index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'])
+    key = ['one', 'two', 'one', 'two', 'one']
+    result = people.groupby(key).transform(demean).groupby(key).mean()
+    expected = people.groupby(key).apply(demean).groupby(key).mean()
+    assert_frame_equal(result, expected)
+
+    # GH 8430
+    df = tm.makeTimeDataFrame()
+    g = df.groupby(pd.Grouper(freq='M'))
+    g.transform(lambda x: x - 1)
+
+    # GH 9700
+    df = DataFrame({'a': range(5, 10), 'b': range(5)})
+    result = df.groupby('a').transform(max)
+    expected = DataFrame({'b': range(5)})
+    tm.assert_frame_equal(result, expected)
+
+
+def test_transform_fast():
+
+    df = DataFrame({'id': np.arange(100000) / 3,
+                    'val': np.random.randn(100000)})
+
+    grp = df.groupby('id')['val']
+
+    values = np.repeat(grp.mean().values,
+                       _ensure_platform_int(grp.count().values))
+    expected = pd.Series(values, index=df.index, name='val')
+
+    result = grp.transform(np.mean)
+    assert_series_equal(result, expected)
+
+    result = grp.transform('mean')
+    assert_series_equal(result, expected)
+
+    # GH 12737
+    df = pd.DataFrame({'grouping': [0, 1, 1, 3], 'f': [1.1, 2.1, 3.1, 4.5],
+                       'd': pd.date_range('2014-1-1', '2014-1-4'),
+                       'i': [1, 2, 3, 4]},
+                      columns=['grouping', 'f', 'i', 'd'])
+    result = df.groupby('grouping').transform('first')
+
+    dates = [pd.Timestamp('2014-1-1'), pd.Timestamp('2014-1-2'),
+             pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-4')]
+    expected = pd.DataFrame({'f': [1.1, 2.1, 2.1, 4.5],
+                             'd': dates,
+                             'i': [1, 2, 2, 4]},
+                            columns=['f', 'i', 'd'])
+    assert_frame_equal(result, expected)
+
+    # selection
+    result = df.groupby('grouping')[['f', 'i']].transform('first')
+    expected = expected[['f', 'i']]
+    assert_frame_equal(result, expected)
+
+    # dup columns
+    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a'])
+    result = df.groupby('g').transform('first')
+    expected = df.drop('g', axis=1)
+    assert_frame_equal(result, expected)
+
+
+def test_transform_broadcast(tsframe, ts):
+    grouped = ts.groupby(lambda x: x.month)
+    result = grouped.transform(np.mean)
+
+    tm.assert_index_equal(result.index, ts.index)
+    for _, gp in grouped:
+        assert_fp_equal(result.reindex(gp.index), gp.mean())
+
+    grouped = tsframe.groupby(lambda x: x.month)
+    result = grouped.transform(np.mean)
+    tm.assert_index_equal(result.index, tsframe.index)
+    for _, gp in grouped:
+        agged = gp.mean()
+        res = result.reindex(gp.index)
+        for col in tsframe:
+            assert_fp_equal(res[col], agged[col])
+
+    # group columns
+    grouped = tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1},
+                              axis=1)
+    result = grouped.transform(np.mean)
+    tm.assert_index_equal(result.index, tsframe.index)
+    tm.assert_index_equal(result.columns, tsframe.columns)
+    for _, gp in grouped:
+        agged = gp.mean(1)
+        res = result.reindex(columns=gp.columns)
+        for idx in gp.index:
+            assert_fp_equal(res.xs(idx), agged[idx])
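The broadcast test above relies on transform returning a result indexed like its input, with each group's aggregate repeated across that group's rows. A minimal standalone sketch of that contract (not part of the patch; the frame is made up for illustration):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'key': ['a', 'a', 'b'], 'val': [1.0, 3.0, 10.0]})
    # transform broadcasts each per-group mean back onto the group's rows
    out = df.groupby('key')['val'].transform(np.mean)
    assert out.tolist() == [2.0, 2.0, 10.0]  # 'a' -> 2.0, 'b' -> 10.0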
+
+
+def test_transform_axis(tsframe):
+
+    # make sure that we are setting the axes
+    # correctly when on axis=0 or 1
+    # in the presence of a non-monotonic indexer
+    # GH12713
+
+    base = tsframe.iloc[0:5]
+    r = len(base.index)
+    c = len(base.columns)
+    tso = DataFrame(np.random.randn(r, c),
+                    index=base.index,
+                    columns=base.columns,
+                    dtype='float64')
+    # monotonic
+    ts = tso
+    grouped = ts.groupby(lambda x: x.weekday())
+    result = ts - grouped.transform('mean')
+    expected = grouped.apply(lambda x: x - x.mean())
+    assert_frame_equal(result, expected)
+
+    ts = ts.T
+    grouped = ts.groupby(lambda x: x.weekday(), axis=1)
+    result = ts - grouped.transform('mean')
+    expected = grouped.apply(lambda x: (x.T - x.mean(1)).T)
+    assert_frame_equal(result, expected)
+
+    # non-monotonic
+    ts = tso.iloc[[1, 0] + list(range(2, len(base)))]
+    grouped = ts.groupby(lambda x: x.weekday())
+    result = ts - grouped.transform('mean')
+    expected = grouped.apply(lambda x: x - x.mean())
+    assert_frame_equal(result, expected)
+
+    ts = ts.T
+    grouped = ts.groupby(lambda x: x.weekday(), axis=1)
+    result = ts - grouped.transform('mean')
+    expected = grouped.apply(lambda x: (x.T - x.mean(1)).T)
+    assert_frame_equal(result, expected)
+
+
+def test_transform_dtype():
+    # GH 9807
+    # Check transform dtype output is preserved
+    df = DataFrame([[1, 3], [2, 3]])
+    result = df.groupby(1).transform('mean')
+    expected = DataFrame([[1.5], [1.5]])
+    assert_frame_equal(result, expected)
+
+
+def test_transform_bug():
+    # GH 5712
+    # transforming on a datetime column
+    df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
+    result = df.groupby('A')['B'].transform(
+        lambda x: x.rank(ascending=False))
+    expected = Series(np.arange(5, 0, step=-1), name='B')
+    assert_series_equal(result, expected)
+
+
+def test_transform_numeric_to_boolean():
+    # GH 16875
+    # inconsistency in transforming boolean values
+    expected = pd.Series([True, True], name='A')
+
+    df = pd.DataFrame({'A': [1.1, 2.2], 'B': [1, 2]})
+    result = df.groupby('B').A.transform(lambda x: True)
+    assert_series_equal(result, expected)
+
+    df = pd.DataFrame({'A': [1, 2], 'B': [1, 2]})
+    result = df.groupby('B').A.transform(lambda x: True)
+    assert_series_equal(result, expected)
+
+
+def test_transform_datetime_to_timedelta():
+    # GH 15429
+    # transforming a datetime to timedelta
+    df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
+    expected = pd.Series([
+        Timestamp('20130101') - Timestamp('20130101')] * 5, name='A')
+
+    # this does date math without changing result type in transform
+    base_time = df['A'][0]
+    result = df.groupby('A')['A'].transform(
+        lambda x: x.max() - x.min() + base_time) - base_time
+    assert_series_equal(result, expected)
+
+    # this does date math and causes the transform to return timedelta
+    result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min())
+    assert_series_equal(result, expected)
+
+
+def test_transform_datetime_to_numeric():
+    # GH 10972
+    # convert dt to float
+    df = DataFrame({
+        'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')})
+    result = df.groupby('a').b.transform(
+        lambda x: x.dt.dayofweek - x.dt.dayofweek.mean())
+
+    expected = Series([-0.5, 0.5], name='b')
+    assert_series_equal(result, expected)
+
+    # convert dt to int
+    df = DataFrame({
+        'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')})
+    result = df.groupby('a').b.transform(
+        lambda x: x.dt.dayofweek - x.dt.dayofweek.min())
+
+    expected = Series([0, 1], name='b')
+    assert_series_equal(result, expected)
+
+
+def test_transform_casting():
+    # 13046
+    data = """
+    idx     A         ID3              DATETIME
+    0   B-028  b76cd912ff "2014-10-08 13:43:27"
+    1   B-054  4a57ed0b02 "2014-10-08 14:26:19"
+    2   B-076  1a682034f8 "2014-10-08 14:29:01"
+    3   B-023  b76cd912ff "2014-10-08 18:39:34"
+    4   B-023  f88g8d7sds "2014-10-08 18:40:18"
+    5   B-033  b76cd912ff "2014-10-08 18:44:30"
+    6   B-032  b76cd912ff "2014-10-08 18:46:00"
+    7   B-037  b76cd912ff "2014-10-08 18:52:15"
+    8   B-046  db959faf02 "2014-10-08 18:59:59"
+    9   B-053  b76cd912ff "2014-10-08 19:17:48"
+    10  B-065  b76cd912ff "2014-10-08 19:21:38"
+    """
+    df = pd.read_csv(StringIO(data), sep=r'\s+',
+                     index_col=[0], parse_dates=['DATETIME'])
+
+    result = df.groupby('ID3')['DATETIME'].transform(lambda x: x.diff())
+    assert is_timedelta64_dtype(result.dtype)
+
+    result = df[['ID3', 'DATETIME']].groupby('ID3').transform(
+        lambda x: x.diff())
+    assert is_timedelta64_dtype(result.DATETIME.dtype)
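test_transform_casting above checks that transform takes its output dtype from what the UDF returns rather than from the input column: diffing datetimes within a group comes back as timedelta64. A standalone sketch of the same behaviour (hypothetical data, not the test's CSV):

    import pandas as pd
    from pandas.api.types import is_timedelta64_dtype

    df = pd.DataFrame({'g': ['x', 'x', 'y'],
                       't': pd.to_datetime(['2014-10-08 13:43:27',
                                            '2014-10-08 14:26:19',
                                            '2014-10-08 14:29:01'])})
    # datetime minus datetime yields timedelta64 within each group
    res = df.groupby('g')['t'].transform(lambda s: s.diff())
    assert is_timedelta64_dtype(res.dtype)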
+
+
+def test_transform_multiple(ts):
+    grouped = ts.groupby([lambda x: x.year, lambda x: x.month])
+
+    grouped.transform(lambda x: x * 2)
+    grouped.transform(np.mean)
+
+
+def test_dispatch_transform(tsframe):
+    df = tsframe[::5].reindex(tsframe.index)
+
+    grouped = df.groupby(lambda x: x.month)
+
+    filled = grouped.fillna(method='pad')
+    fillit = lambda x: x.fillna(method='pad')
+    expected = df.groupby(lambda x: x.month).transform(fillit)
+    assert_frame_equal(filled, expected)
+
+
+def test_transform_select_columns(df):
+    f = lambda x: x.mean()
+    result = df.groupby('A')['C', 'D'].transform(f)
+
+    selection = df[['C', 'D']]
+    expected = selection.groupby(df['A']).transform(f)
+
+    assert_frame_equal(result, expected)
+
+
+def test_transform_exclude_nuisance(df):
+
+    # this also tests orderings in transform between
+    # series/frame to make sure it's consistent
+    expected = {}
+    grouped = df.groupby('A')
+    expected['C'] = grouped['C'].transform(np.mean)
+    expected['D'] = grouped['D'].transform(np.mean)
+    expected = DataFrame(expected)
+    result = df.groupby('A').transform(np.mean)
+
+    assert_frame_equal(result, expected)
+
+
+def test_transform_function_aliases(df):
+    result = df.groupby('A').transform('mean')
+    expected = df.groupby('A').transform(np.mean)
+    assert_frame_equal(result, expected)
+
+    result = df.groupby('A')['C'].transform('mean')
+    expected = df.groupby('A')['C'].transform(np.mean)
+    assert_series_equal(result, expected)
+
+
+def test_series_fast_transform_date():
+    # GH 13191
+    df = pd.DataFrame({'grouping': [np.nan, 1, 1, 3],
+                       'd': pd.date_range('2014-1-1', '2014-1-4')})
+    result = df.groupby('grouping')['d'].transform('first')
+    dates = [pd.NaT, pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-2'),
+             pd.Timestamp('2014-1-4')]
+    expected = pd.Series(dates, name='d')
+    assert_series_equal(result, expected)
+
+
+def test_transform_length():
+    # GH 9697
+    df = pd.DataFrame({'col1': [1, 1, 2, 2], 'col2': [1, 2, 3, np.nan]})
+    expected = pd.Series([3.0] * 4)
+
+    def nsum(x):
+        return np.nansum(x)
+
+    results = [df.groupby('col1').transform(sum)['col2'],
+               df.groupby('col1')['col2'].transform(sum),
+               df.groupby('col1').transform(nsum)['col2'],
+               df.groupby('col1')['col2'].transform(nsum)]
+    for result in results:
+        assert_series_equal(result, expected, check_names=False)
+
+
+def test_transform_coercion():
+
+    # 14457
+    # when we are transforming be sure to not coerce
+    # via assignment
+    df = pd.DataFrame(dict(A=['a', 'a'], B=[0, 1]))
+    g = df.groupby('A')
+
+    expected = g.transform(np.mean)
+    result = g.transform(lambda x: np.mean(x))
+    assert_frame_equal(result, expected)
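test_groupby_transform_with_int below exercises item-by-item upcasting: a standardisation like (x - x.mean()) / x.std() is float-valued, so integer columns have to be promoted. A minimal sketch of the idea (hypothetical frame, not the test fixture):

    import pandas as pd

    df = pd.DataFrame({'A': [1, 1, 2, 2], 'C': [1, 2, 3, 5]})
    # the float-valued UDF forces the int column C up to float64
    res = df.groupby('A')['C'].transform(lambda x: (x - x.mean()) / x.std())
    assert res.dtype == 'float64'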
+
+
+def test_groupby_transform_with_int():
+
+    # GH 3740, make sure that we might upcast on item-by-item transform
+
+    # floats
+    df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=Series(1, dtype='float64'),
+                        C=Series(
+                            [1, 2, 3, 1, 2, 3], dtype='float64'), D='foo'))
+    with np.errstate(all='ignore'):
+        result = df.groupby('A').transform(
+            lambda x: (x - x.mean()) / x.std())
+    expected = DataFrame(dict(B=np.nan, C=Series(
+        [-1, 0, 1, -1, 0, 1], dtype='float64')))
+    assert_frame_equal(result, expected)
+
+    # int case
+    df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1,
+                        C=[1, 2, 3, 1, 2, 3], D='foo'))
+    with np.errstate(all='ignore'):
+        result = df.groupby('A').transform(
+            lambda x: (x - x.mean()) / x.std())
+    expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1]))
+    assert_frame_equal(result, expected)
+
+    # int that needs float conversion
+    s = Series([2, 3, 4, 10, 5, -1])
+    df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=s, D='foo'))
+    with np.errstate(all='ignore'):
+        result = df.groupby('A').transform(
+            lambda x: (x - x.mean()) / x.std())
+
+    s1 = s.iloc[0:3]
+    s1 = (s1 - s1.mean()) / s1.std()
+    s2 = s.iloc[3:6]
+    s2 = (s2 - s2.mean()) / s2.std()
+    expected = DataFrame(dict(B=np.nan, C=concat([s1, s2])))
+    assert_frame_equal(result, expected)
+
+    # int downcasting
+    result = df.groupby('A').transform(lambda x: x * 2 / 2)
+    expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1]))
+    assert_frame_equal(result, expected)
+
+
+def test_groupby_transform_with_nan_group():
+    # GH 9941
+    df = pd.DataFrame({'a': range(10),
+                       'b': [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]})
+    result = df.groupby(df.b)['a'].transform(max)
+    expected = pd.Series([1., 1., 2., 3., np.nan, 6., 6., 9., 9., 9.],
+                         name='a')
+    assert_series_equal(result, expected)
+
+
+def test_transform_mixed_type():
+    index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]
+                                    ])
+    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
+                    'c': np.tile(['a', 'b', 'c'], 2),
+                    'v': np.arange(1., 7.)}, index=index)
+
+    def f(group):
+        group['g'] = group['d'] * 2
+        return group[:1]
+
+    grouped = df.groupby('c')
+    result = grouped.apply(f)
+
+    assert result['d'].dtype == np.float64
+
+    # this is by definition a mutating operation!
+    with option_context('mode.chained_assignment', None):
+        for key, group in grouped:
+            res = f(group)
+            assert_frame_equal(res, result.loc[key])
+
+
+def test_cython_group_transform_algos():
+    # GH 4095
+    dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32,
+              np.uint64, np.float32, np.float64]
+
+    ops = [(groupby.group_cumprod_float64, np.cumproduct, [np.float64]),
+           (groupby.group_cumsum, np.cumsum, dtypes)]
+
+    is_datetimelike = False
+    for pd_op, np_op, dtypes in ops:
+        for dtype in dtypes:
+            data = np.array([[1], [2], [3], [4]], dtype=dtype)
+            ans = np.zeros_like(data)
+            labels = np.array([0, 0, 0, 0], dtype=np.int64)
+            pd_op(ans, data, labels, is_datetimelike)
+            tm.assert_numpy_array_equal(np_op(data), ans[:, 0],
+                                        check_dtype=False)
+
+    # with nans
+    labels = np.array([0, 0, 0, 0, 0], dtype=np.int64)
+
+    data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64')
+    actual = np.zeros_like(data)
+    actual.fill(np.nan)
+    groupby.group_cumprod_float64(actual, data, labels, is_datetimelike)
+    expected = np.array([1, 2, 6, np.nan, 24], dtype='float64')
+    tm.assert_numpy_array_equal(actual[:, 0], expected)
+
+    actual = np.zeros_like(data)
+    actual.fill(np.nan)
+    groupby.group_cumsum(actual, data, labels, is_datetimelike)
+    expected = np.array([1, 3, 6, np.nan, 10], dtype='float64')
+    tm.assert_numpy_array_equal(actual[:, 0], expected)
+
+    # timedelta
+    is_datetimelike = True
+    data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None]
+    actual = np.zeros_like(data, dtype='int64')
+    groupby.group_cumsum(actual, data.view('int64'), labels,
+                         is_datetimelike)
+    expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64(
+        2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'),
+        np.timedelta64(5, 'ns')])
+    tm.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected)
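The kernel checks above assert that the private group_cumsum/group_cumprod paths agree with the plain numpy cumulative ops. The same equivalence can be sketched through the public API when every row falls in one group (illustrative only; uses pd.testing, which exists in this pandas era):

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0, 4.0])
    labels = np.zeros(len(s), dtype=np.int64)  # a single group
    # the cythonized groupby cumsum should match numpy's cumsum here
    result = s.groupby(labels).cumsum()
    expected = pd.Series(np.cumsum(s.values))
    pd.testing.assert_series_equal(result, expected)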
+
+
+@pytest.mark.parametrize(
+    "op, args, targop",
+    [('cumprod', (), lambda x: x.cumprod()),
+     ('cumsum', (), lambda x: x.cumsum()),
+     ('shift', (-1, ), lambda x: x.shift(-1)),
+     ('shift', (1, ), lambda x: x.shift())])
+def test_cython_transform_series(op, args, targop):
+    # GH 4095
+    s = Series(np.random.randn(1000))
+    s_missing = s.copy()
+    s_missing.iloc[2:10] = np.nan
+    labels = np.random.randint(0, 50, size=1000).astype(float)
+
+    # series
+    for data in [s, s_missing]:
+        # print(data.head())
+        expected = data.groupby(labels).transform(targop)
+
+        tm.assert_series_equal(
+            expected,
+            data.groupby(labels).transform(op, *args))
+        tm.assert_series_equal(expected, getattr(
+            data.groupby(labels), op)(*args))
+
+
+@pytest.mark.parametrize("op", ['cumprod', 'cumsum'])
+@pytest.mark.parametrize("skipna", [False, True])
+@pytest.mark.parametrize('input, exp', [
+    # When everything is NaN
+    ({'key': ['b'] * 10, 'value': np.nan},
+     pd.Series([np.nan] * 10, name='value')),
+    # When there is a single NaN
+    ({'key': ['b'] * 10 + ['a'] * 2,
+      'value': [3] * 3 + [np.nan] + [3] * 8},
+     {('cumprod', False): [3.0, 9.0, 27.0] + [np.nan] * 7 + [3.0, 9.0],
+      ('cumprod', True): [3.0, 9.0, 27.0, np.nan, 81., 243., 729.,
+                          2187., 6561., 19683., 3.0, 9.0],
+      ('cumsum', False): [3.0, 6.0, 9.0] + [np.nan] * 7 + [3.0, 6.0],
+      ('cumsum', True): [3.0, 6.0, 9.0, np.nan, 12., 15., 18.,
+                         21., 24., 27., 3.0, 6.0]})])
+def test_groupby_cum_skipna(op, skipna, input, exp):
+    df = pd.DataFrame(input)
+    result = df.groupby('key')['value'].transform(op, skipna=skipna)
+    if isinstance(exp, dict):
+        expected = exp[(op, skipna)]
+    else:
+        expected = exp
+    expected = pd.Series(expected, name='value')
+    tm.assert_series_equal(expected, result)
+
+
+@pytest.mark.parametrize(
+    "op, args, targop",
+    [('cumprod', (), lambda x: x.cumprod()),
+     ('cumsum', (), lambda x: x.cumsum()),
+     ('shift', (-1, ), lambda x: x.shift(-1)),
+     ('shift', (1, ), lambda x: x.shift())])
+def test_cython_transform_frame(op, args, targop):
+    s = Series(np.random.randn(1000))
+    s_missing = s.copy()
+    s_missing.iloc[2:10] = np.nan
+    labels = np.random.randint(0, 50, size=1000).astype(float)
+    strings = list('qwertyuiopasdfghjklz')
+    strings_missing = strings[:]
+    strings_missing[5] = np.nan
+    df = DataFrame({'float': s,
+                    'float_missing': s_missing,
+                    'int': [1, 1, 1, 1, 2] * 200,
+                    'datetime': pd.date_range('1990-1-1', periods=1000),
+                    'timedelta': pd.timedelta_range(1, freq='s',
+                                                    periods=1000),
+                    'string': strings * 50,
+                    'string_missing': strings_missing * 50},
+                   columns=['float', 'float_missing', 'int', 'datetime',
+                            'timedelta', 'string', 'string_missing'])
+    df['cat'] = df['string'].astype('category')
+
+    df2 = df.copy()
+    df2.index = pd.MultiIndex.from_product([range(100), range(10)])
+
+    # DataFrame - Single and MultiIndex,
+    # group by values, index level, columns
+    for df in [df, df2]:
+        for gb_target in [dict(by=labels), dict(level=0), dict(by='string')
+                          ]:  # dict(by='string_missing')]:
+            # dict(by=['int','string'])]:
+
+            gb = df.groupby(**gb_target)
+            # whitelisted methods set the selection before applying
+            # bit of a hack to make sure the cythonized shift
+            # is equivalent to pre 0.17.1 behavior
+            if op == 'shift':
+                gb._set_group_selection()
+
+            if op != 'shift' and 'int' not in gb_target:
+                # numeric apply fastpath promotes dtype so have
+                # to apply separately and concat
+                i = gb[['int']].apply(targop)
+                f = gb[['float', 'float_missing']].apply(targop)
+                expected = pd.concat([f, i], axis=1)
+            else:
+                expected = gb.apply(targop)
+
+            expected = expected.sort_index(axis=1)
+            tm.assert_frame_equal(expected,
+                                  gb.transform(op, *args).sort_index(
+                                      axis=1))
+            tm.assert_frame_equal(
                 expected,
-                data.groupby(labels).transform(op, *args))
-            tm.assert_series_equal(expected, getattr(
-                data.groupby(labels), op)(*args))
-
-    @pytest.mark.parametrize("op", ['cumprod', 'cumsum'])
-    @pytest.mark.parametrize("skipna", [False, True])
-    @pytest.mark.parametrize('input, exp', [
-        # When everything is NaN
-        ({'key': ['b'] * 10, 'value': np.nan},
-         pd.Series([np.nan] * 10, name='value')),
-        # When there is a single NaN
-        ({'key': ['b'] * 10 + ['a'] * 2,
-          'value': [3] * 3 + [np.nan] + [3] * 8},
-         {('cumprod', False): [3.0, 9.0, 27.0] + [np.nan] * 7 + [3.0, 9.0],
-          ('cumprod', True): [3.0, 9.0, 27.0, np.nan, 81., 243., 729.,
-                              2187., 6561., 19683., 3.0, 9.0],
-          ('cumsum', False): [3.0, 6.0, 9.0] + [np.nan] * 7 + [3.0, 6.0],
-          ('cumsum', True): [3.0, 6.0, 9.0, np.nan, 12., 15., 18.,
-                             21., 24., 27., 3.0, 6.0]})])
-    def test_groupby_cum_skipna(self, op, skipna, input, exp):
-        df = pd.DataFrame(input)
-        result = df.groupby('key')['value'].transform(op, skipna=skipna)
-        if isinstance(exp, dict):
-            expected = exp[(op, skipna)]
-        else:
-            expected = exp
-        expected = pd.Series(expected, name='value')
-        tm.assert_series_equal(expected, result)
-
-    @pytest.mark.parametrize(
-        "op, args, targop",
-        [('cumprod', (), lambda x: x.cumprod()),
-         ('cumsum', (), lambda x: x.cumsum()),
-         ('shift', (-1, ), lambda x: x.shift(-1)),
-         ('shift', (1, ), lambda x: x.shift())])
-    def test_cython_transform_frame(self, op, args, targop):
-        s = Series(np.random.randn(1000))
-        s_missing = s.copy()
-        s_missing.iloc[2:10] = np.nan
-        labels = np.random.randint(0, 50, size=1000).astype(float)
-        strings = list('qwertyuiopasdfghjklz')
-        strings_missing = strings[:]
-        strings_missing[5] = np.nan
-        df = DataFrame({'float': s,
-                        'float_missing': s_missing,
-                        'int': [1, 1, 1, 1, 2] * 200,
-                        'datetime': pd.date_range('1990-1-1', periods=1000),
-                        'timedelta': pd.timedelta_range(1, freq='s',
-                                                        periods=1000),
-                        'string': strings * 50,
-                        'string_missing': strings_missing * 50},
-                       columns=['float', 'float_missing', 'int', 'datetime',
-                                'timedelta', 'string', 'string_missing'])
-        df['cat'] = df['string'].astype('category')
-
-        df2 = df.copy()
-        df2.index = pd.MultiIndex.from_product([range(100), range(10)])
-
-        # DataFrame - Single and MultiIndex,
-        # group by values, index level, columns
-        for df in [df, df2]:
-            for gb_target in [dict(by=labels), dict(level=0), dict(by='string')
-                              ]:  # dict(by='string_missing')]:
-                # dict(by=['int','string'])]:
-
-                gb = df.groupby(**gb_target)
-                # whitelisted methods set the selection before applying
-                # bit a of hack to make sure the cythonized shift
-                # is equivalent to pre 0.17.1 behavior
-                if op == 'shift':
-                    gb._set_group_selection()
-
-                if op != 'shift' and 'int' not in gb_target:
-                    # numeric apply fastpath promotes dtype so have
-                    # to apply separately and concat
-                    i = gb[['int']].apply(targop)
-                    f = gb[['float', 'float_missing']].apply(targop)
-                    expected = pd.concat([f, i], axis=1)
+                getattr(gb, op)(*args).sort_index(axis=1))
+            # individual columns
+            for c in df:
+                if c not in ['float', 'int', 'float_missing'
+                             ] and op != 'shift':
+                    pytest.raises(DataError, gb[c].transform, op)
+                    pytest.raises(DataError, getattr(gb[c], op))
                 else:
-                    expected = gb.apply(targop)
-
-                expected = expected.sort_index(axis=1)
-                tm.assert_frame_equal(expected,
-                                      gb.transform(op, *args).sort_index(
-                                          axis=1))
-                tm.assert_frame_equal(
-                    expected,
-                    getattr(gb, op)(*args).sort_index(axis=1))
-                # individual columns
-                for c in df:
-                    if c not in ['float', 'int', 'float_missing'
-                                 ] and op != 'shift':
-                        pytest.raises(DataError, gb[c].transform, op)
-                        pytest.raises(DataError, getattr(gb[c], op))
-                    else:
-                        expected = gb[c].apply(targop)
-                        expected.name = c
-                        tm.assert_series_equal(expected,
-                                               gb[c].transform(op, *args))
-                        tm.assert_series_equal(expected,
-                                               getattr(gb[c], op)(*args))
-
-    def test_transform_with_non_scalar_group(self):
-        # GH 10165
-        cols = pd.MultiIndex.from_tuples([
-            ('syn', 'A'), ('mis', 'A'), ('non', 'A'),
-            ('syn', 'C'), ('mis', 'C'), ('non', 'C'),
-            ('syn', 'T'), ('mis', 'T'), ('non', 'T'),
-            ('syn', 'G'), ('mis', 'G'), ('non', 'G')])
-        df = pd.DataFrame(np.random.randint(1, 10, (4, 12)),
-                          columns=cols,
-                          index=['A', 'C', 'G', 'T'])
-        tm.assert_raises_regex(ValueError, 'transform must return '
-                               'a scalar value for each '
-                               'group.*',
-                               df.groupby(axis=1, level=1).transform,
-                               lambda z: z.div(z.sum(axis=1), axis=0))
-
-    @pytest.mark.parametrize('cols,exp,comp_func', [
-        ('a', pd.Series([1, 1, 1], name='a'), tm.assert_series_equal),
-        (['a', 'c'], pd.DataFrame({'a': [1, 1, 1], 'c': [1, 1, 1]}),
-         tm.assert_frame_equal)
-    ])
-    @pytest.mark.parametrize('agg_func', [
-        'count', 'rank', 'size'])
-    def test_transform_numeric_ret(self, cols, exp, comp_func, agg_func):
-        if agg_func == 'size' and isinstance(cols, list):
-            pytest.xfail("'size' transformation not supported with "
-                         "NDFrameGroupy")
-
-        # GH 19200
-        df = pd.DataFrame(
-            {'a': pd.date_range('2018-01-01', periods=3),
-             'b': range(3),
-             'c': range(7, 10)})
-
-        result = df.groupby('b')[cols].transform(agg_func)
-
-        if agg_func == 'rank':
-            exp = exp.astype('float')
-
-        comp_func(result, exp)
-
-    @pytest.mark.parametrize("mix_groupings", [True, False])
-    @pytest.mark.parametrize("as_series", [True, False])
-    @pytest.mark.parametrize("val1,val2", [
-        ('foo', 'bar'), (1, 2), (1., 2.)])
-    @pytest.mark.parametrize("fill_method,limit,exp_vals", [
-        ("ffill", None,
-         [np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']),
-        ("ffill", 1,
-         [np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]),
-        ("bfill", None,
-         ['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]),
-        ("bfill", 1,
-         [np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan])
-    ])
-    def test_group_fill_methods(self, mix_groupings, as_series, val1, val2,
-                                fill_method, limit, exp_vals):
-        vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan]
-        _exp_vals = list(exp_vals)
-        # Overwrite placeholder values
-        for index, exp_val in enumerate(_exp_vals):
-            if exp_val == 'val1':
-                _exp_vals[index] = val1
-            elif exp_val == 'val2':
-                _exp_vals[index] = val2
-
-        # Need to modify values and expectations depending on the
-        # Series / DataFrame that we ultimately want to generate
-        if mix_groupings:  # ['a', 'b', 'a, 'b', ...]
-            keys = ['a', 'b'] * len(vals)
-
-            def interweave(list_obj):
-                temp = list()
-                for x in list_obj:
-                    temp.extend([x, x])
-
-                return temp
-
-            _exp_vals = interweave(_exp_vals)
-            vals = interweave(vals)
-        else:  # ['a', 'a', 'a', ... 'b', 'b', 'b']
-            keys = ['a'] * len(vals) + ['b'] * len(vals)
-            _exp_vals = _exp_vals * 2
-            vals = vals * 2
-
-        df = DataFrame({'key': keys, 'val': vals})
-        if as_series:
-            result = getattr(
-                df.groupby('key')['val'], fill_method)(limit=limit)
-            exp = Series(_exp_vals, name='val')
-            assert_series_equal(result, exp)
-        else:
-            result = getattr(df.groupby('key'), fill_method)(limit=limit)
-            exp = DataFrame({'key': keys, 'val': _exp_vals})
-            assert_frame_equal(result, exp)
-
-    @pytest.mark.parametrize("test_series", [True, False])
-    @pytest.mark.parametrize("periods,fill_method,limit", [
-        (1, 'ffill', None), (1, 'ffill', 1),
-        (1, 'bfill', None), (1, 'bfill', 1),
-        (-1, 'ffill', None), (-1, 'ffill', 1),
-        (-1, 'bfill', None), (-1, 'bfill', 1)])
-    def test_pct_change(self, test_series, periods, fill_method, limit):
-        vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
-        exp_vals = Series(vals).pct_change(periods=periods,
-                                           fill_method=fill_method,
-                                           limit=limit).tolist()
-
-        df = DataFrame({'key': ['a'] * len(vals) + ['b'] * len(vals),
-                        'vals': vals * 2})
-        grp = df.groupby('key')
-
-        def get_result(grp_obj):
-            return grp_obj.pct_change(periods=periods,
-                                      fill_method=fill_method,
-                                      limit=limit)
-
-        if test_series:
-            exp = pd.Series(exp_vals * 2)
-            exp.name = 'vals'
-            grp = grp['vals']
-            result = get_result(grp)
-            tm.assert_series_equal(result, exp)
-        else:
-            exp = DataFrame({'vals': exp_vals * 2})
-            result = get_result(grp)
-            tm.assert_frame_equal(result, exp)
-
-    @pytest.mark.parametrize("func", [np.any, np.all])
-    def test_any_all_np_func(self, func):
-        # GH 20653
-        df = pd.DataFrame([['foo', True],
-                           [np.nan, True],
-                           ['foo', True]], columns=['key', 'val'])
-
-        exp = pd.Series([True, np.nan, True], name='val')
-
-        res = df.groupby('key')['val'].transform(func)
-        tm.assert_series_equal(res, exp)
+                    expected = gb[c].apply(targop)
+                    expected.name = c
+                    tm.assert_series_equal(expected,
+                                           gb[c].transform(op, *args))
+                    tm.assert_series_equal(expected,
+                                           getattr(gb[c], op)(*args))
+
+
+def test_transform_with_non_scalar_group():
+    # GH 10165
+    cols = pd.MultiIndex.from_tuples([
+        ('syn', 'A'), ('mis', 'A'), ('non', 'A'),
+        ('syn', 'C'), ('mis', 'C'), ('non', 'C'),
+        ('syn', 'T'), ('mis', 'T'), ('non', 'T'),
+        ('syn', 'G'), ('mis', 'G'), ('non', 'G')])
+    df = pd.DataFrame(np.random.randint(1, 10, (4, 12)),
+                      columns=cols,
+                      index=['A', 'C', 'G', 'T'])
+    tm.assert_raises_regex(ValueError, 'transform must return '
+                           'a scalar value for each '
+                           'group.*',
+                           df.groupby(axis=1, level=1).transform,
+                           lambda z: z.div(z.sum(axis=1), axis=0))
+
+
+@pytest.mark.parametrize('cols,exp,comp_func', [
+    ('a', pd.Series([1, 1, 1], name='a'), tm.assert_series_equal),
+    (['a', 'c'], pd.DataFrame({'a': [1, 1, 1], 'c': [1, 1, 1]}),
+     tm.assert_frame_equal)
+])
+@pytest.mark.parametrize('agg_func', [
+    'count', 'rank', 'size'])
+def test_transform_numeric_ret(cols, exp, comp_func, agg_func):
+    if agg_func == 'size' and isinstance(cols, list):
+        pytest.xfail("'size' transformation not supported with "
+                     "NDFrameGroupBy")
+
+    # GH 19200
+    df = pd.DataFrame(
+        {'a': pd.date_range('2018-01-01', periods=3),
+         'b': range(3),
+         'c': range(7, 10)})
+
+    result = df.groupby('b')[cols].transform(agg_func)
+
+    if agg_func == 'rank':
+        exp = exp.astype('float')
+
+    comp_func(result, exp)
+@pytest.mark.parametrize("fill_method,limit,exp_vals", [ + ("ffill", None, + [np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']), + ("ffill", 1, + [np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]), + ("bfill", None, + ['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]), + ("bfill", 1, + [np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan]) +]) +def test_group_fill_methods(mix_groupings, as_series, val1, val2, + fill_method, limit, exp_vals): + vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan] + _exp_vals = list(exp_vals) + # Overwrite placeholder values + for index, exp_val in enumerate(_exp_vals): + if exp_val == 'val1': + _exp_vals[index] = val1 + elif exp_val == 'val2': + _exp_vals[index] = val2 + + # Need to modify values and expectations depending on the + # Series / DataFrame that we ultimately want to generate + if mix_groupings: # ['a', 'b', 'a, 'b', ...] + keys = ['a', 'b'] * len(vals) + + def interweave(list_obj): + temp = list() + for x in list_obj: + temp.extend([x, x]) + + return temp + + _exp_vals = interweave(_exp_vals) + vals = interweave(vals) + else: # ['a', 'a', 'a', ... 'b', 'b', 'b'] + keys = ['a'] * len(vals) + ['b'] * len(vals) + _exp_vals = _exp_vals * 2 + vals = vals * 2 + + df = DataFrame({'key': keys, 'val': vals}) + if as_series: + result = getattr( + df.groupby('key')['val'], fill_method)(limit=limit) + exp = Series(_exp_vals, name='val') + assert_series_equal(result, exp) + else: + result = getattr(df.groupby('key'), fill_method)(limit=limit) + exp = DataFrame({'key': keys, 'val': _exp_vals}) + assert_frame_equal(result, exp) + + +@pytest.mark.parametrize("test_series", [True, False]) +@pytest.mark.parametrize("periods,fill_method,limit", [ + (1, 'ffill', None), (1, 'ffill', 1), + (1, 'bfill', None), (1, 'bfill', 1), + (-1, 'ffill', None), (-1, 'ffill', 1), + (-1, 'bfill', None), (-1, 'bfill', 1)]) +def test_pct_change(test_series, periods, fill_method, limit): + vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] + exp_vals = Series(vals).pct_change(periods=periods, + fill_method=fill_method, + limit=limit).tolist() + + df = DataFrame({'key': ['a'] * len(vals) + ['b'] * len(vals), + 'vals': vals * 2}) + grp = df.groupby('key') + + def get_result(grp_obj): + return grp_obj.pct_change(periods=periods, + fill_method=fill_method, + limit=limit) + + if test_series: + exp = pd.Series(exp_vals * 2) + exp.name = 'vals' + grp = grp['vals'] + result = get_result(grp) + tm.assert_series_equal(result, exp) + else: + exp = DataFrame({'vals': exp_vals * 2}) + result = get_result(grp) + tm.assert_frame_equal(result, exp) + + +@pytest.mark.parametrize("func", [np.any, np.all]) +def test_any_all_np_func(func): + # GH 20653 + df = pd.DataFrame([['foo', True], + [np.nan, True], + ['foo', True]], columns=['key', 'val']) + + exp = pd.Series([True, np.nan, True], name='val') + + res = df.groupby('key')['val'].transform(func) + tm.assert_series_equal(res, exp)