From 4c3035fa57b9c1a9d7f347e6f442bdc5d2299f2a Mon Sep 17 00:00:00 2001
From: Fabrizio Primerano
Date: Sun, 23 Apr 2023 21:19:26 +0200
Subject: [PATCH] TST: Grouping with categorical interval columns (#52818)

---
Notes (not part of the commit message):

  Adds a regression test referenced as GH#34164. The test bins column "x"
  with pd.qcut into four right-closed intervals, groups by that categorical
  key together with column "w" using observed=False, and checks that the
  mean of "x" is reported for every (interval, w) combination, with NaN for
  the combinations that have no rows. Interval is added to the test
  module's "from pandas import (...)" list because the expected index is
  built from Interval objects.

  The line structure of this patch was restored from a copy in which all
  newlines and indentation had been collapsed. Indentation of the context
  lines is reconstructed, so verify it against the target file before
  applying, or apply with "git apply --ignore-whitespace".

 pandas/tests/groupby/test_groupby.py | 42 ++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 53148eb37e15ad..cd33f031720e18 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -16,6 +16,7 @@
     DataFrame,
     Grouper,
     Index,
+    Interval,
     MultiIndex,
     RangeIndex,
     Series,
@@ -2972,6 +2973,47 @@ def test_groupby_numeric_only_std_no_result(numeric_only):
             dfgb.std(numeric_only=numeric_only)
 
 
+def test_grouping_with_categorical_interval_columns():
+    # GH#34164
+    df = DataFrame({"x": [0.1, 0.2, 0.3, -0.4, 0.5], "w": ["a", "b", "a", "c", "a"]})
+    qq = pd.qcut(df["x"], q=np.linspace(0, 1, 5))
+    result = df.groupby([qq, "w"], observed=False)["x"].agg("mean")
+    categorical_index_level_1 = Categorical(
+        [
+            Interval(-0.401, 0.1, closed="right"),
+            Interval(0.1, 0.2, closed="right"),
+            Interval(0.2, 0.3, closed="right"),
+            Interval(0.3, 0.5, closed="right"),
+        ],
+        ordered=True,
+    )
+    index_level_2 = ["a", "b", "c"]
+    mi = MultiIndex.from_product(
+        [categorical_index_level_1, index_level_2], names=["x", "w"]
+    )
+    expected = Series(
+        np.array(
+            [
+                0.1,
+                np.nan,
+                -0.4,
+                np.nan,
+                0.2,
+                np.nan,
+                0.3,
+                np.nan,
+                np.nan,
+                0.5,
+                np.nan,
+                np.nan,
+            ]
+        ),
+        index=mi,
+        name="x",
+    )
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize("bug_var", [1, "a"])
 def test_groupby_sum_on_nan_should_return_nan(bug_var):
     # GH 24196