From 3dd215053ff0e604814b0373ae8d6f5da25dce28 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 18 Oct 2019 07:43:34 -0500 Subject: [PATCH] API: Restore getting name from MultiIndex level (#29061) * API: Restore getting name from MultiIndex level xref https://issues.apache.org/jira/browse/ARROW-6922 / https://github.com/pandas-dev/pandas/pull/27242#issuecomment-543302582 / https://github.com/pandas-dev/pandas/issues/29032 No docs yet, since it isn't clear how this will eventually sort out. But we at least want to preserve this behavior for 1.0 * fixups --- pandas/core/indexes/multi.py | 7 ++++-- pandas/tests/frame/test_alter_axes.py | 2 +- .../tests/indexes/multi/test_constructor.py | 6 ++--- pandas/tests/indexes/multi/test_names.py | 23 ++++++++++++------- pandas/tests/indexes/multi/test_reindex.py | 4 ++-- pandas/tests/indexes/multi/test_reshape.py | 4 ++-- pandas/tests/reshape/test_concat.py | 6 +++-- pandas/tests/test_multilevel.py | 21 ++++++++--------- 8 files changed, 42 insertions(+), 31 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6b89896fe5255a..734ffa319f5a12 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -639,7 +639,10 @@ def from_frame(cls, df, sortorder=None, names=None): @property def levels(self): - return self._levels + result = [ + x._shallow_copy(name=name) for x, name in zip(self._levels, self._names) + ] + return FrozenList(result) @property def _values(self): @@ -830,7 +833,7 @@ def _set_codes( if level is None: new_codes = FrozenList( _ensure_frozen(level_codes, lev, copy=copy)._shallow_copy() - for lev, level_codes in zip(self.levels, codes) + for lev, level_codes in zip(self._levels, codes) ) else: level = [self._get_level_number(l) for l in level] diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index b310335be5f650..017cbea7ec7230 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -978,7 +978,7 @@ def test_reset_index(self, float_frame): ): values = lev.take(level_codes) name = names[i] - tm.assert_index_equal(values, Index(deleveled[name].rename(name=None))) + tm.assert_index_equal(values, Index(deleveled[name])) stacked.index.names = [None, None] deleveled2 = stacked.reset_index() diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 993979f31a35b9..ff98da85cfb2d7 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -17,7 +17,7 @@ def test_constructor_single_level(): levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"] ) assert isinstance(result, MultiIndex) - expected = Index(["foo", "bar", "baz", "qux"]) + expected = Index(["foo", "bar", "baz", "qux"], name="first") tm.assert_index_equal(result.levels[0], expected) assert result.names == ["first"] @@ -292,7 +292,7 @@ def test_from_arrays_empty(): # 1 level result = MultiIndex.from_arrays(arrays=[[]], names=["A"]) assert isinstance(result, MultiIndex) - expected = Index([]) + expected = Index([], name="A") tm.assert_index_equal(result.levels[0], expected) assert result.names == ["A"] @@ -440,7 +440,7 @@ def test_from_product_empty_zero_levels(): def test_from_product_empty_one_level(): result = MultiIndex.from_product([[]], names=["A"]) - expected = pd.Index([]) + expected = pd.Index([], name="A") tm.assert_index_equal(result.levels[0], expected) assert result.names == ["A"] diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py index 679e045a68f294..5c3a48c9dd4810 100644 --- a/pandas/tests/indexes/multi/test_names.py +++ b/pandas/tests/indexes/multi/test_names.py @@ -27,7 +27,7 @@ def test_index_name_retained(): def test_changing_names(idx): - assert [level.name for level in idx.levels] == [None, None] + assert [level.name for level in idx.levels] == ["first", "second"] view = idx.view() copy = idx.copy() @@ -36,16 +36,16 @@ def test_changing_names(idx): # changing names should not change level names on object new_names = [name + "a" for name in idx.names] idx.names = new_names - check_level_names(idx, [None, None]) + check_level_names(idx, ["firsta", "seconda"]) # and not on copies - check_level_names(view, [None, None]) - check_level_names(copy, [None, None]) - check_level_names(shallow_copy, [None, None]) + check_level_names(view, ["first", "second"]) + check_level_names(copy, ["first", "second"]) + check_level_names(shallow_copy, ["first", "second"]) # and copies shouldn't change original shallow_copy.names = [name + "c" for name in shallow_copy.names] - check_level_names(idx, [None, None]) + check_level_names(idx, ["firsta", "seconda"]) def test_take_preserve_name(idx): @@ -81,7 +81,7 @@ def test_names(idx, index_names): # names are assigned in setup assert index_names == ["first", "second"] level_names = [level.name for level in idx.levels] - assert level_names == [None, None] + assert level_names == index_names # setting bad names on existing index = idx @@ -109,7 +109,7 @@ def test_names(idx, index_names): # names are assigned on index, but not transferred to the levels index.names = ["a", "b"] level_names = [level.name for level in index.levels] - assert level_names == [None, None] + assert level_names == ["a", "b"] def test_duplicate_level_names_access_raises(idx): @@ -117,3 +117,10 @@ def test_duplicate_level_names_access_raises(idx): idx.names = ["foo", "foo"] with pytest.raises(ValueError, match="name foo occurs multiple times"): idx._get_level_number("foo") + + +def test_get_names_from_levels(): + idx = pd.MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"]) + + assert idx.levels[0].name == "a" + assert idx.levels[1].name == "b" diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 970288e5747c7f..513efa8941de8e 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -10,13 +10,13 @@ def test_reindex(idx): result, indexer = idx.reindex(list(idx[:4])) assert isinstance(result, MultiIndex) assert result.names == ["first", "second"] - assert [level.name for level in result.levels] == [None, None] + assert [level.name for level in result.levels] == ["first", "second"] result, indexer = idx.reindex(list(idx)) assert isinstance(result, MultiIndex) assert indexer is None assert result.names == ["first", "second"] - assert [level.name for level in result.levels] == [None, None] + assert [level.name for level in result.levels] == ["first", "second"] def test_reindex_level(idx): diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index e79f212f300785..37df420e9ea2e3 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -15,11 +15,11 @@ def test_insert(idx): # key not contained in all levels new_index = idx.insert(0, ("abc", "three")) - exp0 = Index(list(idx.levels[0]) + ["abc"]) + exp0 = Index(list(idx.levels[0]) + ["abc"], name="first") tm.assert_index_equal(new_index.levels[0], exp0) assert new_index.names == ["first", "second"] - exp1 = Index(list(idx.levels[1]) + ["three"]) + exp1 = Index(list(idx.levels[1]) + ["three"], name="second") tm.assert_index_equal(new_index.levels[1], exp1) assert new_index[0] == ("abc", "three") diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 33cbaaed1848d7..eda7bc0ec4df7e 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1219,7 +1219,7 @@ def test_concat_keys_specific_levels(self): names=["group_key"], ) - tm.assert_index_equal(result.columns.levels[0], Index(level)) + tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key")) tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3])) assert result.columns.names == ["group_key", None] @@ -1412,7 +1412,9 @@ def test_concat_keys_and_levels(self): names=["first", "second"], ) assert result.index.names == ("first", "second", None) - tm.assert_index_equal(result.index.levels[0], Index(["baz", "foo"])) + tm.assert_index_equal( + result.index.levels[0], Index(["baz", "foo"], name="first") + ) def test_concat_keys_levels_no_overlap(self): # GH #1406 diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 76436f44808099..79c9fe2b60bd95 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -975,11 +975,11 @@ def test_count(self): series.index.names = ["a", "b"] result = series.count(level="b") - expect = self.series.count(level=1) + expect = self.series.count(level=1).rename_axis("b") tm.assert_series_equal(result, expect) result = series.count(level="a") - expect = self.series.count(level=0) + expect = self.series.count(level=0).rename_axis("a") tm.assert_series_equal(result, expect) msg = "Level x not found" @@ -1641,16 +1641,14 @@ def test_constructor_with_tz(self): result = MultiIndex.from_arrays([index, columns]) assert result.names == ["dt1", "dt2"] - # levels don't have names set, so set name of index/columns to None in checks - tm.assert_index_equal(result.levels[0], index.rename(name=None)) - tm.assert_index_equal(result.levels[1], columns.rename(name=None)) + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) result = MultiIndex.from_arrays([Series(index), Series(columns)]) assert result.names == ["dt1", "dt2"] - # levels don't have names set, so set name of index/columns to None in checks - tm.assert_index_equal(result.levels[0], index.rename(name=None)) - tm.assert_index_equal(result.levels[1], columns.rename(name=None)) + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) def test_set_index_datetime(self): # GH 3950 @@ -1672,17 +1670,18 @@ def test_set_index_datetime(self): df.index = df.index.tz_convert("US/Pacific") expected = pd.DatetimeIndex( - ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"] + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + name="datetime", ) expected = expected.tz_localize("UTC").tz_convert("US/Pacific") df = df.set_index("label", append=True) tm.assert_index_equal(df.index.levels[0], expected) - tm.assert_index_equal(df.index.levels[1], Index(["a", "b"])) + tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label")) assert df.index.names == ["datetime", "label"] df = df.swaplevel(0, 1) - tm.assert_index_equal(df.index.levels[0], Index(["a", "b"])) + tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label")) tm.assert_index_equal(df.index.levels[1], expected) assert df.index.names == ["label", "datetime"]