Skip to content

Commit

Permalink
Separate MultiIndex names from levels
Browse files Browse the repository at this point in the history
  • Loading branch information
topper-123 committed Jul 5, 2019
1 parent 2efb607 commit ab2fdf5
Show file tree
Hide file tree
Showing 14 changed files with 84 additions and 61 deletions.
12 changes: 12 additions & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,18 @@ is respected in indexing. (:issue:`24076`, :issue:`16785`)
df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']
.. _whatsnew_0250.api_breaking.MultiIndex._names:


``MultiIndex.levels`` do not hold level names any longer
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

A :class:`MultiIndex` previously stored the level names as attributes of each of its
:attr:`MultiIndex.levels`. From pandas 0.25, the names can only be accessed through
:attr:`MultiIndex.names` (which was also possible previously); the indexes in
:attr:`MultiIndex.levels` no longer carry them. This is done in order to
make :attr:`MultiIndex.levels` more similar to :attr:`CategoricalIndex.categories`.


.. _whatsnew_0250.api_breaking.multi_indexing:


Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7797,7 +7797,8 @@ def _count_level(self, level, axis=0, numeric_only=False):
if isinstance(level, str):
level = count_axis._get_level_number(level)

level_index = count_axis.levels[level]
level_name = count_axis._names[level]
level_index = count_axis.levels[level]._shallow_copy(name=level_name)
level_codes = ensure_int64(count_axis.codes[level])
counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=0)

Expand Down
15 changes: 8 additions & 7 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def __new__(
result._set_levels(levels, copy=copy, validate=False)
result._set_codes(codes, copy=copy, validate=False)

result._names = [None for _ in levels]
if names is not None:
# handles name validation
result._set_names(names)
Expand Down Expand Up @@ -1176,7 +1177,7 @@ def __len__(self):
return len(self.codes[0])

def _get_names(self):
return FrozenList(level.name for level in self.levels)
return FrozenList(self._names)

def _set_names(self, names, level=None, validate=True):
"""
Expand Down Expand Up @@ -1222,7 +1223,7 @@ def _set_names(self, names, level=None, validate=True):
level = [self._get_level_number(l) for l in level]

# set the name
for l, name in zip(level, names):
for lev, name in zip(level, names):
if name is not None:
# GH 20527
# All items in 'names' need to be hashable:
Expand All @@ -1232,7 +1233,7 @@ def _set_names(self, names, level=None, validate=True):
self.__class__.__name__
)
)
self.levels[l].rename(name, inplace=True)
self._names[lev] = name

names = property(
fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex\n"""
Expand Down Expand Up @@ -1546,13 +1547,13 @@ def _get_level_values(self, level, unique=False):
values : ndarray
"""

values = self.levels[level]
lev = self.levels[level]
level_codes = self.codes[level]
name = self._names[level]
if unique:
level_codes = algos.unique(level_codes)
filled = algos.take_1d(values._values, level_codes, fill_value=values._na_value)
values = values._shallow_copy(filled)
return values
filled = algos.take_1d(lev._values, level_codes, fill_value=lev._na_value)
return lev._shallow_copy(filled, name=name)

def get_level_values(self, level):
"""
Expand Down
17 changes: 12 additions & 5 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,13 @@ def get_new_values(self):
def get_new_columns(self):
if self.value_columns is None:
if self.lift == 0:
return self.removed_level
lev = self.removed_level._shallow_copy()
lev.name = self.removed_name
return lev

lev = self.removed_level
return lev.insert(0, lev._na_value)
lev = self.removed_level.insert(0, item=self.removed_level._na_value)
lev.name = self.removed_name
return lev

stride = len(self.removed_level) + self.lift
width = len(self.value_columns)
Expand Down Expand Up @@ -302,7 +305,9 @@ def get_new_index(self):
lev, lab = self.new_index_levels[0], result_codes[0]
if (lab == -1).any():
lev = lev.insert(len(lev), lev._na_value)
return lev.take(lab)
new_index = lev.take(lab)
new_index.name = self.new_index_names[0]
return new_index

return MultiIndex(
levels=self.new_index_levels,
Expand Down Expand Up @@ -658,7 +663,9 @@ def _convert_level_number(level_num, columns):
new_names = this.columns.names[:-1]
new_columns = MultiIndex.from_tuples(unique_groups, names=new_names)
else:
new_columns = unique_groups = this.columns.levels[0]
new_columns = this.columns.levels[0]._shallow_copy()
new_columns.name = this.columns.names[0]
unique_groups = new_columns

# time to ravel the values
new_data = {}
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/json/table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,10 @@ def build_table_schema(data, index=True, primary_key=None, version=True):

if index:
if data.index.nlevels > 1:
for level in data.index.levels:
fields.append(convert_pandas_type_to_json_field(level))
for level, name in zip(data.index.levels, data.index.names):
new_field = convert_pandas_type_to_json_field(level)
new_field["name"] = name
fields.append(new_field)
else:
fields.append(convert_pandas_type_to_json_field(data.index))

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_alter_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ def test_reset_index(self, float_frame):
):
values = lev.take(level_codes)
name = names[i]
tm.assert_index_equal(values, Index(deleveled[name]))
tm.assert_index_equal(values, Index(deleveled[name]), check_names=False)

stacked.index.names = [None, None]
deleveled2 = stacked.reset_index()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/multi/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_astype(idx):
actual = idx.astype("O")
assert_copy(actual.levels, expected.levels)
assert_copy(actual.codes, expected.codes)
assert [level.name for level in actual.levels] == list(expected.names)
assert actual.names == list(expected.names)

with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
idx.astype(np.dtype(int))
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/indexes/multi/test_constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_constructor_single_level():
levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
)
assert isinstance(result, MultiIndex)
expected = Index(["foo", "bar", "baz", "qux"], name="first")
expected = Index(["foo", "bar", "baz", "qux"])
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["first"]

Expand Down Expand Up @@ -292,8 +292,9 @@ def test_from_arrays_empty():
# 1 level
result = MultiIndex.from_arrays(arrays=[[]], names=["A"])
assert isinstance(result, MultiIndex)
expected = Index([], name="A")
expected = Index([])
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["A"]

# N levels
for N in [2, 3]:
Expand Down Expand Up @@ -426,8 +427,9 @@ def test_from_product_empty_zero_levels():

def test_from_product_empty_one_level():
result = MultiIndex.from_product([[]], names=["A"])
expected = pd.Index([], name="A")
expected = pd.Index([])
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["A"]


@pytest.mark.parametrize(
Expand Down
25 changes: 11 additions & 14 deletions pandas/tests/indexes/multi/test_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,25 @@ def test_index_name_retained():


def test_changing_names(idx):

# names should be applied to levels
level_names = [level.name for level in idx.levels]
check_level_names(idx, idx.names)
assert [level.name for level in idx.levels] == [None, None]

view = idx.view()
copy = idx.copy()
shallow_copy = idx._shallow_copy()

# changing names should change level names on object
# changing names should not change level names on object
new_names = [name + "a" for name in idx.names]
idx.names = new_names
check_level_names(idx, new_names)
check_level_names(idx, [None, None])

# but not on copies
check_level_names(view, level_names)
check_level_names(copy, level_names)
check_level_names(shallow_copy, level_names)
# and not on copies
check_level_names(view, [None, None])
check_level_names(copy, [None, None])
check_level_names(shallow_copy, [None, None])

# and copies shouldn't change original
shallow_copy.names = [name + "c" for name in shallow_copy.names]
check_level_names(idx, new_names)
check_level_names(idx, [None, None])


def test_take_preserve_name(idx):
Expand Down Expand Up @@ -84,7 +81,8 @@ def test_names(idx, index_names):
# names are assigned in setup
names = index_names
level_names = [level.name for level in idx.levels]
assert names == level_names
assert names == ["first", "second"]
assert level_names == [None, None]

# setting bad names on existing
index = idx
Expand All @@ -111,9 +109,8 @@ def test_names(idx, index_names):

# names are assigned
index.names = ["a", "b"]
ind_names = list(index.names)
level_names = [level.name for level in index.levels]
assert ind_names == level_names
assert level_names == [None, None]


def test_duplicate_level_names_access_raises(idx):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/multi/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ def check_level_names(index, names):
def test_reindex(idx):
result, indexer = idx.reindex(list(idx[:4]))
assert isinstance(result, MultiIndex)
check_level_names(result, idx[:4].names)
check_level_names(result, [None, None])

result, indexer = idx.reindex(list(idx))
assert isinstance(result, MultiIndex)
assert indexer is None
check_level_names(result, idx.names)
check_level_names(result, [None, None])


def test_reindex_level(idx):
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/indexes/multi/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ def test_insert(idx):
# key not contained in all levels
new_index = idx.insert(0, ("abc", "three"))

exp0 = Index(list(idx.levels[0]) + ["abc"], name="first")
exp0 = Index(list(idx.levels[0]) + ["abc"])
tm.assert_index_equal(new_index.levels[0], exp0)
assert new_index.names == ["first", "second"]

exp1 = Index(list(idx.levels[1]) + ["three"], name="second")
exp1 = Index(list(idx.levels[1]) + ["three"])
tm.assert_index_equal(new_index.levels[1], exp1)
assert new_index[0] == ("abc", "three")

Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1220,8 +1220,10 @@ def test_concat_keys_specific_levels(self):
names=["group_key"],
)

tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key"))
assert result.columns.names[0] == "group_key"
tm.assert_index_equal(result.columns.levels[0], Index(level))
tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3]))

assert result.columns.names == ["group_key", None]

def test_concat_dataframe_keys_bug(self, sort):
t1 = DataFrame(
Expand Down Expand Up @@ -1410,10 +1412,8 @@ def test_concat_keys_and_levels(self):
keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
names=["first", "second"],
)
assert result.index.names == ("first", "second") + (None,)
tm.assert_index_equal(
result.index.levels[0], Index(["baz", "foo"], name="first")
)
assert result.index.names == ("first", "second", None)
tm.assert_index_equal(result.index.levels[0], Index(["baz", "foo"]))

def test_concat_keys_levels_no_overlap(self):
# GH #1406
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/reshape/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@ def test_reshaping_multi_index_categorical(self):
df.index.names = ["major", "minor"]
df["str"] = "foo"

dti = df.index.levels[0]
dti = df.index.levels[0].set_names(["major"])

df["category"] = df["str"].astype("category")
result = df["category"].unstack()
Expand Down
32 changes: 16 additions & 16 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def test_count_level_corner(self):
df = self.frame[:0]
result = df.count(level=0)
expected = (
DataFrame(index=s.index.levels[0], columns=df.columns)
DataFrame(index=s.index.levels[0].set_names(["first"]), columns=df.columns)
.fillna(0)
.astype(np.int64)
)
Expand Down Expand Up @@ -960,13 +960,11 @@ def test_count(self):

result = series.count(level="b")
expect = self.series.count(level=1)
tm.assert_series_equal(result, expect, check_names=False)
assert result.index.name == "b"
tm.assert_series_equal(result, expect)

result = series.count(level="a")
expect = self.series.count(level=0)
tm.assert_series_equal(result, expect, check_names=False)
assert result.index.name == "a"
tm.assert_series_equal(result, expect)

msg = "Level x not found"
with pytest.raises(KeyError, match=msg):
Expand Down Expand Up @@ -1020,10 +1018,10 @@ def aggf(x):
# for good measure, groupby detail
level_index = frame._get_axis(axis).levels[level]

tm.assert_index_equal(leftside._get_axis(axis), level_index)
tm.assert_index_equal(rightside._get_axis(axis), level_index)
tm.assert_index_equal(leftside._get_axis(axis), level_index, check_names=False)
tm.assert_index_equal(rightside._get_axis(axis), level_index, check_names=False)

tm.assert_frame_equal(leftside, rightside)
tm.assert_frame_equal(leftside, rightside, check_names=False)

def test_stat_op_corner(self):
obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)]))
Expand Down Expand Up @@ -1609,12 +1607,12 @@ def test_constructor_with_tz(self):
)

result = MultiIndex.from_arrays([index, columns])
tm.assert_index_equal(result.levels[0], index)
tm.assert_index_equal(result.levels[1], columns)
tm.assert_index_equal(result.levels[0], index, check_names=False)
tm.assert_index_equal(result.levels[1], columns, check_names=False)

result = MultiIndex.from_arrays([Series(index), Series(columns)])
tm.assert_index_equal(result.levels[0], index)
tm.assert_index_equal(result.levels[1], columns)
tm.assert_index_equal(result.levels[0], index, check_names=False)
tm.assert_index_equal(result.levels[1], columns, check_names=False)

def test_set_index_datetime(self):
# GH 3950
Expand Down Expand Up @@ -1642,12 +1640,14 @@ def test_set_index_datetime(self):
expected = expected.tz_localize("UTC").tz_convert("US/Pacific")

df = df.set_index("label", append=True)
tm.assert_index_equal(df.index.levels[0], expected)
tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label"))
tm.assert_index_equal(df.index.levels[0], expected, check_names=False)
tm.assert_index_equal(df.index.levels[1], Index(["a", "b"]))
assert df.index.names == ["datetime", "label"]

df = df.swaplevel(0, 1)
tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label"))
tm.assert_index_equal(df.index.levels[1], expected)
tm.assert_index_equal(df.index.levels[0], Index(["a", "b"]))
tm.assert_index_equal(df.index.levels[1], expected, check_names=False)
assert df.index.names == ["label", "datetime"]

df = DataFrame(np.random.random(6))
idx1 = pd.DatetimeIndex(
Expand Down

0 comments on commit ab2fdf5

Please sign in to comment.