Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate MultiIndex names from levels #27242

Merged
merged 5 commits into from
Oct 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,37 @@ source, you should no longer need to install Cython into your build environment
Backwards incompatible API changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`).
.. _whatsnew_1000.api_breaking.MultiIndex._names:

``MultiIndex.levels`` do not hold level names any longer
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

- A :class:`MultiIndex` previously stored the level names as attributes of each of its
TomAugspurger marked this conversation as resolved.
Show resolved Hide resolved
:attr:`MultiIndex.levels`. From Pandas 1.0, the names are only accessed through
:attr:`MultiIndex.names` (which was also possible previously). This is done in order to
make :attr:`MultiIndex.levels` more similar to :attr:`CategoricalIndex.categories` (:issue:`27242`).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can u show the previous way in a code block and the new way in an ipython block

*pandas 0.25.x*

.. code-block:: ipython

In [1]: mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y'])
In [2]: mi
Out[2]:
MultiIndex([(1, 'a'),
(1, 'b'),
(2, 'a'),
(2, 'b')],
names=['x', 'y'])
In [3]: mi.levels[0].name
Out[3]: 'x'

*pandas 1.0.0*

.. ipython:: python

mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y'])
mi.levels[0].name

- :class:`pandas.core.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`)

*pandas 0.25.x*
Expand All @@ -149,6 +179,7 @@ Backwards incompatible API changes
Other API changes
^^^^^^^^^^^^^^^^^

- :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`)
- :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for integer and ``np.nan`` mix (:issue:`27283`)
- :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly provided (:issue:`27292`)
- In order to improve tab-completion, Pandas does not include most deprecated attributes when introspecting a pandas object using ``dir`` (e.g. ``dir(df)``).
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7772,7 +7772,8 @@ def _count_level(self, level, axis=0, numeric_only=False):
if isinstance(level, str):
level = count_axis._get_level_number(level)

level_index = count_axis.levels[level]
level_name = count_axis._names[level]
level_index = count_axis.levels[level]._shallow_copy(name=level_name)
level_codes = ensure_int64(count_axis.codes[level])
counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=0)

Expand Down
15 changes: 8 additions & 7 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ def __new__(
result._set_levels(levels, copy=copy, validate=False)
result._set_codes(codes, copy=copy, validate=False)

result._names = [None] * len(levels)
if names is not None:
# handles name validation
result._set_names(names)
Expand Down Expand Up @@ -1216,7 +1217,7 @@ def __len__(self):
return len(self.codes[0])

def _get_names(self):
return FrozenList(level.name for level in self.levels)
return FrozenList(self._names)

def _set_names(self, names, level=None, validate=True):
"""
Expand Down Expand Up @@ -1262,7 +1263,7 @@ def _set_names(self, names, level=None, validate=True):
level = [self._get_level_number(l) for l in level]

# set the name
for l, name in zip(level, names):
for lev, name in zip(level, names):
if name is not None:
# GH 20527
# All items in 'names' need to be hashable:
Expand All @@ -1272,7 +1273,7 @@ def _set_names(self, names, level=None, validate=True):
self.__class__.__name__
)
)
self.levels[l].rename(name, inplace=True)
self._names[lev] = name

names = property(
fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex.\n"""
Expand Down Expand Up @@ -1582,13 +1583,13 @@ def _get_level_values(self, level, unique=False):
values : ndarray
"""

values = self.levels[level]
lev = self.levels[level]
level_codes = self.codes[level]
name = self._names[level]
if unique:
level_codes = algos.unique(level_codes)
filled = algos.take_1d(values._values, level_codes, fill_value=values._na_value)
values = values._shallow_copy(filled)
return values
filled = algos.take_1d(lev._values, level_codes, fill_value=lev._na_value)
return lev._shallow_copy(filled, name=name)

def get_level_values(self, level):
"""
Expand Down
17 changes: 9 additions & 8 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,10 @@ def get_new_values(self):
def get_new_columns(self):
if self.value_columns is None:
if self.lift == 0:
return self.removed_level
return self.removed_level._shallow_copy(name=self.removed_name)

lev = self.removed_level
return lev.insert(0, lev._na_value)
lev = self.removed_level.insert(0, item=self.removed_level._na_value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use .rename()

return lev.rename(self.removed_name)

stride = len(self.removed_level) + self.lift
width = len(self.value_columns)
Expand Down Expand Up @@ -298,10 +298,10 @@ def get_new_index(self):

# construct the new index
if len(self.new_index_levels) == 1:
lev, lab = self.new_index_levels[0], result_codes[0]
if (lab == -1).any():
lev = lev.insert(len(lev), lev._na_value)
return lev.take(lab)
level, level_codes = self.new_index_levels[0], result_codes[0]
if (level_codes == -1).any():
level = level.insert(len(level), level._na_value)
return level.take(level_codes).rename(self.new_index_names[0])

return MultiIndex(
levels=self.new_index_levels,
Expand Down Expand Up @@ -661,7 +661,8 @@ def _convert_level_number(level_num, columns):
new_names = this.columns.names[:-1]
new_columns = MultiIndex.from_tuples(unique_groups, names=new_names)
else:
new_columns = unique_groups = this.columns.levels[0]
new_columns = this.columns.levels[0]._shallow_copy(name=this.columns.names[0])
unique_groups = new_columns

# time to ravel the values
new_data = {}
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/json/_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,10 @@ def build_table_schema(data, index=True, primary_key=None, version=True):

if index:
if data.index.nlevels > 1:
for level in data.index.levels:
fields.append(convert_pandas_type_to_json_field(level))
for level, name in zip(data.index.levels, data.index.names):
new_field = convert_pandas_type_to_json_field(level)
new_field["name"] = name
fields.append(new_field)
else:
fields.append(convert_pandas_type_to_json_field(data.index))

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_alter_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,7 +978,7 @@ def test_reset_index(self, float_frame):
):
values = lev.take(level_codes)
name = names[i]
tm.assert_index_equal(values, Index(deleveled[name]))
tm.assert_index_equal(values, Index(deleveled[name].rename(name=None)))

stacked.index.names = [None, None]
deleveled2 = stacked.reset_index()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/multi/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_astype(idx):
actual = idx.astype("O")
assert_copy(actual.levels, expected.levels)
assert_copy(actual.codes, expected.codes)
assert [level.name for level in actual.levels] == list(expected.names)
assert actual.names == list(expected.names)

with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
idx.astype(np.dtype(int))
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/indexes/multi/test_constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_constructor_single_level():
levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
)
assert isinstance(result, MultiIndex)
expected = Index(["foo", "bar", "baz", "qux"], name="first")
expected = Index(["foo", "bar", "baz", "qux"])
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["first"]

Expand Down Expand Up @@ -292,8 +292,9 @@ def test_from_arrays_empty():
# 1 level
result = MultiIndex.from_arrays(arrays=[[]], names=["A"])
assert isinstance(result, MultiIndex)
expected = Index([], name="A")
expected = Index([])
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["A"]

# N levels
for N in [2, 3]:
Expand Down Expand Up @@ -439,8 +440,9 @@ def test_from_product_empty_zero_levels():

def test_from_product_empty_one_level():
result = MultiIndex.from_product([[]], names=["A"])
expected = pd.Index([], name="A")
expected = pd.Index([])
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["A"]


@pytest.mark.parametrize(
Expand Down
28 changes: 12 additions & 16 deletions pandas/tests/indexes/multi/test_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,25 @@ def test_index_name_retained():


def test_changing_names(idx):

# names should be applied to levels
level_names = [level.name for level in idx.levels]
check_level_names(idx, idx.names)
assert [level.name for level in idx.levels] == [None, None]

view = idx.view()
copy = idx.copy()
shallow_copy = idx._shallow_copy()

# changing names should change level names on object
# changing names should not change level names on object
new_names = [name + "a" for name in idx.names]
idx.names = new_names
check_level_names(idx, new_names)
check_level_names(idx, [None, None])

# but not on copies
check_level_names(view, level_names)
check_level_names(copy, level_names)
check_level_names(shallow_copy, level_names)
# and not on copies
check_level_names(view, [None, None])
check_level_names(copy, [None, None])
check_level_names(shallow_copy, [None, None])

# and copies shouldn't change original
shallow_copy.names = [name + "c" for name in shallow_copy.names]
check_level_names(idx, new_names)
check_level_names(idx, [None, None])


def test_take_preserve_name(idx):
Expand Down Expand Up @@ -82,9 +79,9 @@ def test_copy_names():
def test_names(idx, index_names):

# names are assigned in setup
names = index_names
assert index_names == ["first", "second"]
level_names = [level.name for level in idx.levels]
assert names == level_names
assert level_names == [None, None]

# setting bad names on existing
index = idx
Expand All @@ -109,11 +106,10 @@ def test_names(idx, index_names):
names=["first", "second", "third"],
)

# names are assigned
# names are assigned on index, but not transferred to the levels
index.names = ["a", "b"]
ind_names = list(index.names)
level_names = [level.name for level in index.levels]
assert ind_names == level_names
assert level_names == [None, None]


def test_duplicate_level_names_access_raises(idx):
Expand Down
10 changes: 4 additions & 6 deletions pandas/tests/indexes/multi/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,17 @@
import pandas.util.testing as tm


def check_level_names(index, names):
assert [level.name for level in index.levels] == list(names)


def test_reindex(idx):
result, indexer = idx.reindex(list(idx[:4]))
assert isinstance(result, MultiIndex)
check_level_names(result, idx[:4].names)
assert result.names == ["first", "second"]
assert [level.name for level in result.levels] == [None, None]

result, indexer = idx.reindex(list(idx))
assert isinstance(result, MultiIndex)
assert indexer is None
check_level_names(result, idx.names)
assert result.names == ["first", "second"]
assert [level.name for level in result.levels] == [None, None]


def test_reindex_level(idx):
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/indexes/multi/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ def test_insert(idx):
# key not contained in all levels
new_index = idx.insert(0, ("abc", "three"))

exp0 = Index(list(idx.levels[0]) + ["abc"], name="first")
exp0 = Index(list(idx.levels[0]) + ["abc"])
tm.assert_index_equal(new_index.levels[0], exp0)
assert new_index.names == ["first", "second"]

exp1 = Index(list(idx.levels[1]) + ["three"], name="second")
exp1 = Index(list(idx.levels[1]) + ["three"])
tm.assert_index_equal(new_index.levels[1], exp1)
assert new_index[0] == ("abc", "three")

Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1219,8 +1219,10 @@ def test_concat_keys_specific_levels(self):
names=["group_key"],
)

tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key"))
assert result.columns.names[0] == "group_key"
tm.assert_index_equal(result.columns.levels[0], Index(level))
tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3]))

assert result.columns.names == ["group_key", None]

def test_concat_dataframe_keys_bug(self, sort):
t1 = DataFrame(
Expand Down Expand Up @@ -1409,10 +1411,8 @@ def test_concat_keys_and_levels(self):
keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
names=["first", "second"],
)
assert result.index.names == ("first", "second") + (None,)
tm.assert_index_equal(
result.index.levels[0], Index(["baz", "foo"], name="first")
)
assert result.index.names == ("first", "second", None)
tm.assert_index_equal(result.index.levels[0], Index(["baz", "foo"]))

def test_concat_keys_levels_no_overlap(self):
# GH #1406
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/reshape/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,16 +618,15 @@ def test_reshaping_multi_index_categorical(self):
df.index.names = ["major", "minor"]
df["str"] = "foo"

dti = df.index.levels[0]

df["category"] = df["str"].astype("category")
result = df["category"].unstack()

dti = df.index.levels[0]
c = Categorical(["foo"] * len(dti))
expected = DataFrame(
{"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
columns=Index(list("ABCD"), name="minor"),
index=dti,
index=dti.rename("major"),
)
tm.assert_frame_equal(result, expected)

Expand Down
Loading