Skip to content

Commit

Permalink
API: Added squeeze keyword to MultiIndex ctors
Browse files Browse the repository at this point in the history
Adds a new ``squeeze`` keyword to control whether the various MultiIndex
constructors should squeeze down to an ``Index`` when all the values are
length-1 tuples.

In [3]: MultiIndex.from_tuples([('a',), ('b',), ('c',)])
Out[3]: Index(['a', 'b', 'c'], dtype='object')

In [4]: MultiIndex.from_tuples([('a',), ('b',), ('c',)], squeeze=False)
Out[4]:
MultiIndex(levels=[['a', 'b', 'c']],
           labels=[[0, 1, 2]])

This is helpful for routines that rely on the MultiIndex constructors always
returning a MultiIndex, regardless of the data values (e.g. hash_tuples).
  • Loading branch information
TomAugspurger committed Aug 5, 2017
1 parent 929c66f commit 999035e
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 9 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ Other Enhancements
- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)
- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`)
- The various :class:`MultiIndex` constructors have gained a ``squeeze`` keyword to control whether the result is squeezed down to a regular ``Index`` when
all the values are tuples of length one. The default is ``True``, which preserves the previous behavior (:issue:`17178`)
- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`)
- :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`)
- :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
Expand Down
44 changes: 35 additions & 9 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import pandas.core.base as base
from pandas.util._decorators import (Appender, cache_readonly,
deprecate, deprecate_kwarg)
from pandas.util._validators import validate_bool_kwarg
import pandas.core.common as com
import pandas.core.missing as missing
import pandas.core.algorithms as algos
Expand Down Expand Up @@ -68,6 +69,11 @@ class MultiIndex(Index):
Copy the meta-data
verify_integrity : boolean, default True
Check that the levels/labels are consistent and valid
squeeze : bool, default True
Whether to squeeze an iterable of length 1 tuples down
to an Index, or return a MultiIndex with a single dimension
.. versionadded:: 0.21.0
"""

# initialize to zero-length tuples to make everything work
Expand All @@ -79,9 +85,11 @@ class MultiIndex(Index):
rename = Index.set_names

def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
copy=False, verify_integrity=True, _set_identity=True,
copy=False, verify_integrity=True, squeeze=True,
_set_identity=True,
name=None, **kwargs):

validate_bool_kwarg(squeeze, 'squeeze')
# compat with Index
if name is not None:
names = name
Expand All @@ -91,7 +99,7 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
raise ValueError('Length of levels and labels must be the same.')
if len(levels) == 0:
raise ValueError('Must pass non-zero number of levels/labels')
if len(levels) == 1:
if len(levels) == 1 and squeeze:
if names:
name = names[0]
else:
Expand Down Expand Up @@ -1052,7 +1060,7 @@ def lexsort_depth(self):
return 0

@classmethod
def from_arrays(cls, arrays, sortorder=None, names=None):
def from_arrays(cls, arrays, sortorder=None, names=None, squeeze=True):
"""
Convert arrays to MultiIndex
Expand All @@ -1064,6 +1072,11 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
sortorder : int or None
Level of sortedness (must be lexicographically sorted by that
level)
squeeze : bool, default True
Whether to squeeze an iterable of length 1 tuples down
to an Index, or return a MultiIndex with a single dimension
.. versionadded:: 0.21.0
Returns
-------
Expand All @@ -1080,7 +1093,7 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
MultiIndex.from_product : Make a MultiIndex from cartesian product
of iterables
"""
if len(arrays) == 1:
if len(arrays) == 1 and squeeze:
name = None if names is None else names[0]
return Index(arrays[0], name=name)

Expand All @@ -1097,10 +1110,10 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
names = [getattr(arr, "name", None) for arr in arrays]

return MultiIndex(levels=levels, labels=labels, sortorder=sortorder,
names=names, verify_integrity=False)
names=names, verify_integrity=False, squeeze=squeeze)

@classmethod
def from_tuples(cls, tuples, sortorder=None, names=None):
def from_tuples(cls, tuples, sortorder=None, names=None, squeeze=True):
"""
Convert list of tuples to MultiIndex
Expand All @@ -1111,6 +1124,11 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
sortorder : int or None
Level of sortedness (must be lexicographically sorted by that
level)
squeeze : bool, default True
Whether to squeeze an iterable of length 1 tuples down
to an Index, or return a MultiIndex with a single dimension
.. versionadded:: 0.21.0
Returns
-------
Expand Down Expand Up @@ -1143,10 +1161,11 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
else:
arrays = lzip(*tuples)

return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names)
return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names,
squeeze=squeeze)

@classmethod
def from_product(cls, iterables, sortorder=None, names=None):
def from_product(cls, iterables, sortorder=None, names=None, squeeze=True):
"""
Make a MultiIndex from the cartesian product of multiple iterables
Expand All @@ -1160,6 +1179,12 @@ def from_product(cls, iterables, sortorder=None, names=None):
names : list / sequence of strings or None
Names for the levels in the index.
squeeze : bool, default True
Whether to squeeze an iterable of length 1 tuples down
to an Index, or return a MultiIndex with a single dimension
.. versionadded:: 0.21.0
Returns
-------
index : MultiIndex
Expand All @@ -1184,7 +1209,8 @@ def from_product(cls, iterables, sortorder=None, names=None):

labels, levels = _factorize_from_iterables(iterables)
labels = cartesian_product(labels)
return MultiIndex(levels, labels, sortorder=sortorder, names=names)
return MultiIndex(levels, labels, sortorder=sortorder, names=names,
squeeze=squeeze)

def _sort_levels_monotonic(self):
"""
Expand Down
39 changes: 39 additions & 0 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,45 @@ def test_constructor_single_level(self):
labels=[[0, 1, 2, 3]])
assert single_level.name is None

def test_constructor_single_level_no_squeeze(self):
    """A single-level MultiIndex is not squeezed when ``squeeze=False``."""
    # Only one level is supplied, which is exactly the case the
    # ``squeeze`` keyword controls.
    level_values = [('foo',), ('bar',), ('baz',), ('qux',)]
    result = MultiIndex(levels=[level_values], labels=[[0, 1, 2, 3]],
                        names=['first'], squeeze=False)

    assert isinstance(result, MultiIndex)
    assert result.names == ['first']

def test_from_tuples_no_squeeze(self):
    """``from_tuples`` squeezes length-1 tuples unless ``squeeze=False``."""
    tuples = [('a',), ('b',)]
    expected = Index(['a', 'b'], name='name')

    # Default: length-1 tuples are squeezed down to a flat Index.
    squeezed = MultiIndex.from_tuples(tuples, names=['name'])
    tm.assert_index_equal(squeezed, expected)

    # Opting out keeps a MultiIndex whose only level holds the values.
    unsqueezed = MultiIndex.from_tuples(tuples, names=['name'],
                                        squeeze=False)
    assert isinstance(unsqueezed, MultiIndex)
    tm.assert_index_equal(unsqueezed.levels[0], expected)

def test_from_product_no_squeeze(self):
    """``from_product`` squeezes a single iterable unless ``squeeze=False``."""
    expected = Index(['a'], name='a')

    # Default: a single iterable is squeezed down to a flat Index.
    idx = MultiIndex.from_product([('a',)], names=['a'])
    tm.assert_index_equal(idx, expected)

    # Opting out must return a genuine MultiIndex. Assert the type
    # explicitly: the ``names`` comparison alone would also hold for a
    # plain Index carrying the same name.
    idx = MultiIndex.from_product([('a',)], names=['a'], squeeze=False)
    assert isinstance(idx, MultiIndex)
    assert idx.names == ['a']
    tm.assert_index_equal(idx.levels[0], expected)

def test_from_arrays_no_squeeze(self):
    """``from_arrays`` squeezes a single array unless ``squeeze=False``."""
    expected = Index(['a'], name='name')

    # Default: a single array is squeezed down to a flat Index.
    idx = MultiIndex.from_arrays([('a',)], names=['name'])
    assert isinstance(idx, Index)
    assert not isinstance(idx, MultiIndex)
    # Also check the values and name of the squeezed result, not just
    # its type.
    tm.assert_index_equal(idx, expected)

    # Opting out keeps a MultiIndex whose only level holds the values.
    result = MultiIndex.from_arrays([('a',)], names=['name'],
                                    squeeze=False)
    assert isinstance(result, MultiIndex)
    assert result.names == ['name']
    tm.assert_index_equal(result.levels[0], expected)

def test_constructor_no_levels(self):
tm.assert_raises_regex(ValueError, "non-zero number "
"of levels/labels",
Expand Down

0 comments on commit 999035e

Please sign in to comment.