From 8a98f5ed541c87a9bf101c9331bd6cfa8f007cc9 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 21 Jun 2017 06:52:19 -0400 Subject: [PATCH] BUG: do not raise UnsortedIndexError if sorting is not required closes #16734 Author: Pietro Battiston This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #16736 from toobaz/index_what_you_can and squashes the following commits: f77e2b3 [Pietro Battiston] BUG: do not raise UnsortedIndexError if sorting is not required --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/common.py | 7 +++++++ pandas/core/indexes/multi.py | 21 ++++++++------------- pandas/tests/indexes/test_multi.py | 7 ++++++- pandas/tests/indexing/test_multiindex.py | 17 +++++++++++++---- 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2daa03e6e97b1..046a6c885bd24 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -99,6 +99,7 @@ Indexing ^^^^^^^^ - When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). +- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). I/O diff --git a/pandas/core/common.py b/pandas/core/common.py index 0dc6a7a1e9c7b..ed768a5743666 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -411,6 +411,13 @@ def is_null_slice(obj): obj.stop is None and obj.step is None) +def is_true_slices(l): + """ + Find non-trivial slices in "l": return a list of booleans with same length. + """ + return [isinstance(k, slice) and not is_null_slice(k) for k in l] + + def is_full_slice(obj, l): """ we have a full length slice """ return (isinstance(obj, slice) and obj.start == 0 and obj.stop == l and diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f30da5b05f8ae..1a762732b1213 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -23,7 +23,8 @@ from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.common import (_values_from_object, is_bool_indexer, - is_null_slice) + is_null_slice, + is_true_slices) import pandas.core.base as base from pandas.util._decorators import (Appender, cache_readonly, @@ -1035,12 +1036,6 @@ def is_lexsorted(self): """ return self.lexsort_depth == self.nlevels - def is_lexsorted_for_tuple(self, tup): - """ - Return True if we are correctly lexsorted given the passed tuple - """ - return len(tup) <= self.lexsort_depth - @cache_readonly def lexsort_depth(self): if self.sortorder is not None: @@ -2262,12 +2257,12 @@ def get_locs(self, tup): """ # must be lexsorted to at least as many levels - if not self.is_lexsorted_for_tuple(tup): - raise UnsortedIndexError('MultiIndex Slicing requires the index ' - 'to be fully lexsorted tuple len ({0}), ' - 'lexsort depth ({1})' - .format(len(tup), self.lexsort_depth)) - + true_slices = [i for (i, s) in enumerate(is_true_slices(tup)) if s] + if true_slices and true_slices[-1] >= self.lexsort_depth: + raise UnsortedIndexError('MultiIndex slicing requires the index ' + 'to be lexsorted: slicing on levels {0}, ' + 'lexsort depth {1}' + .format(true_slices, self.lexsort_depth)) # indexer # this is the list of all values that we want to select n = len(self) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 3f6fd8c8aa827..ef8806246c2c5 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2826,8 +2826,13 @@ def test_unsortedindex(self): df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, columns=['one', 'two']) + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)['z', 'a'] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + with pytest.raises(UnsortedIndexError): - df.loc(axis=0)['z', :] + df.loc(axis=0)['z', slice('a')] df.sort_index(inplace=True) assert len(df.loc(axis=0)['z', :]) == 2 diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index fc6c627075c96..c12bb8910ffc9 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -817,9 +817,13 @@ def f(): assert df.index.lexsort_depth == 0 with tm.assert_raises_regex( UnsortedIndexError, - 'MultiIndex Slicing requires the index to be fully ' - r'lexsorted tuple len \(2\), lexsort depth \(0\)'): - df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + 'MultiIndex slicing requires the index to be ' + r'lexsorted: slicing on levels \[1\], lexsort depth 0'): + df.loc[(slice(None), slice('bar')), :] + + # GH 16734: not sorted, but no real slicing + result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + tm.assert_frame_equal(result, df.iloc[[1, 3], :]) def test_multiindex_slicers_non_unique(self): @@ -1001,9 +1005,14 @@ def test_per_axis_per_level_doc_examples(self): # not sorted def f(): - df.loc['A1', (slice(None), 'foo')] + df.loc['A1', ('a', slice('foo'))] pytest.raises(UnsortedIndexError, f) + + # GH 16734: not sorted, but no real slicing + tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')], + df.loc['A1'].iloc[:, [0, 2]]) + df = df.sort_index(axis=1) # slicing