-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Micro optimizations to improve indexing #9002
Changes from 12 commits
95c1ef7
53c0603
9d50a39
20f250d
de6e4c3
e6c53d2
04b08e1
28983f5
1cc75cf
f2494d6
5bd48fd
4a7411b
d526ae4
f442532
904de4f
f10c6f4
57c5bd2
4edd8cc
fefb3dc
7f7b691
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -296,15 +296,16 @@ def slice_slice(old_slice: slice, applied_slice: slice, size: int) -> slice: | |
|
||
|
||
def _index_indexer_1d(old_indexer, applied_indexer, size: int): | ||
assert isinstance(applied_indexer, integer_types + (slice, np.ndarray)) | ||
if isinstance(applied_indexer, slice) and applied_indexer == slice(None): | ||
# shortcut for the usual case | ||
return old_indexer | ||
if isinstance(old_indexer, slice): | ||
if isinstance(applied_indexer, slice): | ||
indexer = slice_slice(old_indexer, applied_indexer, size) | ||
elif isinstance(applied_indexer, integer_types): | ||
indexer = range(*old_indexer.indices(size))[applied_indexer] # type: ignore[assignment] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These kinds of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one is due to the ambiguity between slice and int for these indexers. I can look into it if you want. However. It starts to touch some tuple to lists conversation that was identified as something I shouldn't adjust too much due to potential conflicts with an active in development branch. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The following patch would
would move the error to
While I don't disagree that:
I would rather keep this for a future change once the branch |
||
else: | ||
indexer = _expand_slice(old_indexer, size)[applied_indexer] # type: ignore[assignment] | ||
indexer = _expand_slice(old_indexer, size)[applied_indexer] | ||
else: | ||
indexer = old_indexer[applied_indexer] | ||
return indexer | ||
|
@@ -591,7 +592,7 @@ def __getitem__(self, key: Any): | |
class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin): | ||
"""Wrap an array to make basic and outer indexing lazy.""" | ||
|
||
__slots__ = ("array", "key") | ||
__slots__ = ("array", "key", "_shape") | ||
|
||
def __init__(self, array: Any, key: ExplicitIndexer | None = None): | ||
""" | ||
|
@@ -614,6 +615,14 @@ def __init__(self, array: Any, key: ExplicitIndexer | None = None): | |
self.array = as_indexable(array) | ||
self.key = key | ||
|
||
shape: _Shape = () | ||
for size, k in zip(self.array.shape, self.key.tuple): | ||
if isinstance(k, slice): | ||
shape += (len(range(*k.indices(size))),) | ||
elif isinstance(k, np.ndarray): | ||
shape += (k.size,) | ||
self._shape = shape | ||
|
||
def _updated_key(self, new_key: ExplicitIndexer) -> BasicIndexer | OuterIndexer: | ||
iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) | ||
full_key = [] | ||
|
@@ -630,13 +639,7 @@ def _updated_key(self, new_key: ExplicitIndexer) -> BasicIndexer | OuterIndexer: | |
|
||
@property | ||
def shape(self) -> _Shape: | ||
shape = [] | ||
for size, k in zip(self.array.shape, self.key.tuple): | ||
if isinstance(k, slice): | ||
shape.append(len(range(*k.indices(size)))) | ||
elif isinstance(k, np.ndarray): | ||
shape.append(k.size) | ||
return tuple(shape) | ||
return self._shape | ||
|
||
def get_duck_array(self): | ||
if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): | ||
|
@@ -1653,10 +1656,15 @@ class PandasIndexingAdapter(ExplicitlyIndexedNDArrayMixin): | |
|
||
__slots__ = ("array", "_dtype") | ||
|
||
def __init__(self, array: pd.Index, dtype: DTypeLike = None): | ||
def __init__( | ||
self, array: pd.Index, dtype: DTypeLike = None, *, fastpath: bool = False | ||
): | ||
from xarray.core.indexes import safe_cast_to_index | ||
|
||
self.array = safe_cast_to_index(array) | ||
if fastpath: | ||
dcherian marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.array = array | ||
else: | ||
self.array = safe_cast_to_index(array) | ||
|
||
if dtype is None: | ||
self._dtype = get_valid_numpy_dtype(array) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm a bit wary of adding
fastpath
everywhere. It makes things a bit hard to reason about. Is this one that consequential? We do first check forisinstance(pd.Index)
insafe_cast_to_index
...There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Anecdotally, I remember thinking that indexes are created and copied a few too many times. I believe this was done for correctness in a big refactor, but reducing any useless copies would be a big win since we would call
_replace
less.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this in general is my feeling after my repeated deep diving into the codebase.