Add missing_dims argument allowing isel() to ignore missing dimensions (#3923)

* Add missing_dims argument allowing isel() to ignore missing dimensions

* Add missing_dims to whats-new.rst

* Fix typos in TestVariable.test_isel()

* Change values for missing_dims argument to {'raise', 'warn', 'ignore'}

Matches the possible values used elsewhere for drop_vars arguments.

* Add missing_dims argument Dataset.isel()

* Mention Dataset.isel in whats-new.rst description of missing_dims
johnomotani authored Apr 3, 2020
1 parent 6bccbff commit 9b5140e
Showing 8 changed files with 151 additions and 18 deletions.
5 changes: 5 additions & 0 deletions doc/whats-new.rst
@@ -35,6 +35,11 @@ New Features
:py:func:`combine_by_coords` and :py:func:`combine_nested` using
combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`)
By `John Omotani <https://github.com/johnomotani>`_
- 'missing_dims' argument to :py:meth:`Dataset.isel`,
:py:meth:`DataArray.isel` and :py:meth:`Variable.isel`, allowing the exception
raised when a dimension passed to ``isel`` is not present to be replaced by a
warning, or the missing dimension to be ignored. (:issue:`3866`, :pull:`3923`)
By `John Omotani <https://github.com/johnomotani>`_
- Limited the length of array items with long string reprs to a
reasonable width (:pull:`3900`)
By `Maximilian Roos <https://github.com/max-sixty>`_
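
To illustrate the behaviour described in this entry, here is a minimal sketch of the user-facing API (assuming an xarray build that includes this change; the array and dimension names are illustrative):

import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(6).reshape(2, 3), dims=("x", "y"))

try:
    da.isel(z=0)  # default missing_dims="raise": 'z' is not a dimension of da
except ValueError as err:
    print(err)  # dimensions {'z'} do not exist. Expected one or more of ('x', 'y')

# "ignore" drops the unknown dimension silently; "warn" does the same but also
# emits a UserWarning.
result = da.isel(z=0, missing_dims="ignore")
assert result.identical(da)
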
30 changes: 28 additions & 2 deletions xarray/core/dataarray.py
@@ -1007,25 +1007,51 @@ def isel(
self,
indexers: Mapping[Hashable, Any] = None,
drop: bool = False,
missing_dims: str = "raise",
**indexers_kwargs: Any,
) -> "DataArray":
"""Return a new DataArray whose data is given by integer indexing
along the specified dimension(s).
Parameters
----------
indexers : dict, optional
A dict with keys matching dimensions and values given
by integers, slice objects or arrays.
indexer can be an integer, slice, array-like or DataArray.
If DataArrays are passed as indexers, xarray-style indexing will be
carried out. See :ref:`indexing` for the details.
One of indexers or indexers_kwargs must be provided.
drop : bool, optional
If ``drop=True``, drop coordinate variables indexed by integers
instead of making them scalar.
missing_dims : {"raise", "warn", "ignore"}, default "raise"
What to do if dimensions that should be selected from are not present in the
DataArray:
- "exception": raise an exception
- "warning": raise a warning, and ignore the missing dimensions
- "ignore": ignore the missing dimensions
**indexers_kwargs : {dim: indexer, ...}, optional
The keyword arguments form of ``indexers``.
See Also
--------
Dataset.isel
DataArray.sel
"""

indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")

if any(is_fancy_indexer(idx) for idx in indexers.values()):
ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop)
ds = self._to_temp_dataset()._isel_fancy(
indexers, drop=drop, missing_dims=missing_dims
)
return self._from_temp_dataset(ds)

# Much faster algorithm for when all indexers are ints, slices, one-dimensional
# lists, or zero or one-dimensional np.ndarray's

variable = self._variable.isel(indexers)
variable = self._variable.isel(indexers, missing_dims=missing_dims)

coords = {}
for coord_name, coord_value in self._coords.items():
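
The DataArray.isel change threads missing_dims through both code paths: the fancy-indexer branch (via a temporary Dataset) and the fast path through Variable.isel. A hedged sketch of the fancy branch, using an illustrative vectorized indexer:

import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(12).reshape(3, 4), dims=("x", "y"))
points = xr.DataArray([0, 2], dims="points")  # DataArray indexer triggers the fancy-indexing branch

# 'not_a_dim' is dropped from the indexers before the selection is applied.
subset = da.isel(y=points, not_a_dim=0, missing_dims="ignore")
print(subset.dims)  # ('x', 'points')
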
30 changes: 20 additions & 10 deletions xarray/core/dataset.py
@@ -87,6 +87,7 @@
_check_inplace,
_default,
decode_numpy_dict_values,
drop_dims_from_indexers,
either_dict_or_kwargs,
hashable,
infix_dims,
@@ -1767,7 +1768,7 @@ def maybe_chunk(name, var, chunks):
return self._replace(variables)

def _validate_indexers(
self, indexers: Mapping[Hashable, Any]
self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise",
) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]:
""" Here we make sure
+ indexers have valid keys
@@ -1777,9 +1778,7 @@ def _validate_indexers(
"""
from .dataarray import DataArray

invalid = indexers.keys() - self.dims.keys()
if invalid:
raise ValueError("dimensions %r do not exist" % invalid)
indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)

# all indexers should be int, slice, np.ndarrays, or Variable
for k, v in indexers.items():
@@ -1875,6 +1874,7 @@ def isel(
self,
indexers: Mapping[Hashable, Any] = None,
drop: bool = False,
missing_dims: str = "raise",
**indexers_kwargs: Any,
) -> "Dataset":
"""Returns a new dataset with each array indexed along the specified
@@ -1896,6 +1896,12 @@ def isel(
drop : bool, optional
If ``drop=True``, drop coordinate variables indexed by integers
instead of making them scalar.
missing_dims : {"raise", "warn", "ignore"}, default "raise"
What to do if dimensions that should be selected from are not present in the
Dataset:
- "exception": raise an exception
- "warning": raise a warning, and ignore the missing dimensions
- "ignore": ignore the missing dimensions
**indexers_kwargs : {dim: indexer, ...}, optional
The keyword arguments form of ``indexers``.
One of indexers or indexers_kwargs must be provided.
@@ -1918,13 +1924,11 @@
"""
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
if any(is_fancy_indexer(idx) for idx in indexers.values()):
return self._isel_fancy(indexers, drop=drop)
return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims)

# Much faster algorithm for when all indexers are ints, slices, one-dimensional
# lists, or zero or one-dimensional np.ndarray's
invalid = indexers.keys() - self.dims.keys()
if invalid:
raise ValueError("dimensions %r do not exist" % invalid)
indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)

variables = {}
dims: Dict[Hashable, Tuple[int, ...]] = {}
@@ -1958,10 +1962,16 @@
file_obj=self._file_obj,
)

def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset":
def _isel_fancy(
self,
indexers: Mapping[Hashable, Any],
*,
drop: bool,
missing_dims: str = "raise",
) -> "Dataset":
# Note: we need to preserve the original indexers variable in order to merge the
# coords below
indexers_list = list(self._validate_indexers(indexers))
indexers_list = list(self._validate_indexers(indexers, missing_dims))

variables: Dict[Hashable, Variable] = {}
indexes: Dict[Hashable, pd.Index] = {}
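
Dataset.isel behaves the same way; a short sketch of the "warn" option on an illustrative Dataset:

import warnings

import numpy as np
import xarray as xr

ds = xr.Dataset({"temp": (("time", "x"), np.zeros((4, 3)))})

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    subset = ds.isel(time=slice(2), not_a_dim=0, missing_dims="warn")

print(dict(subset.sizes))  # {'time': 2, 'x': 3}
print(caught[0].message)   # dimensions {'not_a_dim'} do not exist. Expected one or more of ...
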
49 changes: 49 additions & 0 deletions xarray/core/utils.py
@@ -24,6 +24,7 @@
Sequence,
Tuple,
TypeVar,
Union,
cast,
)

@@ -738,6 +739,54 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable:
return new_dim


def drop_dims_from_indexers(
indexers: Mapping[Hashable, Any],
dims: Union[list, Mapping[Hashable, int]],
missing_dims: str,
) -> Mapping[Hashable, Any]:
""" Depending on the setting of missing_dims, drop any dimensions from indexers that
are not present in dims.
Parameters
----------
indexers : dict
dims : sequence
missing_dims : {"raise", "warn", "ignore"}
"""

if missing_dims == "raise":
invalid = indexers.keys() - set(dims)
if invalid:
raise ValueError(
f"dimensions {invalid} do not exist. Expected one or more of {dims}"
)

return indexers

elif missing_dims == "warn":

# don't modify input
indexers = dict(indexers)

invalid = indexers.keys() - set(dims)
if invalid:
warnings.warn(
f"dimensions {invalid} do not exist. Expected one or more of {dims}"
)
for key in invalid:
indexers.pop(key)

return indexers

elif missing_dims == "ignore":
return {key: val for key, val in indexers.items() if key in dims}

else:
raise ValueError(
f"Unrecognised option {missing_dims} for missing_dims argument"
)


# Singleton type, as per https://github.com/python/typing/pull/240
class Default(Enum):
token = 0
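
The new drop_dims_from_indexers helper centralises the three behaviours in one place. An illustrative sketch of how it behaves; note that it is an internal helper in xarray.core.utils, not public API:

from xarray.core.utils import drop_dims_from_indexers  # internal helper added above

indexers = {"x": 0, "not_a_dim": slice(None)}
dims = {"x": 3, "y": 4}

print(drop_dims_from_indexers(indexers, dims, "ignore"))  # {'x': 0}
print(drop_dims_from_indexers(indexers, dims, "warn"))    # {'x': 0}, and a UserWarning is emitted

try:
    drop_dims_from_indexers(indexers, dims, "raise")
except ValueError as err:
    print(err)  # dimensions {'not_a_dim'} do not exist. Expected one or more of ...
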
14 changes: 9 additions & 5 deletions xarray/core/variable.py
@@ -28,6 +28,7 @@
OrderedSet,
_default,
decode_numpy_dict_values,
drop_dims_from_indexers,
either_dict_or_kwargs,
ensure_us_time_resolution,
infix_dims,
@@ -1030,6 +1031,7 @@ def _to_dense(self):
def isel(
self: VariableType,
indexers: Mapping[Hashable, Any] = None,
missing_dims: str = "raise",
**indexers_kwargs: Any,
) -> VariableType:
"""Return a new array indexed along the specified dimension(s).
@@ -1039,6 +1041,12 @@
**indexers : {dim: indexer, ...}
Keyword arguments with names matching dimensions and values given
by integers, slice objects or arrays.
missing_dims : {"raise", "warn", "ignore"}, default "raise"
What to do if dimensions that should be selected from are not present in the
Variable:
- "raise": raise an exception
- "warn": raise a warning, and ignore the missing dimensions
- "ignore": ignore the missing dimensions
Returns
-------
@@ -1050,11 +1058,7 @@
"""
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")

invalid = indexers.keys() - set(self.dims)
if invalid:
raise ValueError(
f"dimensions {invalid} do not exist. Expected one or more of {self.dims}"
)
indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)

key = tuple(indexers.get(dim, slice(None)) for dim in self.dims)
return self[key]
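
Variable.isel gains the same keyword; a brief sketch with an illustrative Variable:

import numpy as np
import xarray as xr

var = xr.Variable(("time", "x"), np.arange(8).reshape(2, 4))

print(var.isel(x=0).dims)                                 # ('time',)
print(var.isel(not_a_dim=0, missing_dims="ignore").dims)  # ('time', 'x'), unchanged
# missing_dims="warn" returns the same result but emits a UserWarning.
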
13 changes: 13 additions & 0 deletions xarray/tests/test_dataarray.py
@@ -781,6 +781,19 @@ def test_isel(self):
assert_identical(self.dv, self.dv.isel(x=slice(None)))
assert_identical(self.dv[:3], self.dv.isel(x=slice(3)))
assert_identical(self.dv[:3, :5], self.dv.isel(x=slice(3), y=slice(5)))
with raises_regex(
ValueError,
r"dimensions {'not_a_dim'} do not exist. Expected "
r"one or more of \('x', 'y'\)",
):
self.dv.isel(not_a_dim=0)
with pytest.warns(
UserWarning,
match=r"dimensions {'not_a_dim'} do not exist. "
r"Expected one or more of \('x', 'y'\)",
):
self.dv.isel(not_a_dim=0, missing_dims="warn")
assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore"))

def test_isel_types(self):
# regression test for #1405
15 changes: 15 additions & 0 deletions xarray/tests/test_dataset.py
@@ -1023,6 +1023,21 @@ def test_isel(self):

with pytest.raises(ValueError):
data.isel(not_a_dim=slice(0, 2))
with raises_regex(
ValueError,
r"dimensions {'not_a_dim'} do not exist. Expected "
r"one or more of "
r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
):
data.isel(not_a_dim=slice(0, 2))
with pytest.warns(
UserWarning,
match=r"dimensions {'not_a_dim'} do not exist. "
r"Expected one or more of "
r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
):
data.isel(not_a_dim=slice(0, 2), missing_dims="warn")
assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore"))

ret = data.isel(dim1=0)
assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims
13 changes: 12 additions & 1 deletion xarray/tests/test_variable.py
@@ -1254,8 +1254,19 @@ def test_isel(self):
assert_identical(v.isel(x=0), v[:, 0])
assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]])
assert_identical(v.isel(time=[]), v[[]])
with raises_regex(ValueError, "do not exist"):
with raises_regex(
ValueError,
r"dimensions {'not_a_dim'} do not exist. Expected one or more of "
r"\('time', 'x'\)",
):
v.isel(not_a_dim=0)
with pytest.warns(
UserWarning,
match=r"dimensions {'not_a_dim'} do not exist. Expected one or more of "
r"\('time', 'x'\)",
):
v.isel(not_a_dim=0, missing_dims="warn")
assert_identical(v, v.isel(not_a_dim=0, missing_dims="ignore"))

def test_index_0d_numpy_string(self):
# regression test to verify our work around for indexing 0d strings