diff --git a/doc/indexing.rst b/doc/indexing.rst
index 58700d7b572..9ee8f1dddf8 100644
--- a/doc/indexing.rst
+++ b/doc/indexing.rst
@@ -392,14 +392,6 @@ These methods may also be applied to ``Dataset`` objects
 You may find increased performance by loading your data into memory
 first, e.g., with :py:meth:`~xarray.Dataset.load`.
 
-.. note::
-
-  Vectorized indexing is a new feature in v0.10.
-  In older versions of xarray, dimensions of indexers are ignored.
-  Dedicated methods for some advanced indexing use cases,
-  ``isel_points`` and ``sel_points`` are now deprecated.
-  See :ref:`more_advanced_indexing` for their alternative.
-
 .. note::
 
   If an indexer is a :py:meth:`~xarray.DataArray`, its coordinates should not
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index d029f1263ea..7ce60dc6e81 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -21,6 +21,14 @@ v0.13.0 (unreleased)
 This release increases the minimum required Python version from 3.5.0 to 3.5.3
 (:issue:`3089`). By `Guido Imperiale <https://github.com/crusaderky>`_.
 
+Breaking changes
+~~~~~~~~~~~~~~~~
+
+- The ``isel_points`` and ``sel_points`` methods are removed, having been
+  deprecated since v0.10.0. They are redundant with the ``isel`` / ``sel``
+  methods; see :ref:`vectorized_indexing` for details.
+  By `Maximilian Roos <https://github.com/max-sixty>`_.
+
 New functions/methods
 ~~~~~~~~~~~~~~~~~~~~~
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index f147a97d39b..4bd80553588 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -1026,32 +1026,6 @@ def sel(
         )
         return self._from_temp_dataset(ds)
 
-    def isel_points(self, dim="points", **indexers) -> "DataArray":
-        """Return a new DataArray whose data is given by pointwise integer
-        indexing along the specified dimension(s).
-
-        See Also
-        --------
-        Dataset.isel_points
-        """
-        ds = self._to_temp_dataset().isel_points(dim=dim, **indexers)
-        return self._from_temp_dataset(ds)
-
-    def sel_points(
-        self, dim="points", method=None, tolerance=None, **indexers
-    ) -> "DataArray":
-        """Return a new DataArray whose dataset is given by pointwise selection
-        of index labels along the specified dimension(s).
-
-        See Also
-        --------
-        Dataset.sel_points
-        """
-        ds = self._to_temp_dataset().sel_points(
-            dim=dim, method=method, tolerance=tolerance, **indexers
-        )
-        return self._from_temp_dataset(ds)
-
     def broadcast_like(
         self, other: Union["DataArray", Dataset], exclude: Iterable[Hashable] = None
     ) -> "DataArray":
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index a09a0076b4e..4cb5d1454ce 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -40,7 +40,6 @@
     duck_array_ops,
     formatting,
     groupby,
-    indexing,
     ops,
     pdcompat,
     resample,
@@ -1999,214 +1998,6 @@ def sel(
         result = self.isel(indexers=pos_indexers, drop=drop)
         return result._overwrite_indexes(new_indexes)
 
-    def isel_points(self, dim: Any = "points", **indexers: Any) -> "Dataset":
-        """Returns a new dataset with each array indexed pointwise along the
-        specified dimension(s).
-
-        This method selects pointwise values from each array and is akin to
-        the NumPy indexing behavior of `arr[[0, 1], [0, 1]]`, except this
-        method does not require knowing the order of each array's dimensions.
-
-        Parameters
-        ----------
-        dim : hashable or DataArray or pandas.Index or other list-like object,
-            optional
-            Name of the dimension to concatenate along. If dim is provided as a
-            hashable, it must be a new dimension name, in which case it is added
-            along axis=0. If dim is provided as a DataArray or Index or
-            list-like object, its name, which must not be present in the
-            dataset, is used as the dimension to concatenate along and the
-            values are added as a coordinate.
-        **indexers : {dim: indexer, ...}
-            Keyword arguments with names matching dimensions and values given
-            by array-like objects. All indexers must be the same length and
-            1 dimensional.
-
-        Returns
-        -------
-        obj : Dataset
-            A new Dataset with the same contents as this dataset, except each
-            array and dimension is indexed by the appropriate indexers. With
-            pointwise indexing, the new Dataset will always be a copy of the
-            original.
-
-        See Also
-        --------
-        Dataset.sel
-        Dataset.isel
-        Dataset.sel_points
-        DataArray.isel_points
-        """  # noqa
-        warnings.warn(
-            "Dataset.isel_points is deprecated: use Dataset.isel()" "instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-
-        indexer_dims = set(indexers)
-
-        def take(variable, slices):
-            # Note: remove helper function when once when numpy
-            # supports vindex https://github.com/numpy/numpy/pull/6075
-            if hasattr(variable.data, "vindex"):
-                # Special case for dask backed arrays to use vectorised list
-                # indexing
-                sel = variable.data.vindex[slices]
-            else:
-                # Otherwise assume backend is numpy array with 'fancy' indexing
-                sel = variable.data[slices]
-            return sel
-
-        def relevant_keys(mapping):
-            return [
-                k for k, v in mapping.items() if any(d in indexer_dims for d in v.dims)
-            ]
-
-        coords = relevant_keys(self.coords)
-        indexers = {k: np.asarray(v) for k, v in indexers.items()}
-        non_indexed_dims = set(self.dims) - indexer_dims
-        non_indexed_coords = set(self.coords) - set(coords)
-
-        # All the indexers should be iterables
-        # Check that indexers are valid dims, integers, and 1D
-        for k, v in indexers.items():
-            if k not in self.dims:
-                raise ValueError("dimension %s does not exist" % k)
-            if v.dtype.kind != "i":  # type: ignore
-                raise TypeError("Indexers must be integers")
-            if v.ndim != 1:  # type: ignore
-                raise ValueError("Indexers must be 1 dimensional")
-
-        # all the indexers should have the same length
-        lengths = {len(v) for k, v in indexers.items()}
-        if len(lengths) > 1:
-            raise ValueError("All indexers must be the same length")
-
-        # Existing dimensions are not valid choices for the dim argument
-        if isinstance(dim, str):
-            if dim in self.dims:
-                # dim is an invalid string
-                raise ValueError(
-                    "Existing dimension names are not valid "
-                    "choices for the dim argument in sel_points"
-                )
-
-        elif hasattr(dim, "dims"):
-            # dim is a DataArray or Coordinate
-            if dim.name in self.dims:
-                # dim already exists
-                raise ValueError(
-                    "Existing dimensions are not valid choices "
-                    "for the dim argument in sel_points"
-                )
-
-        # Set the new dim_name, and optionally the new dim coordinate
-        # dim is either an array-like or a string
-        if not utils.is_scalar(dim):
-            # dim is array like get name or assign 'points', get as variable
-            dim_name = "points" if not hasattr(dim, "name") else dim.name
-            dim_coord = as_variable(dim, name=dim_name)
-        else:
-            # dim is a string
-            dim_name = dim
-            dim_coord = None  # type: ignore
-
-        reordered = self.transpose(*list(indexer_dims), *list(non_indexed_dims))
-
-        variables = OrderedDict()  # type: ignore
-
-        for name, var in reordered.variables.items():
-            if name in indexers or any(d in indexer_dims for d in var.dims):
-                # slice if var is an indexer or depends on an indexed dim
-                slc = [indexers.get(k, slice(None)) for k in var.dims]
-
-                var_dims = [dim_name] + [d for d in var.dims if d in non_indexed_dims]
-                selection = take(var, tuple(slc))
-                var_subset = type(var)(var_dims, selection, var.attrs)
-                variables[name] = var_subset
-            else:
-                # If not indexed just add it back to variables or coordinates
-                variables[name] = var
-
-        coord_names = (set(coords) & set(variables)) | non_indexed_coords
-
-        dset = self._replace_vars_and_dims(variables, coord_names=coord_names)
-        # Add the dim coord to the new dset. Must be done after creation
-        # because _replace_vars_and_dims can only access existing coords,
-        # not add new ones
-        if dim_coord is not None:
-            dset.coords[dim_name] = dim_coord
-        return dset
-
-    def sel_points(
-        self,
-        dim: Any = "points",
-        method: str = None,
-        tolerance: Number = None,
-        **indexers: Any
-    ):
-        """Returns a new dataset with each array indexed pointwise by tick
-        labels along the specified dimension(s).
-
-        In contrast to `Dataset.isel_points`, indexers for this method should
-        use labels instead of integers.
-
-        In contrast to `Dataset.sel`, this method selects points along the
-        diagonal of multi-dimensional arrays, not the intersection.
-
-        Parameters
-        ----------
-        dim : hashable or DataArray or pandas.Index or other list-like object,
-            optional
-            Name of the dimension to concatenate along. If dim is provided as a
-            hashable, it must be a new dimension name, in which case it is added
-            along axis=0. If dim is provided as a DataArray or Index or
-            list-like object, its name, which must not be present in the
-            dataset, is used as the dimension to concatenate along and the
-            values are added as a coordinate.
-        method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional
-            Method to use for inexact matches (requires pandas>=0.16):
-
-            * None (default): only exact matches
-            * pad / ffill: propagate last valid index value forward
-            * backfill / bfill: propagate next valid index value backward
-            * nearest: use nearest valid index value
-        tolerance : optional
-            Maximum distance between original and new labels for inexact
-            matches. The values of the index at the matching locations must
-            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
-            Requires pandas>=0.17.
-        **indexers : {dim: indexer, ...}
-            Keyword arguments with names matching dimensions and values given
-            by array-like objects. All indexers must be the same length and
-            1 dimensional.
-
-        Returns
-        -------
-        obj : Dataset
-            A new Dataset with the same contents as this dataset, except each
-            array and dimension is indexed by the appropriate indexers. With
-            pointwise indexing, the new Dataset will always be a copy of the
-            original.
-
-        See Also
-        --------
-        Dataset.sel
-        Dataset.isel
-        Dataset.isel_points
-        DataArray.sel_points
-        """  # noqa
-        warnings.warn(
-            "Dataset.sel_points is deprecated: use Dataset.sel()" "instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-
-        pos_indexers, _ = indexing.remap_label_indexers(
-            self, indexers, method=method, tolerance=tolerance
-        )
-        return self.isel_points(dim=dim, **pos_indexers)
-
     def broadcast_like(
         self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None
     ) -> "Dataset":
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index d2355e28f6e..6a68fb73837 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -1001,64 +1001,6 @@ def test_isel_drop(self):
         selected = data.isel(x=0, drop=False)
         assert_identical(expected, selected)
 
-    @pytest.mark.filterwarnings("ignore:Dataset.isel_points")
-    def test_isel_points(self):
-        shape = (10, 5, 6)
-        np_array = np.random.random(shape)
-        da = DataArray(
-            np_array, dims=["time", "y", "x"], coords={"time": np.arange(0, 100, 10)}
-        )
-        y = [1, 3]
-        x = [3, 0]
-
-        expected = da.values[:, y, x]
-
-        actual = da.isel_points(y=y, x=x, dim="test_coord")
-        assert actual.coords["test_coord"].shape == (len(y),)
-        assert list(actual.coords) == ["time"]
-        assert actual.dims == ("test_coord", "time")
-
-        actual = da.isel_points(y=y, x=x)
-        assert "points" in actual.dims
-        # Note that because xarray always concatenates along the first
-        # dimension, We must transpose the result to match the numpy style of
-        # concatenation.
-        np.testing.assert_equal(actual.T, expected)
-
-        # a few corner cases
-        da.isel_points(time=[1, 2], x=[2, 2], y=[3, 4])
-        np.testing.assert_allclose(
-            da.isel_points(time=[1], x=[2], y=[4]).values.squeeze(),
-            np_array[1, 4, 2].squeeze(),
-        )
-        da.isel_points(time=[1, 2])
-        y = [-1, 0]
-        x = [-2, 2]
-        expected = da.values[:, y, x]
-        actual = da.isel_points(x=x, y=y).values
-        np.testing.assert_equal(actual.T, expected)
-
-        # test that the order of the indexers doesn't matter
-        assert_identical(da.isel_points(y=y, x=x), da.isel_points(x=x, y=y))
-
-        # make sure we're raising errors in the right places
-        with raises_regex(ValueError, "All indexers must be the same length"):
-            da.isel_points(y=[1, 2], x=[1, 2, 3])
-        with raises_regex(ValueError, "dimension bad_key does not exist"):
-            da.isel_points(bad_key=[1, 2])
-        with raises_regex(TypeError, "Indexers must be integers"):
-            da.isel_points(y=[1.5, 2.2])
-        with raises_regex(TypeError, "Indexers must be integers"):
-            da.isel_points(x=[1, 2, 3], y=slice(3))
-        with raises_regex(ValueError, "Indexers must be 1 dimensional"):
-            da.isel_points(y=1, x=2)
-        with raises_regex(ValueError, "Existing dimension names are not"):
-            da.isel_points(y=[1, 2], x=[1, 2], dim="x")
-
-        # using non string dims
-        actual = da.isel_points(y=[1, 2], x=[1, 2], dim=["A", "B"])
-        assert "points" in actual.coords
-
     def test_loc(self):
         self.ds["x"] = ("x", np.array(list("abcdefghij")))
         da = self.ds["foo"]
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 4636b963acd..82ae65d955f 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -1410,116 +1410,6 @@ def test_isel_drop(self):
         selected = data.isel(x=0, drop=False)
         assert_identical(expected, selected)
 
-    @pytest.mark.filterwarnings("ignore:Dataset.isel_points")
-    def test_isel_points(self):
-        data = create_test_data()
-
-        pdim1 = [1, 2, 3]
-        pdim2 = [4, 5, 1]
-        pdim3 = [1, 2, 3]
-        actual = data.isel_points(dim1=pdim1, dim2=pdim2, dim3=pdim3, dim="test_coord")
-        assert "test_coord" in actual.dims
-        assert actual.coords["test_coord"].shape == (len(pdim1),)
-
-        actual = data.isel_points(dim1=pdim1, dim2=pdim2)
-        assert "points" in actual.dims
-        assert "dim3" in actual.dims
-        assert "dim3" not in actual.data_vars
-        np.testing.assert_array_equal(data["dim2"][pdim2], actual["dim2"])
-
-        # test that the order of the indexers doesn't matter
-        assert_identical(
-            data.isel_points(dim1=pdim1, dim2=pdim2),
-            data.isel_points(dim2=pdim2, dim1=pdim1),
-        )
-
-        # make sure we're raising errors in the right places
-        with raises_regex(ValueError, "All indexers must be the same length"):
-            data.isel_points(dim1=[1, 2], dim2=[1, 2, 3])
-        with raises_regex(ValueError, "dimension bad_key does not exist"):
-            data.isel_points(bad_key=[1, 2])
-        with raises_regex(TypeError, "Indexers must be integers"):
-            data.isel_points(dim1=[1.5, 2.2])
-        with raises_regex(TypeError, "Indexers must be integers"):
-            data.isel_points(dim1=[1, 2, 3], dim2=slice(3))
-        with raises_regex(ValueError, "Indexers must be 1 dimensional"):
-            data.isel_points(dim1=1, dim2=2)
-        with raises_regex(ValueError, "Existing dimension names are not valid"):
-            data.isel_points(dim1=[1, 2], dim2=[1, 2], dim="dim2")
-
-        # test to be sure we keep around variables that were not indexed
-        ds = Dataset({"x": [1, 2, 3, 4], "y": 0})
-        actual = ds.isel_points(x=[0, 1, 2])
-        assert_identical(ds["y"], actual["y"])
-
-        # tests using index or DataArray as a dim
-        stations = Dataset()
-        stations["station"] = ("station", ["A", "B", "C"])
-        stations["dim1s"] = ("station", [1, 2, 3])
-        stations["dim2s"] = ("station", [4, 5, 1])
-
-        actual = data.isel_points(
-            dim1=stations["dim1s"], dim2=stations["dim2s"], dim=stations["station"]
-        )
-        assert "station" in actual.coords
-        assert "station" in actual.dims
-        assert_identical(actual["station"].drop(["dim2"]), stations["station"])
-
-        # make sure we get the default 'points' coordinate when passed a list
-        actual = data.isel_points(
-            dim1=stations["dim1s"], dim2=stations["dim2s"], dim=["A", "B", "C"]
-        )
-        assert "points" in actual.coords
-        assert actual.coords["points"].values.tolist() == ["A", "B", "C"]
-
-        # test index
-        actual = data.isel_points(
-            dim1=stations["dim1s"].values,
-            dim2=stations["dim2s"].values,
-            dim=pd.Index(["A", "B", "C"], name="letters"),
-        )
-        assert "letters" in actual.coords
-
-        # can pass a numpy array
-        data.isel_points(
-            dim1=stations["dim1s"], dim2=stations["dim2s"], dim=np.array([4, 5, 6])
-        )
-
-    @pytest.mark.filterwarnings("ignore:Dataset.sel_points")
-    @pytest.mark.filterwarnings("ignore:Dataset.isel_points")
-    def test_sel_points(self):
-        data = create_test_data()
-
-        # add in a range() index
-        data["dim1"] = data.dim1
-
-        pdim1 = [1, 2, 3]
-        pdim2 = [4, 5, 1]
-        pdim3 = [1, 2, 3]
-        expected = data.isel_points(
-            dim1=pdim1, dim2=pdim2, dim3=pdim3, dim="test_coord"
-        )
-        actual = data.sel_points(
-            dim1=data.dim1[pdim1],
-            dim2=data.dim2[pdim2],
-            dim3=data.dim3[pdim3],
-            dim="test_coord",
-        )
-        assert_identical(expected, actual)
-
-        data = Dataset({"foo": (("x", "y"), np.arange(9).reshape(3, 3))})
-        expected = Dataset({"foo": ("points", [0, 4, 8])})
-        actual = data.sel_points(x=[0, 1, 2], y=[0, 1, 2])
-        assert_identical(expected, actual)
-
-        data.coords.update({"x": [0, 1, 2], "y": [0, 1, 2]})
-        expected.coords.update({"x": ("points", [0, 1, 2]), "y": ("points", [0, 1, 2])})
-        actual = data.sel_points(x=[0.1, 1.1, 2.5], y=[0, 1.2, 2.0], method="pad")
-        assert_identical(expected, actual)
-
-        with pytest.raises(KeyError):
-            data.sel_points(x=[2.5], y=[2.0], method="pad", tolerance=1e-3)
-
     @pytest.mark.filterwarnings("ignore::DeprecationWarning")
     def test_sel_fancy(self):
         data = create_test_data()
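
Migration note: as the deprecation warnings in the removed code say, ``isel_points`` / ``sel_points`` calls map onto plain ``isel`` / ``sel`` with ``DataArray`` indexers that share a new common dimension (see :ref:`vectorized_indexing`). A minimal sketch of that mapping, mirroring the removed tests; the ``points`` dimension name is only illustrative, any new name works:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.random.random((10, 5, 6)), dims=["time", "y", "x"])

# Before: da.isel_points(y=[1, 3], x=[3, 0], dim="points")
# After: DataArray indexers sharing the "points" dimension are broadcast
# against each other, selecting one element per position along "points".
actual = da.isel(
    y=xr.DataArray([1, 3], dims="points"),
    x=xr.DataArray([3, 0], dims="points"),
)
assert "points" in actual.dims

# Before: ds.sel_points(x=[0, 1, 2], y=[0, 1, 2]) picked the diagonal.
# After: .sel with DataArray indexers does the same label-based pointwise
# selection, and method="nearest"/"pad" still applies for inexact matches.
ds = xr.Dataset(
    {"foo": (("x", "y"), np.arange(9).reshape(3, 3))},
    coords={"x": [0, 1, 2], "y": [0, 1, 2]},
)
diagonal = ds.sel(
    x=xr.DataArray([0, 1, 2], dims="points"),
    y=xr.DataArray([0, 1, 2], dims="points"),
)
assert list(diagonal["foo"].values) == [0, 4, 8]
```

Unlike the removed methods, which required same-length 1D integer or label indexers, vectorized indexing accepts any mutually broadcastable indexer shapes, so the old pointwise behaviour is covered as a special case.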